def __init__(
    self,
    model: Model,
    best_f: Union[float, Tensor],
    objective_index: int,
    constraints: Dict[int, Tuple[Optional[float], Optional[float]]],
    maximize: bool = True,
) -> None:
    r"""Set up analytic Constrained Expected Improvement.

    Args:
        model: A fitted single-outcome model.
        best_f: The best function value observed so far (assumed
            noiseless), given either as a scalar or as a `b`-dim Tensor
            (batch mode).
        objective_index: The index of the objective.
        constraints: A dictionary of the form `{i: [lower, upper]}`, where
            `i` is the output index, and `lower` and `upper` are lower and
            upper bounds on that output (resp. interpreted as -Inf / Inf
            if None).
        maximize: If True, consider the problem a maximization problem.
    """
    super().__init__(model=model)

    # Plain configuration attributes.
    self.constraints = constraints
    self.objective_index = objective_index
    self.maximize = maximize

    # The incumbent value is stored as a registered buffer named "best_f".
    incumbent = torch.as_tensor(best_f)
    self.register_buffer("best_f", incumbent)

    self._preprocess_constraint_bounds(constraints=constraints)
    self.register_forward_pre_hook(convert_to_target_pre_hook)
def __getitem__(self, idx):
    """Return ``(image, target, idx)`` for the sample at position ``idx``.

    The target is a ``BoxList`` in "xyxy" mode carrying a "labels" field
    (contiguous category ids) and a "masks" field, clipped to the image
    with empty boxes removed. Crowd annotations are dropped.
    """
    img, anno = super(COCODataset, self).__getitem__(idx)

    # filter crowd annotations
    # TODO might be better to add an extra field
    kept = [obj for obj in anno if obj["iscrowd"] == 0]

    # reshape(-1, 4) guards against the no-box case
    box_tensor = torch.as_tensor([obj["bbox"] for obj in kept]).reshape(-1, 4)
    target = BoxList(box_tensor, img.size, mode="xywh").convert("xyxy")

    category_ids = [obj["category_id"] for obj in kept]
    id_map = self.json_category_id_to_contiguous_id
    labels = torch.tensor([id_map[c] for c in category_ids])
    target.add_field("labels", labels)

    segmentations = [obj["segmentation"] for obj in kept]
    target.add_field("masks", SegmentationMask(segmentations, img.size))

    target = target.clip_to_image(remove_empty=True)

    if self.transforms is not None:
        img, target = self.transforms(img, target)

    return img, target, idx
def __init__(self, polygons, size, mode):
    """Store a set of polygons together with the image size and mode.

    A ``list`` input has every element converted to a float32 tensor; a
    ``Polygons`` input contributes its own ``polygons`` attribute. Any
    other input is stored unchanged.
    """
    if isinstance(polygons, list):
        converted = []
        for poly in polygons:
            converted.append(torch.as_tensor(poly, dtype=torch.float32))
        polygons = converted
    elif isinstance(polygons, Polygons):
        polygons = polygons.polygons

    self.polygons = polygons
    self.size = size
    self.mode = mode
def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
    """
    Copy weights from ``loaded_state_dict`` into ``model_state_dict`` (in place),
    matching keys by longest suffix.

    Strategy: suppose that the models that we will create will have prefixes appended
    to each of its keys, for example due to an extra level of nesting that the original
    pre-trained weights from ImageNet won't contain. For example, model.state_dict()
    might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
    res2.conv1.weight. We thus want to match both parameters together.
    For that, we look for each model weight, look among all loaded keys if there is one
    that is a suffix of the current weight name, and use it if that's the case.
    If multiple matches exist, take the one with longest size
    of the corresponding name. For example, for the same model as before, the pretrained
    weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
    we want to match backbone[0].body.conv1.weight to conv1.weight, and
    backbone[0].body.res2.conv1.weight to res2.conv1.weight.

    Model keys without any matching loaded key are left untouched. If either
    state dict is empty there is nothing to align and the function returns
    without doing anything.

    Args:
        model_state_dict: mapping of parameter name -> tensor; updated in place.
        loaded_state_dict: mapping of parameter name -> tensor to load from.
    """
    current_keys = sorted(model_state_dict)
    loaded_keys = sorted(loaded_state_dict)
    if not current_keys or not loaded_keys:
        # Nothing can match; previously an empty loaded dict crashed when
        # reducing over a zero-sized dimension.
        return

    # used for logging
    max_size = max(len(key) for key in current_keys)
    max_size_loaded = max(len(key) for key in loaded_keys)
    log_str_template = "{: <{}} loaded from {: <{}} of shape {}"
    logger = logging.getLogger(__name__)

    for key in current_keys:
        # An empty loaded key is a suffix of everything but has match size 0,
        # which counts as "no match", so it is excluded explicitly.
        candidates = [k for k in loaded_keys if k and key.endswith(k)]
        if not candidates:
            continue
        key_old = max(candidates, key=len)
        model_state_dict[key] = loaded_state_dict[key_old]
        logger.info(
            log_str_template.format(
                key,
                max_size,
                key_old,
                max_size_loaded,
                tuple(loaded_state_dict[key_old].shape),
            )
        )
def __init__(self, sampler, group_ids, batch_size, drop_uneven=False):
    """Configure a batch sampler that is aware of per-sample group ids.

    Args:
        sampler: a ``torch.utils.data.Sampler`` instance.
        group_ids: one group id per sample; must convert to a 1-D tensor.
        batch_size: stored as ``self.batch_size``.
        drop_uneven: stored as ``self.drop_uneven``.

    Raises:
        ValueError: if ``sampler`` is not a ``Sampler``.
    """
    if not isinstance(sampler, Sampler):
        message = (
            "sampler should be an instance of "
            "torch.utils.data.Sampler, but got sampler={}"
        ).format(sampler)
        raise ValueError(message)

    self.sampler = sampler
    ids = torch.as_tensor(group_ids)
    self.group_ids = ids
    assert self.group_ids.dim() == 1
    self.batch_size = batch_size
    self.drop_uneven = drop_uneven

    # Distinct group ids in ascending order.
    sorted_unique, _ = torch.unique(ids).sort(0)
    self.groups = sorted_unique

    self._can_reuse_batches = False
def __init__(self, bbox, image_size, mode="xyxy"):
    """Wrap a set of boxes together with the size of their image.

    Args:
        bbox: tensor or array-like convertible to a float32 tensor with
            two dimensions and a last dimension of size 4. A tensor input
            stays on its own device; anything else is placed on the CPU.
        image_size: stored as ``self.size`` (image_width, image_height).
        mode: either "xyxy" or "xywh".

    Raises:
        ValueError: if ``bbox`` is not 2-D, if its last dimension is not 4,
            or if ``mode`` is not one of the two supported values.
    """
    if isinstance(bbox, torch.Tensor):
        device = bbox.device
    else:
        device = torch.device("cpu")
    bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device)

    ndim = bbox.ndimension()
    if ndim != 2:
        raise ValueError("bbox should have 2 dimensions, got {}".format(ndim))

    last = bbox.size(-1)
    if last != 4:
        raise ValueError(
            "last dimenion of bbox should have a "
            "size of 4, got {}".format(last)
        )

    if mode not in ("xyxy", "xywh"):
        raise ValueError("mode should be 'xyxy' or 'xywh'")

    self.bbox = bbox
    self.size = image_size  # (image_width, image_height)
    self.mode = mode
    self.extra_fields = {}
def segment(stack, method='ensemble', pad_mode='reflect', seg_threshold=0.8, min_voxels=65,
            max_voxels=4168, compactness_factor=0.05,
            data_path='/data/pipeline/python/pipeline/data/'):
    """ Utility function to segment a 3-d stack

    :param stack: 3-d array. Raw Stack resampled to 1 mm^3.
    :param method: A string from 'single' or 'ensemble'. Whether to use a single model or
        an ensemble of models for segmentation.
    :param pad_mode: How will the stack be padded. Any valid mode from np.pad.
    :param seg_threshold: Threshold used to produce instance segmentations.
    :param min_voxels: Minimum number of voxels in a valid object.
    :param max_voxels: Maximum number of voxels in a valid object.
    :param compactness_factor: Weight for the compactness objective during instance
        segmentation.
    :param data_path: Directory holding the model checkpoint files. Defaults to the
        location that used to be hard-coded.

    :return: detection, segmentation, instance. Arrays of the same shape as stack:
        voxel-wise centroid probability (np.float32), voxel-wise cell probability
        (np.float32) and instance segmentation (np.int32).
    """
    pad = 20  # voxels added on every side before inference and dropped afterwards

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cpu':
        print('Running 3-d segmentation in the CPU will take more time.')

    # Prepare input
    padded = np.pad(stack, pad, mode=pad_mode)
    lcned = utils.lcn(padded, (3, 25, 25))
    norm = (lcned - lcned.mean()) / lcned.std()
    input_ = torch.as_tensor(norm[np.newaxis, np.newaxis, ...])  # 1 x 1 x D x H x W
    del padded, lcned, norm  # release memory

    # Declare models
    net = models.QCANet()
    if method == 'single':
        model_names = ['bestndn_1-9-17026.pth']
    else:
        model_names = ['bestndn_1-9-17026.pth', 'bestndn_1-17-17206.pth',
                       'bestndn_1-3-17259.pth', 'bestndn_1-8-17261.pth']  # we'll ensemble all of these

    # Create detection and segmentation probabilities.
    # These are accumulated with +=, so they must start at zero (np.empty would
    # add uninitialised memory into the averages).
    detection_sum = np.zeros(input_.shape[-3:], dtype=np.float32)
    segmentation_sum = np.zeros(input_.shape[-3:], dtype=np.float32)
    with torch.no_grad():
        for model_name in model_names:
            # Import model from file; map_location lets checkpoints saved on a
            # GPU be loaded on a CPU-only machine.
            state = torch.load(path.join(data_path, model_name), map_location=device)
            net.load_state_dict(state)
            net.eval()
            net.to(device)

            # Segment
            # NOTE(review): input_ is left on the CPU; presumably
            # forward_on_big_input moves chunks to the model's device - confirm.
            detection, segmentation = net.forward_on_big_input(input_)
            # .cpu() is a no-op for CPU tensors and required for CUDA ones.
            detection_sum += torch.sigmoid(detection).squeeze().cpu().numpy()
            segmentation_sum += torch.sigmoid(segmentation).squeeze().cpu().numpy()
    detection = detection_sum / len(model_names)
    segmentation = segmentation_sum / len(model_names)
    del input_, detection_sum, segmentation_sum  # release memory

    # Drop padding (added above)
    detection = detection[pad:-pad, pad:-pad, pad:-pad]
    segmentation = segmentation[pad:-pad, pad:-pad, pad:-pad]

    # Create instance segmentation
    instance = utils.prob2labels(detection, segmentation, seg_threshold=seg_threshold,
                                 min_voxels=min_voxels, max_voxels=max_voxels,
                                 compactness_factor=compactness_factor)

    return detection, segmentation, instance
def to_image(*numbers):
    """Pack the given numbers into a tensor of shape ``(1, 1, 1, len(numbers))``."""
    count = len(numbers)
    flat = torch.as_tensor(numbers)
    return flat.reshape(1, 1, 1, count)
def update(self, obs, action, reward, next_obs, terminal):
    """Record a batch of transitions and, when ready, run one learning step.

    In training mode the transitions are stored (in the local buffer when
    one exists, and in the replay memory). For every entry of ``terminal``
    that is truthy the matching local buffer and, in training mode, the
    matching exploration-noise process are reset if they exist.

    When ``self.ready_to_update()`` is true, a batch is sampled from memory
    and used for one critic step (importance-weighted ``0.5 * td_error**2``),
    one policy step (maximising the critic's value of the policy's action),
    a soft target update and a priority update from ``|td_error| + 1e-8``.

    ``self.current_step`` is incremented once per call in training mode.

    NOTE(review): the nesting of the statements below was reconstructed from
    the token order alone; confirm against the intended control flow.
    """
    if self.training:
        if hasattr(self, 'local_buffer'):
            self.add_to_local_buffer(obs, action, reward, next_obs, terminal)
        self.add_to_memory(obs, action, reward, next_obs, terminal)

    for i, done in enumerate(terminal):
        if done:
            # Best effort: not every agent has a local buffer.
            try:
                self.local_buffer[i].reset()
            except AttributeError:
                pass
            if self.training:
                # Best effort: not every agent has exploration noise.
                try:
                    self.exploration_noise[i].reset_states()
                except AttributeError:
                    pass

    if self.ready_to_update():
        # Targets are computed with the online network in eval mode.
        self.online_network.eval()

        # Maybe not the best way to do a line break,
        # but I like it more than \
        (obs, action, reward, next_obs,
         terminal, weight, indices) = self.sample_from_memory(self.batch_size)

        update_target = self.update_target(obs, action, reward, next_obs, terminal)

        self.online_network.train()

        if self.n_steps > 1:
            # Keep only index 0 along dim 1 of the sampled obs/action.
            # presumably dim 1 is the n-step axis - TODO confirm
            obs = obs[:, 0]
            action = action[:, 0]

        if weight is None:
            # No sample weights supplied: weight every sample equally.
            weight = torch.ones(self.batch_size, device=self._device)
        else:
            weight = torch.as_tensor(weight, dtype=torch.float32, device=self._device)
        assert (weight.dim() == 1), 'TODO'

        # ---- critic step ----
        Q_sa = self.online_network.critic_value(obs, action)
        td_error = Q_sa - update_target
        critic_loss = (td_error).pow(2).mul(0.5).squeeze(-1)
        critic_loss = (critic_loss * weight).mean()

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        if self.clip_gradients:
            nn.utils.clip_grad_norm_(self.online_network.critic_params, self.clip_gradients)
        self.critic_optimizer.step()

        # ---- policy step: maximise the critic's value of the policy's action ----
        action = self.online_network.action(obs)
        policy_loss = -self.online_network.critic_value(obs, action).mean()

        self.policy_optimizer.zero_grad()
        policy_loss.backward()
        if self.clip_gradients:
            nn.utils.clip_grad_norm_(self.online_network.policy_params, self.clip_gradients)
        self.policy_optimizer.step()

        self.soft_update()
        self.online_network.eval()

        # New priorities: absolute TD error, kept strictly positive by 1e-8.
        updated_p = (np.abs(td_error.detach().cpu().numpy().squeeze()) + 1e-8)
        self.update_priorities(indices, updated_p)

    if self.training:
        self.current_step += 1
def __init__(self, M, const_diagonal=False):
    """Wrap the two-dimensional matrix ``M``.

    ``M`` is converted with ``torch.as_tensor`` and stored as ``self.mat``;
    its shape is exposed as ``self.shape`` and as ``self.m`` / ``self.n``.
    An empty ``self._cache`` dict is created. ``const_diagonal`` is accepted
    but not used here.
    """
    matrix = torch.as_tensor(M)
    self.mat = matrix

    dims = matrix.shape
    n_rows, n_cols = dims  # fails for anything that is not 2-D
    self.m = n_rows
    self.n = n_cols
    self.shape = dims

    self._cache = {}
def forward(
    self,
    src: torch.Tensor,
    theta: torch.Tensor,
    spatial_size: Optional[Union[Sequence[int], int]] = None,
) -> torch.Tensor:
    """
    Resample ``src`` according to the affine transformation ``theta``.

    ``theta`` must be an affine transformation matrix with shape
    3x3 or Nx3x3 or Nx2x3 or 2x3 for spatial 2D transforms,
    4x4 or Nx4x4 or Nx3x4 or 3x4 for spatial 3D transforms,
    where `N` is the batch size. A copy of ``theta`` is used, so the
    caller's tensor is never modified in place.

    Args:
        src (array_like): image in spatial 2D or 3D (N, C, spatial_dims),
            where N is the batch dim, C is the number of channels.
        theta (array_like): Nx3x3, Nx2x3, 3x3, 2x3 for spatial 2D inputs,
            Nx4x4, Nx3x4, 3x4, 4x4 for spatial 3D inputs. When the batch
            dimension is omitted, `theta` will be repeated N times, N is
            the batch dim of `src`.
        spatial_size: output spatial shape, the full output shape will be
            `[N, C, *spatial_size]` where N and C are inferred from the
            `src`. Takes precedence over ``self.spatial_size``.

    Raises:
        TypeError: When ``theta`` is not a ``torch.Tensor``.
        ValueError: When ``theta`` is not one of [Nxdxd, dxd].
        ValueError: When ``theta`` is not one of [Nx3x3, Nx4x4].
        TypeError: When ``src`` is not a ``torch.Tensor``.
        ValueError: When ``src`` spatially is not one of [2D, 3D].
        ValueError: When affine and image batch dimension differ.

    """
    # ---- check `theta` and bring it to a batched square form ----
    if not isinstance(theta, torch.Tensor):
        raise TypeError(f"theta must be torch.Tensor but is {type(theta).__name__}.")
    if theta.dim() not in (2, 3):
        raise ValueError(f"theta must be Nxdxd or dxd, got {theta.shape}.")
    if theta.dim() == 2:
        theta = theta[None]  # prepend a batch dim
    theta = theta.clone()  # work on a copy, never on the caller's tensor

    matrix_shape = tuple(theta.shape[1:])
    if matrix_shape == (2, 3) or matrix_shape == (3, 4):
        # Append the homogeneous bottom row to obtain a d x d matrix.
        bottom_row = [0, 0, 1] if matrix_shape[0] == 2 else [0, 0, 0, 1]
        pad_affine = torch.tensor(bottom_row).repeat(theta.shape[0], 1, 1).to(theta)
        pad_affine.requires_grad = False
        theta = torch.cat([theta, pad_affine], dim=1)
    if tuple(theta.shape[1:]) not in ((3, 3), (4, 4)):
        raise ValueError(f"theta must be Nx3x3 or Nx4x4, got {theta.shape}.")

    # ---- check `src` ----
    if not isinstance(src, torch.Tensor):
        raise TypeError(f"src must be torch.Tensor but is {type(src).__name__}.")
    sr = src.dim() - 2  # input spatial rank
    if sr not in (2, 3):
        raise ValueError(f"Unsupported src dimension: {sr}, available options are [2, 3].")

    # ---- output shape: call argument > module setting > shape of src ----
    src_size = tuple(src.shape)
    batch_and_channels = src_size[:2]
    dst_size = src_size
    if self.spatial_size is not None:
        dst_size = batch_and_channels + self.spatial_size
    if spatial_size is not None:
        dst_size = batch_and_channels + ensure_tuple(spatial_size)

    # ---- normalise and/or reverse theta when requested ----
    if not self.normalized:
        theta = to_norm_affine(
            affine=theta,
            src_size=src_size[2:],
            dst_size=dst_size[2:],
            align_corners=self.align_corners,
            zero_centered=self.zero_centered,
        )
    if self.reverse_indexing:
        # Reverse the order of the first `sr` rows, then of the first `sr` columns.
        rev_idx = torch.as_tensor(range(sr - 1, -1, -1), device=src.device)
        theta[:, :sr] = theta[:, rev_idx]
        theta[:, :, :sr] = theta[:, :, rev_idx]

    # ---- make the batch dims of theta and src agree ----
    n_images = src_size[0]
    if (theta.shape[0] == 1) and n_images > 1:
        theta = theta.repeat(n_images, 1, 1)
    if theta.shape[0] != n_images:
        raise ValueError(
            f"affine and image batch dimension must match, got affine={theta.shape[0]} image={src_size[0]}."
        )

    grid = nn.functional.affine_grid(
        theta=theta[:, :sr], size=list(dst_size), align_corners=self.align_corners
    )
    return nn.functional.grid_sample(
        input=src.contiguous(),
        grid=grid,
        mode=self.mode,
        padding_mode=self.padding_mode,
        align_corners=self.align_corners,
    )
def learn(self, states, actions, rewards, returns, history, args) -> History:
    """Run ``self.epochs`` clipped-surrogate (PPO-style) updates on a rollout batch.

    Args:
        states: array-like that is reshaped from five dimensions to
            ``(self.rollouts * states.size(1), d2, d3, d4)``.
            # presumably (rollouts, steps, C, H, W) - TODO confirm
        actions: array-like of integer actions, one per state.
        rewards: unused here.
        returns: array-like of return targets, one per state.
        history: dict with a "writer" exposing ``add_scalar`` /
            ``add_histogram`` and the integer counters "t_learn" and "t".
            "t_learn" is incremented once per epoch.
        args: unused here.

    Returns:
        The same ``history`` object.
    """
    states = torch.as_tensor(states, dtype=torch.float, device=self.device)
    rlsz = self.rollouts * states.size(1)
    states = states.reshape(rlsz, states.shape[2], states.shape[3], states.shape[4])
    actions = torch.as_tensor(actions, dtype=torch.long, device=self.device).reshape(rlsz, -1)
    returns = torch.as_tensor(returns, dtype=torch.float, device=self.device).reshape(rlsz, -1)

    writer = history["writer"]

    for epoch in range(self.epochs):
        # Sample a minibatch (with replacement) from the flattened rollouts.
        ixs = torch.randint(rlsz, size=(self.batch_size, ), dtype=torch.long)
        s = states[ixs]
        a = actions[ixs].reshape(-1)
        r = returns[ixs].reshape(-1)

        prepolicy, state_values = self(s)
        state_values = state_values.reshape(-1)
        policy_curr = Categorical(logits=prepolicy)

        # Compute normalized, critic-adjusted returns.
        # The advantage is a constant w.r.t. the policy objective: detach it so
        # the policy loss does not back-propagate into the value head. The small
        # epsilon avoids a division by zero when all advantages are equal.
        adv = (r - state_values).detach()
        adv = (adv - adv.mean()) / (adv.std() + 1e-8)

        # Get log_probs for ratio -- Do not backprop through old policy!
        with torch.no_grad():
            prepolicy, _ = self.old_policy(s)
            log_probs_old = Categorical(logits=prepolicy).log_prob(a)

        log_probs_curr = policy_curr.log_prob(a)
        ratio = torch.exp(log_probs_curr - log_probs_old)

        # Get current policy's entropy
        entropy = policy_curr.entropy().mean()

        # Calculate loss (r is already 1-D, matching state_values)
        vf_loss = nn.functional.mse_loss(state_values, r)
        pi_loss = -torch.min(
            (adv * ratio),
            (adv * ratio.clamp(1 - self.clipping, 1 + self.clipping))).mean()
        loss = pi_loss + self.critic_coeff * vf_loss - self.entropy_bonus * entropy

        # Logging
        writer.add_scalar("Train/policy_loss", pi_loss.item(), history["t_learn"])
        writer.add_scalar("Train/value_loss", vf_loss.item(), history["t_learn"])
        writer.add_scalar("Train/policy_entropy", entropy.item(), history["t_learn"])

        # Backprop and step with optional gradient logging
        self.optim.zero_grad()
        loss.backward()
        if self.log_gradients:
            for name, param in self.named_parameters():
                if param.grad is not None:
                    writer.add_histogram(
                        name, param.grad.detach().cpu().numpy(), history["t"])
        self.optim.step()
        history["t_learn"] += 1

    return history
def evaluate_box_proposals(predictions, dataset, thresholds=None, area="all", limit=None):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API recall
    evaluation code. However, it produces slightly different results.

    Args:
        predictions: per-image proposal lists, indexed in dataset order; each
            must carry an "objectness" field and support ``resize``, indexing
            and ``len``.
        dataset: object exposing ``id_to_img_map``, ``get_img_info`` and a
            ``coco`` API handle.
        thresholds: 1-D tensor of IoU thresholds; defaults to 0.50:0.05:0.95.
        area: name of the ground-truth area range to evaluate.
        limit: if given, only the ``limit`` highest-scoring proposals per
            image are used.

    Returns:
        dict with "ar" (mean recall over thresholds), "recalls",
        "thresholds", "gt_overlaps" (sorted best IoU per ground-truth box)
        and "num_pos" (number of ground-truth boxes considered). When no
        ground-truth box was considered, "gt_overlaps" is empty and the
        recalls are NaN (0 / 0).
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height), mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    # torch.cat rejects an empty list, which happens when no image
    # contributed any overlaps (no predictions, or no usable gt boxes).
    if gt_overlaps:
        gt_overlaps = torch.cat(gt_overlaps, dim=0)
    else:
        gt_overlaps = torch.zeros(0, dtype=torch.float32)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
def forward(self, outputs, processed_sizes, target_sizes=None):
    """ This function computes the panoptic prediction from the model's predictions.

    Parameters:
        outputs: This is a dict coming directly from the model. See the model doc for the content.
            The keys "pred_logits", "pred_masks" and "pred_boxes" are read.
        processed_sizes: This is a list of tuples (or torch tensors) of sizes of the images that were passed to the
                         model, ie the size after data augmentation but before batching.
        target_sizes: This is a list of tuples (or torch tensors) corresponding to the requested final size
                      of each prediction. If left to None, it will default to the processed_sizes

    Returns:
        A list with one dict per image, holding "png_string" (the PNG-encoded
        segmentation image as bytes) and "segments_info" (a list of dicts
        with "id", "isthing", "category_id" and "area").
    """
    if target_sizes is None:
        target_sizes = processed_sizes
    assert len(processed_sizes) == len(target_sizes)
    out_logits, raw_masks, raw_boxes = outputs["pred_logits"], outputs["pred_masks"], outputs["pred_boxes"]
    assert len(out_logits) == len(raw_masks) == len(target_sizes)
    preds = []

    def to_tuple(tup):
        # Sizes may arrive as tuples or as tensors; normalise to a tuple.
        if isinstance(tup, tuple):
            return tup
        return tuple(tup.cpu().tolist())

    for cur_logits, cur_masks, cur_boxes, size, target_size in zip(
        out_logits, raw_masks, raw_boxes, processed_sizes, target_sizes
    ):
        # we filter empty queries and detection below threshold
        # (the last logit index is excluded; softmax/max is computed once and reused)
        scores, labels = cur_logits.softmax(-1).max(-1)
        keep = labels.ne(outputs["pred_logits"].shape[-1] - 1) & (scores > self.threshold)
        cur_scores = scores[keep]
        cur_classes = labels[keep]
        cur_masks = cur_masks[keep]
        cur_masks = interpolate(cur_masks[None], to_tuple(size), mode="bilinear").squeeze(0)
        cur_boxes = box_ops.box_cxcywh_to_xyxy(cur_boxes[keep])

        h, w = cur_masks.shape[-2:]
        assert len(cur_boxes) == len(cur_classes)

        # It may be that we have several predicted masks for the same stuff class.
        # In the following, we track the list of masks ids for each stuff class (they are merged later on)
        cur_masks = cur_masks.flatten(1)
        stuff_equiv_classes = defaultdict(list)
        for k, label in enumerate(cur_classes):
            if not self.is_thing_map[label.item()]:
                stuff_equiv_classes[label.item()].append(k)

        def get_ids_area(masks, scores, dedup=False):
            # This helper function creates the final panoptic segmentation image
            # It also returns the area of the masks that appears on the image

            m_id = masks.transpose(0, 1).softmax(-1)

            if m_id.shape[-1] == 0:
                # We didn't detect any mask :(
                m_id = torch.zeros((h, w), dtype=torch.long, device=m_id.device)
            else:
                # Each pixel is assigned to the mask with the highest score.
                m_id = m_id.argmax(-1).view(h, w)

            if dedup:
                # Merge the masks corresponding to the same stuff class
                for equiv in stuff_equiv_classes.values():
                    if len(equiv) > 1:
                        for eq_id in equiv:
                            m_id.masked_fill_(m_id.eq(eq_id), equiv[0])

            final_h, final_w = to_tuple(target_size)

            # Round-trip through an RGB image so the id map can be resized
            # to the target size with nearest-neighbour resampling.
            seg_img = Image.fromarray(id2rgb(m_id.view(h, w).cpu().numpy()))
            seg_img = seg_img.resize(size=(final_w, final_h), resample=Image.NEAREST)

            np_seg_img = (
                torch.ByteTensor(torch.ByteStorage.from_buffer(seg_img.tobytes())).view(final_h, final_w, 3).numpy()
            )
            m_id = torch.from_numpy(rgb2id(np_seg_img))

            # Pixel count of every mask id on the resized image.
            area = [m_id.eq(i).sum().item() for i in range(len(scores))]
            return area, seg_img

        area, seg_img = get_ids_area(cur_masks, cur_scores, dedup=True)
        if cur_classes.numel() > 0:
            # We know filter empty masks as long as we find some
            while True:
                filtered_small = torch.as_tensor(
                    [area[i] <= 4 for i in range(len(cur_classes))], dtype=torch.bool, device=keep.device
                )
                if filtered_small.any().item():
                    cur_scores = cur_scores[~filtered_small]
                    cur_classes = cur_classes[~filtered_small]
                    cur_masks = cur_masks[~filtered_small]
                    area, seg_img = get_ids_area(cur_masks, cur_scores)
                else:
                    break

        else:
            # Nothing was kept: use a single placeholder class id of 1.
            cur_classes = torch.ones(1, dtype=torch.long, device=cur_classes.device)

        segments_info = []
        for i, a in enumerate(area):
            cat = cur_classes[i].item()
            segments_info.append({"id": i, "isthing": self.is_thing_map[cat], "category_id": cat, "area": a})
        del cur_classes

        with io.BytesIO() as out:
            seg_img.save(out, format="PNG")
            predictions = {"png_string": out.getvalue(), "segments_info": segments_info}
        preds.append(predictions)
    return preds
def dist_matrix(x, y, c=1.0):
    """Delegate to ``_dist_matrix`` after converting ``c`` to a tensor of ``x``'s type."""
    c_tensor = torch.as_tensor(c)
    c_like_x = c_tensor.type_as(x)
    return _dist_matrix(x, y, c_like_x)
def __getitem__(self, index):
    """Return ``(X, y)`` for ``index``: the sample as an int64 tensor and its label as a tensor."""
    sample = self.data[index]
    features = torch.as_tensor(sample).long()
    label = torch.as_tensor(self.labels[index])
    return features, label
if continue_train: state = torch.load(path_name) self.lr = state['lr'] self.model.load_state_dict(state['model']) self.optimizer.load_state_dict(state['optimizer']) self.histories = [[], []] else: torch.save(self.model, path_name) def print_model(self): batch_in = next(iter(self.loaders[0]))[0] summary(self.model, batch_in.shape) if __name__ == "__main__": possibleRes = torch.as_tensor(np.array([0, 1])).float() tttIn = np.random.randint(0, 2, [100, 9]) tttOut = np.array( [[(tttIn[i, 0] and tttIn[i, 4] and tttIn[i, 8]) or (tttIn[i, 2] and tttIn[i, 4] and tttIn[i, 6]) for i in range(100)]]).transpose() # tttOutNot = (tttOut + 1) % 2 # tttOut = np.concatenate([tttOut, tttOutNot], axis=0).transpose() tttData = data.TensorDataset(torch.as_tensor(tttIn).float(), torch.as_tensor(tttOut).float()) model = nn.Linear(9, 1) print(model(torch.from_numpy(np.array([i for i in range(9)])).float())) lr = 0.1 optimizer = optim.SGD(model.parameters(), lr) loss_func = nn.MSELoss()
def resize(
    data: dict,
    size: Union[int, Tuple[int, int], Tuple[int, int, int]],
    max_size: Optional[int] = None,
):
    """Resize ``data["data"]`` and rescale the annotations that come with it.

    Args:
        data: dict whose "data" entry is the image; may also hold "boxes",
            "area" and "masks".
        size: either a scalar target for the shorter side or a ``(w, h)``
            sequence.
        max_size: optional cap on the longer side, applied only when
            ``size`` is a scalar.

    Returns:
        ``{"data": resized_image}`` when ``data`` has no other entry;
        otherwise a copy of ``data`` with "data", "boxes", "area", "masks"
        updated where present and "size" set to the resolved size.
    """

    def _aspect_preserving_size(image_size, target, cap=None):
        width, height = image_size
        if cap is not None:
            shorter = float(min(image_size))
            longer = float(max(image_size))
            # Shrink the target if the longer side would exceed the cap.
            if longer / shorter * target > cap:
                target = int(round(cap * shorter / longer))

        if max(image_size) == target:
            return target

        if (width <= height and width == target) or (height <= width and height == target):
            return (height, width)

        if width < height:
            out_w = target
            out_h = int(target * height / width)
        else:
            out_h = target
            out_w = int(target * width / height)
        return (out_h, out_w)

    def _resolve_size(image_size, requested, cap=None):
        # An explicit (w, h) pair is flipped to (h, w).
        if isinstance(requested, (list, tuple)):
            return requested[::-1]
        return _aspect_preserving_size(image_size, requested, cap)

    image = data["data"]
    target = data.copy()

    size = _resolve_size(image.size, size, max_size)
    rescaled_image = F.resize(image, size)

    if len(target) == 1:
        return {"data": rescaled_image}

    ratio_width, ratio_height = (
        float(new) / float(old) for new, old in zip(rescaled_image.size, image.size)
    )

    if "boxes" in target:
        scale = torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = target["boxes"] * scale

    if "area" in target:
        target["area"] = target["area"] * (ratio_width * ratio_height)

    target["size"] = torch.tensor(size)

    if "masks" in target:
        resized_masks = interpolate(target["masks"][:, None].float(), size, mode="nearest")
        target["masks"] = resized_masks[:, 0] > 0.5

    target["data"] = rescaled_image
    return target
cmd = 'cp train_acos_regressor_24_joints.py ./{}/snapshot.py'.format(ckpt_path) else: cmd = r'copy train_acos_regressor_24_joints.py {}\snapshot.py'.format(ckpt_path) print(cmd) os.system(cmd) file = open('{}/validation.txt'.format(ckpt_path), 'w') trans = torch.zeros((batch_size, 3), dtype=torch.float64, device=device) while batch_num < max_batch_num: batch_num += 1 print('Epoch %03d: training...' % batch_num) reg.train() for (i, data) in enumerate(dataloader): joints = torch.as_tensor(data['joints'], device=device) thetas = torch.as_tensor(data['thetas'], device=device) betas = torch.as_tensor(data['betas'], device=device) pred_thetas = reg(joints) _, recon_joints = smpl(betas, pred_thetas, trans) loss_joints = loss_op(recon_joints, joints) loss_thetas = loss_(pred_thetas, thetas) loss = loss_thetas + 5 * loss_joints optimizer.zero_grad() loss.backward() optimizer.step() if i % 32 == 0: print('batch %04d: loss joints: %10.6f loss thetas: % 10.6f' \ % (i, loss_joints.data.item(), loss_thetas.data.item()))
def __call__(self, dataset_dict):
    """
    Load one image, augment it and convert its annotations for training.

    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
            A deep copy is made, so the caller's dict is not modified.

    Returns:
        dict: a format that builtin models in detectron2 accept

    NOTE(review): the nesting of the statements below was reconstructed from
    the token order alone; confirm against the intended control flow.
    """
    dataset_dict = copy.deepcopy(
        dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"],
                             format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    # Every annotation's segmentation is blanked before augmentation.
    # NOTE(review): this indexes "annotations" unconditionally, before the
    # `"annotations" not in dataset_dict` check further down.
    for i in range(len(dataset_dict["annotations"])):
        dataset_dict["annotations"][i]["segmentation"] = []

    ### my code ##
    image, dataset_dict = self.aug_handler(
        image=image, dataset_dict_detectron=dataset_dict)
    ### my code ##

    if "annotations" not in dataset_dict:
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
            image)
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            # Prepend the crop so `transforms` describes the full pipeline.
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(
        np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(dataset_dict, image_shape, transforms,
                                  self.min_box_side_len,
                                  self.proposal_topk)

    if not self.is_train:
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # Crowd annotations (iscrowd != 0) are dropped here.
        annos = [
            utils.transform_instance_annotations(
                obj,
                transforms,
                image_shape,
                keypoint_hflip_indices=self.keypoint_hflip_indices)
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format)
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                              "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
        dataset_dict["sem_seg"] = sem_seg_gt

    # Debug visualisation, always enabled: draws the ground-truth boxes on a
    # copy of the augmented image and hands it to `aug_vis`.
    # NOTE(review): reads dataset_dict["instances"], which is only set above
    # when "annotations" was present.
    if True:
        vis_img = image.copy()
        bbox_list = [
            BBox.from_list(vals) for vals in
            dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()
        ]
        # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
        for bbox in (bbox_list):
            # if len(seg) > 0 and False:
            #     vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
            vis_img = draw_bbox(img=vis_img, bbox=bbox)
        aug_vis.step(vis_img)

    return dataset_dict
if not args.separate_constants: len_constant=args.batch_size images=[torch.ones([1,3,16,16])*(-1+2*i/(len_constant-1)) for i in range(len_constant)] constant_images=torch.cat(images) else: len_constant=args.batch_size images=[torch.ones([1,3,16,16])*(-1+2*i/(len_constant-1)) for i in range(len_constant)] constant_image_list=images # This code creates the low-variance images # In[13]: len_low_variance=args.batch_size X=get_truncated_normal(mean=0,sd=0.05,low=-1,upp=1) lowvar_images=torch.reshape(torch.as_tensor(X.rvs(len_low_variance*256*3),dtype=torch.float),(len_low_variance,3,16,16)) model = PixelCNN(nr_resnet=args.nr_resnet, nr_filters=args.nr_filters, input_channels=input_channels, nr_logistic_mix=args.nr_logistic_mix) model = model.cuda() # This code creates the SVHN images # In[15]: x=datasets.SVHN(root=args.data_dir,split='train',transform=ds_transforms,download=True) train_loader_svhn=torch.utils.data.DataLoader(x,batch_size=args.batch_size,shuffle=True) image_batch=next(iter(train_loader_svhn))[0] print(image_batch.shape) if args.load_params:
def normalize(self, image):
    """Standardise ``image`` with ``self.image_mean`` and ``self.image_std``.

    Both statistics are converted to tensors with the image's dtype and
    device, then given two trailing singleton dimensions before the
    subtraction and the division.
    """
    like_image = {"dtype": image.dtype, "device": image.device}
    mean = torch.as_tensor(self.image_mean, **like_image)
    std = torch.as_tensor(self.image_std, **like_image)
    centered = image - mean[:, None, None]
    return centered / std[:, None, None]
def detect_face(imgs, minsize, pnet, rnet, onet, threshold, factor, device):
    """Run a three-stage cascade (pnet, rnet, onet) over a batch of images.

    Args:
        imgs: A tensor, an iterable of equally shaped images, or a single
            non-iterable image. Non-tensor input is stacked as uint8 and
            permuted with (0, 3, 1, 2) -- presumably NHWC -> NCHW; confirm
            against callers.
        minsize: Divides 12.0 to obtain the first pyramid scale.
        pnet, rnet, onet: Callables used for stage one, two and three.
        threshold: Indexable with three entries, one score threshold per stage.
        factor: Multiplier applied to the scale at each pyramid level.
        device: Device the images and the empty ``points`` tensor are put on.

    Returns:
        ``(batch_boxes, batch_points)``: two lists with one entry per image in
        the batch, holding the boxes / points assigned to that image.

    Raises:
        Exception: If the images in ``imgs`` do not all share one shape.
    """
    if not isinstance(imgs, Iterable):
        imgs = [imgs]
    if any(img.shape != imgs[0].shape for img in imgs):
        raise Exception(
            "MTCNN batch processing only compatible with equal-dimension images."
        )
    if not isinstance(imgs, torch.Tensor):
        imgs_np = np.stack([np.uint8(img) for img in imgs])
        imgs = torch.as_tensor(imgs_np, device=device).permute(0, 3, 1, 2)
    # NOTE(review): original indentation was lost; this is assumed to run for
    # tensor inputs too so they end up on `device`.
    imgs = imgs.to(device)

    batch_size = len(imgs)
    h, w = imgs.shape[2:4]
    m = 12.0 / minsize
    minl = min(h, w)
    minl = minl * m

    # Create scale pyramid
    scale_i = m
    scales = []
    while minl >= 12:
        scales.append(scale_i)
        scale_i = scale_i * factor
        minl = minl * factor

    # First stage
    boxes = []
    image_inds = []
    all_inds = []
    all_i = 0
    for scale in scales:
        im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1)))
        # Shift/scale pixel values: (x - 127.5) / 128.
        im_data = (im_data - 127.5) * 0.0078125
        reg, probs = pnet(im_data)

        boxes_scale, image_inds_scale = generateBoundingBox(
            reg, probs[:, 1], scale, threshold[0])
        boxes.append(boxes_scale)
        image_inds.append(image_inds_scale)
        # Offset the image index by batch_size per scale so that every
        # (scale, image) pair gets its own group id for the first NMS below.
        all_inds.append(all_i + image_inds_scale)
        all_i += batch_size

    boxes = torch.cat(boxes, dim=0)
    image_inds = torch.cat(image_inds, dim=0)
    all_inds = torch.cat(all_inds, dim=0)

    # NMS within each scale + image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], all_inds, 0.5)
    boxes, image_inds = boxes[pick], image_inds[pick]

    # NMS within each image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
    boxes, image_inds = boxes[pick], image_inds[pick]

    # Apply the offsets stored in columns 5-8, scaled by box width/height,
    # to the box corners; column 4 (the score) is carried along.
    regw = boxes[:, 2] - boxes[:, 0]
    regh = boxes[:, 3] - boxes[:, 1]
    qq1 = boxes[:, 0] + boxes[:, 5] * regw
    qq2 = boxes[:, 1] + boxes[:, 6] * regh
    qq3 = boxes[:, 2] + boxes[:, 7] * regw
    qq4 = boxes[:, 3] + boxes[:, 8] * regh
    boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0)
    boxes = rerec(boxes)
    y, ey, x, ex = pad(boxes, w, h)

    # Second stage
    if len(boxes) > 0:
        im_data = []
        for k in range(len(y)):
            # Skip boxes whose crop would be empty.
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k],
                             (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (24, 24)))
        # NOTE(review): torch.cat raises if every crop above was skipped.
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = rnet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        score = out1[1, :]
        ipass = score > threshold[1]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        # NMS within each image
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick]
        boxes = bbreg(boxes, mv)
        boxes = rerec(boxes)

    # Third stage
    # Empty default so the per-image split below works when no box survives.
    points = torch.zeros(0, 5, 2, device=device)
    if len(boxes) > 0:
        y, ey, x, ex = pad(boxes, w, h)
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k],
                             (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (48, 48)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = onet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        out2 = out[2].permute(1, 0)
        score = out2[1, :]
        points = out1
        ipass = score > threshold[2]
        points = points[:, ipass]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        # Rows 0-4 of `points` are scaled by box width, rows 5-9 by box
        # height, then shifted by the box origin.
        w_i = boxes[:, 2] - boxes[:, 0] + 1
        h_i = boxes[:, 3] - boxes[:, 1] + 1
        points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5, 1) - 1
        points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(
            5, 1) - 1
        points = torch.stack((points_x, points_y)).permute(2, 1, 0)
        boxes = bbreg(boxes, mv)

        # NMS within each image using "Min" strategy
        # NOTE(review): the active call passes no strategy argument; only the
        # commented-out numpy variant below names 'Min'.
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        # pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min')
        boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick]

    # boxes = boxes.cpu().numpy()
    # points = points.cpu().numpy()

    # Split the flat detections back into one entry per input image.
    batch_boxes = []
    batch_points = []
    for b_i in range(batch_size):
        b_i_inds = torch.where(image_inds == b_i)
        batch_boxes.append(boxes[b_i_inds])
        batch_points.append(points[b_i_inds])

    # batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)
    # batch_boxes, batch_points = torch.tensor(batch_boxes), torch.tensor(batch_points)
    # print(f'batch_boxes: {batch_boxes.shape}')
    # print(f'batch_points: {batch_points.shape}')

    return batch_boxes, batch_points
def _set_power_law(self, value): if not torch.is_tensor(value): value = torch.as_tensor(value).to(self.raw_power_law) self.initialize(raw_power_law=self.raw_power_law_constraint. inverse_transform(value))
def as_tensors(self, *args, **kwargs):
    "Convert each non-None argument to a tensor, defaulting to self.X's device and dtype."
    # Caller-supplied options take precedence over the defaults from self.X.
    options = {"device": self.X.device, "dtype": self.X.dtype}
    options.update(kwargs)
    converted = []
    for item in args:
        if item is None:
            converted.append(None)
        else:
            converted.append(torch.as_tensor(item, **options))
    return tuple(converted)
def train(self) -> None:
    """
    Update policy using the currently gathered rollout buffer.

    Runs ``self.n_epochs`` passes over ``self.rollout_buffer`` in mini-batches
    of ``self.batch_size``, optimizing a clipped-surrogate policy loss plus
    entropy and value terms. An epoch loop is left early when the mean
    approximate KL divergence exceeds ``1.5 * self.target_kl``.
    """
    self.policy.train()
    # Update optimizer learning rate
    for i in range(self.vehicle_num):
        for param_group in self.policy.ACP[i].optimizer.param_groups:
            param_group["lr"] = self.learning_rate

    # train for n_epochs epochs
    for epoch in range(self.n_epochs):
        approx_kl_divs = []
        # Do a complete pass on the rollout buffer
        for rollout_data in self.rollout_buffer.get(self.batch_size):
            # Re-sample the noise matrix because the log_std has changed
            # if that line is commented (as in SAC)
            values, log_prob, entropy = self.policy.forward(
                loc_features=rollout_data.loc,
                weight_features=rollout_data.weight[:, np.newaxis],
                actions=rollout_data.actions,
            )
            # Normalize advantage
            advantages = rollout_data.advantages
            advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

            # flatten data
            values = torch.flatten(values)
            log_prob = torch.flatten(log_prob)
            # Flatten only when an analytical entropy is available; flattening
            # None would raise before the fallback branch below is reached.
            if entropy is not None:
                entropy = torch.flatten(entropy)
            old_log_prob = torch.flatten(torch.as_tensor(
                rollout_data.old_log_prob, dtype=torch.float32, device=self.device))
            advantages = torch.flatten(torch.as_tensor(
                advantages, dtype=torch.float32, device=self.device))
            old_values = rollout_data.old_values
            returns = torch.flatten(torch.as_tensor(
                rollout_data.returns, dtype=torch.float32, device=self.device))

            # ratio between old and new policy, should be one at the first iteration
            ratio = torch.exp(log_prob - old_log_prob)

            # clipped surrogate loss
            policy_loss_1 = advantages * ratio
            policy_loss_2 = advantages * torch.clamp(
                ratio, 1 - self.clip_range, 1 + self.clip_range)
            policy_loss = - torch.min(policy_loss_1, policy_loss_2).mean()

            if self.clip_range_vf is None:
                # No clipping
                values_pred = values
            else:
                # Clip the difference between old and new value
                # NOTE: this depends on the reward scaling
                old_values = torch.flatten(torch.as_tensor(
                    old_values, dtype=torch.float32, device=self.device))
                values_pred = old_values + torch.clamp(
                    values - old_values, - self.clip_range_vf, self.clip_range_vf
                )
            # Value loss using the TD(gae_lambda) target
            value_loss = F.mse_loss(returns, values_pred)

            # Entropy loss favor exploration
            if entropy is None:
                # Approximate entropy when no analytical form
                entropy_loss = - torch.mean(- log_prob)
            else:
                entropy_loss = - torch.mean(entropy)

            loss = policy_loss + self.ent_coef * entropy_loss + self.vf_coef * value_loss

            # Optimization step
            self.policy.optimize(
                loss=loss,
                max_grad_norm=self.max_grad_norm,
            )
            approx_kl_divs.append(torch.mean(old_log_prob - log_prob).detach().cpu().numpy())

        # Stop running further epochs once the policy has drifted too far.
        if self.target_kl is not None and np.mean(approx_kl_divs) > 1.5 * self.target_kl:
            print(f"Early stopping at step {epoch} due to reaching max kl: {np.mean(approx_kl_divs):.2f}")
            break
def __call__(self, image, target): w, h = image.size image_id = target["image_id"] image_id = torch.tensor([image_id]) anno = target["annotations"] anno = [ obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0 ] boxes = [obj["bbox"] for obj in anno] # guard against no boxes via resizing boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) boxes[:, 2:] += boxes[:, :2] boxes[:, 0::2].clamp_(min=0, max=w) boxes[:, 1::2].clamp_(min=0, max=h) classes = [obj["category_id"] for obj in anno] classes = torch.tensor(classes, dtype=torch.int64) if self.return_masks: segmentations = [obj["segmentation"] for obj in anno] masks = convert_coco_poly_to_mask(segmentations, h, w) keypoints = None if anno and "keypoints" in anno[0]: keypoints = [obj["keypoints"] for obj in anno] keypoints = torch.as_tensor(keypoints, dtype=torch.float32) num_keypoints = keypoints.shape[0] if num_keypoints: keypoints = keypoints.view(num_keypoints, -1, 3) keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) boxes = boxes[keep] classes = classes[keep] if self.return_masks: masks = masks[keep] if keypoints is not None: keypoints = keypoints[keep] target = {} target["boxes"] = boxes target["labels"] = classes if self.return_masks: target["masks"] = masks target["image_id"] = image_id if keypoints is not None: target["keypoints"] = keypoints # for conversion to coco api area = torch.tensor([obj["area"] for obj in anno]) iscrowd = torch.tensor( [obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno]) target["area"] = area[keep] target["iscrowd"] = iscrowd[keep] target["orig_size"] = torch.as_tensor([int(h), int(w)]) target["size"] = torch.as_tensor([int(h), int(w)]) return image, target
def _setup_indices(self, z_indices, cart_indices):
    """Precompute and register the index buffers used by this module.

    Args:
        z_indices: Passed to ``topological_sort``; each sorted item is read as
            ``(atom, (ref1, ref2, ref3))`` -- presumably a Z-matrix
            definition, confirm against ``topological_sort``.
        cart_indices: Atom indices that are kept as cartesian coordinates.

    Side effects:
        Registers the buffers ``inds_for_atom``, ``modified_indices``,
        ``bond_indices``, ``angle_indices``, ``dih_indices``,
        ``sorted_z_indices``, ``rev_z_indices``, ``atom_to_stats``,
        ``rev_perm``, ``rev_perm_inv`` and ``init_cart_indices``, and sets
        ``self.rev_blocks`` to a list of long tensors.
    """
    n_atoms = self.dims // 3
    # Row i holds the three flat coordinate columns of atom i: 3i, 3i+1, 3i+2.
    ind_for_atom = torch.zeros(n_atoms, 3, dtype=torch.long)
    for i in range(n_atoms):
        ind_for_atom[i, 0] = 3 * i
        ind_for_atom[i, 1] = 3 * i + 1
        ind_for_atom[i, 2] = 3 * i + 2
    self.register_buffer("inds_for_atom", ind_for_atom)

    sorted_z_indices = topological_sort(z_indices)
    # Flatten each (atom, (a, b, c)) item into [atom, a, b, c].
    sorted_z_indices = [[item[0], item[1][0], item[1][1], item[1][2]]
                        for item in sorted_z_indices]
    rev_z_indices = list(reversed(sorted_z_indices))

    mod = [item[0] for item in sorted_z_indices]
    modified_indices = []
    for index in mod:
        modified_indices.extend(self.inds_for_atom[index])
    # Every atom contributed three consecutive columns; split them by slot.
    bond_indices = list(modified_indices[0::3])
    angle_indices = list(modified_indices[1::3])
    dih_indices = list(modified_indices[2::3])

    self.register_buffer("modified_indices", torch.LongTensor(modified_indices))
    self.register_buffer("bond_indices", torch.LongTensor(bond_indices))
    self.register_buffer("angle_indices", torch.LongTensor(angle_indices))
    self.register_buffer("dih_indices", torch.LongTensor(dih_indices))
    self.register_buffer("sorted_z_indices", torch.LongTensor(sorted_z_indices))
    self.register_buffer("rev_z_indices", torch.LongTensor(rev_z_indices))

    #
    # Setup indexing for reverse pass.
    #
    # First, create an array that maps from an atom index into mean_bonds, std_bonds, etc.
    atom_to_stats = torch.zeros(n_atoms, dtype=torch.long)
    for i, j in enumerate(mod):
        atom_to_stats[j] = i
    self.register_buffer("atom_to_stats", atom_to_stats)

    # Next create permutation vector that is used in the reverse pass. This maps
    # from the original atom indexing to the order that the cartesian coordinates
    # will be built in. This will be filled in as we go.
    rev_perm = torch.zeros(n_atoms, dtype=torch.long)
    self.register_buffer("rev_perm", rev_perm)
    # Next create the inverse of rev_perm. This will be filled in as we go.
    rev_perm_inv = torch.zeros(n_atoms, dtype=torch.long)
    self.register_buffer("rev_perm_inv", rev_perm_inv)

    # Create the list of columns that form our initial cartesian coordinates.
    init_cart_indices = self.inds_for_atom[cart_indices].view(-1)
    self.register_buffer("init_cart_indices", init_cart_indices)

    # Update our permutation vectors for the initial cartesian atoms.
    for i, j in enumerate(cart_indices):
        self.rev_perm[i] = torch.as_tensor(j, dtype=torch.long)
        self.rev_perm_inv[j] = torch.as_tensor(i, dtype=torch.long)

    # Break Z into blocks, where all of the atoms within a block can be built
    # in parallel, because they only depend on already-cartesian atoms.
    all_cart = set(cart_indices)
    # NOTE(review): `i` is the loop variable left over from the loop above; if
    # cart_indices is empty it still holds the value from the earlier
    # atom_to_stats loop (or is unbound when both are empty).
    current_cart_ind = i + 1
    blocks = []
    # NOTE(review): if a pass adds no atom to `next_cart`, `sorted_z_indices`
    # is unchanged and this loop does not terminate.
    while sorted_z_indices:
        next_z_indices = []
        next_cart = set()
        block = []
        for atom1, atom2, atom3, atom4 in sorted_z_indices:
            if (atom2 in all_cart) and (atom3 in all_cart) and (atom4 in all_cart):
                # We can build this atom from existing cartesian atoms, so we add
                # it to the list of cartesian atoms available for the next block.
                next_cart.add(atom1)

                # Add this atom to our permutation matrices.
                self.rev_perm[current_cart_ind] = atom1
                self.rev_perm_inv[atom1] = current_cart_ind
                current_cart_ind += 1

                # Next, we convert the indices for atoms2-4 from their normal values
                # to the appropriate indices to index into the cartesian array.
                atom2_mod = self.rev_perm_inv[atom2]
                atom3_mod = self.rev_perm_inv[atom3]
                atom4_mod = self.rev_perm_inv[atom4]

                # Finally, we append this information to the current block.
                block.append([atom1, atom2_mod, atom3_mod, atom4_mod])
            else:
                # We can't build this atom from existing cartesian atoms,
                # so put it on the list for next time.
                next_z_indices.append([atom1, atom2, atom3, atom4])
        sorted_z_indices = next_z_indices
        # Atoms built in this block only become available to the next block.
        all_cart = all_cart.union(next_cart)
        block = torch.as_tensor(block, dtype=torch.long)
        blocks.append(block)
    self.rev_blocks = blocks
def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
    """Create the learnable log-std vector and the mean network.

    ``log_std`` is a float32 parameter of length ``act_dim`` initialized to
    -0.5; ``mu_net`` is built by ``mlp`` from ``obs_dim`` through
    ``hidden_sizes`` to ``act_dim`` with the given ``activation``.
    """
    super().__init__()
    initial_log_std = np.full(act_dim, -0.5, dtype=np.float32)
    self.log_std = torch.nn.Parameter(torch.as_tensor(initial_log_std))
    layer_sizes = [obs_dim, *hidden_sizes, act_dim]
    self.mu_net = mlp(layer_sizes, activation)
method (str): method to determine the spike threshold (relevant for surrogate gradients) alpha (float): hyper parameter to use in surrogate gradient computation """ tau_syn_inv: torch.Tensor = torch.as_tensor(1.0 / 5e-3) tau_mem_inv: torch.Tensor = torch.as_tensor(1.0 / 1e-2) v_leak: torch.Tensor = torch.as_tensor(0.0) v_th: torch.Tensor = torch.as_tensor(1.0) v_reset: torch.Tensor = torch.as_tensor(0.0) method: str = "super" alpha: float = torch.as_tensor(100.0) default_bio_parameters = LIFParameters( tau_syn_inv=torch.as_tensor(1 / 0.5), tau_mem_inv=torch.as_tensor(1 / 20.0), v_leak=torch.as_tensor(-65.0), v_th=torch.as_tensor(-50.0), v_reset=torch.as_tensor(-65.0), ) class LIFState(NamedTuple): """State of a LIF neuron Parameters: z (torch.Tensor): recurrent spikes v (torch.Tensor): membrane potential i (torch.Tensor): synaptic input current """
def get_mvar_set_cpu(self, Y: Tensor) -> Tensor:
    r"""Find MVaR set based on the definition in [Prekopa2012MVaR]_.

    NOTE: This is much faster on CPU for large `n_w` than the alternative but it
    is significantly slower on GPU. Based on empirical evidence, this is recommended
    when running on CPU with `n_w > 64`.

    This first calculates the CDF for each point on the extended domain of the
    random variable (the grid defined by the given samples), then takes the
    values with CDF equal to (rounded if necessary) `alpha`. The non-dominated
    subset of these form the MVaR set.

    Args:
        Y: A `batch x n_w x m`-dim tensor of outcomes. This is currently
            restricted to `m = 2` objectives.
            TODO: Support `m > 2` objectives.

    Returns:
        A `batch` length list of `k x m`-dim tensor of MVaR values, where `k`
        depends on the corresponding batch inputs. Note that MVaR values in general
        are not in-sample points. When `Y` is not 3-dimensional, a single
        `k x m`-dim tensor is returned instead of a list.

    Raises:
        ValueError: If `m != 2`.
    """
    # Batched input: solve each batch element independently.
    if Y.dim() == 3:
        return [self.get_mvar_set_cpu(y_) for y_ in Y]
    m = Y.shape[-1]
    if m != 2:  # pragma: no cover
        raise ValueError(
            "`get_mvar_set_cpu` only supports `m=2` outcomes!")
    # Generate sets of all unique values in each output dimension.
    # Note that points in MVaR are bounded from above by the
    # independent VaR of each objective. Hence, we only need to
    # consider the unique outcomes that are less than or equal to
    # the VaR of the independent objectives
    var_alpha_idx = ceil(self.alpha * self.n_w) - 1
    Y_sorted = Y.topk(Y.shape[0] - var_alpha_idx, dim=0, largest=False).values
    # Each list is reversed, i.e. ordered from largest to smallest value.
    unique_outcomes_list = [
        Y_sorted[:, i].unique().tolist()[::-1] for i in range(m)
    ]
    # Convert this into a list of m dictionaries mapping values to indices.
    unique_outcomes = [
        dict(zip(outcomes, range(len(outcomes))))
        for outcomes in unique_outcomes_list
    ]
    # Initialize a tensor counting the number of points in Y that a given grid point
    # is dominated by. This will essentially be a non-normalized CDF.
    counter_tensor = torch.zeros(
        [len(outcomes) for outcomes in unique_outcomes],
        dtype=torch.long,
        device=Y.device,
    )
    # populate the tensor, counting the dominated points.
    # we only need to consider points in Y where at least one
    # objective is less than the max objective value in
    # unique_outcomes_list
    max_vals = torch.tensor([o[0] for o in unique_outcomes_list],
                            dtype=Y.dtype, device=Y.device)
    mask = (Y < max_vals).any(dim=-1)
    # Points excluded by the mask are added to every grid cell up front.
    counter_tensor += self.n_w - mask.sum()
    Y_pruned = Y[mask]
    for y_ in Y_pruned:
        # Values missing from the grid fall back to index 0.
        starting_idcs = [
            unique_outcomes[i].get(y_[i].item(), 0) for i in range(m)
        ]
        counter_tensor[starting_idcs[0]:, starting_idcs[1]:] += 1

    # Get the count alpha-level points should have.
    alpha_count = ceil(self.alpha * self.n_w)
    # Get the alpha level indices.
    alpha_level_indices = (counter_tensor == alpha_count).nonzero(
        as_tuple=False)
    # If there are no exact alpha level points, get the smallest alpha' > alpha
    # and find the corresponding alpha level indices.
    if alpha_level_indices.numel() == 0:
        min_greater_than_alpha = counter_tensor[
            counter_tensor > alpha_count].min()
        alpha_level_indices = (
            counter_tensor == min_greater_than_alpha).nonzero(
            as_tuple=False)
    # Turn the value->index dictionaries back into tensors of grid values so
    # they can be indexed with the alpha level indices.
    unique_outcomes = [
        torch.as_tensor(list(outcomes.keys()), device=Y.device, dtype=Y.dtype)
        for outcomes in unique_outcomes
    ]
    alpha_level_points = torch.stack(
        [
            unique_outcomes[i][alpha_level_indices[:, i]]
            for i in range(len(unique_outcomes))
        ],
        dim=-1,
    )
    # MVaR is simply the non-dominated subset of alpha level points.
    if self.filter_dominated:
        mask = is_non_dominated(alpha_level_points)
        mvar = alpha_level_points[mask]
    else:
        mvar = alpha_level_points
    return mvar
def ctc_align(args, device):
    """ESPnet-specific interface for CTC segmentation.

    Parses configuration, infers the CTC posterior probabilities,
    and then aligns start and end of utterances using CTC segmentation.
    Results are written to the output file given in the args.

    :param args: given configuration
    :param device: for inference; one of ['cuda', 'cpu']
    :return:  0 on success
    """
    model, train_args = load_trained_model(args.model)
    assert isinstance(model, ASRInterface)
    # Command-line preprocessing config, when given, overrides the one stored
    # with the trained model.
    load_inputs_and_targets = LoadInputsAndTargets(
        mode="asr",
        load_output=True,
        sort_in_input_length=False,
        preprocess_conf=train_args.preprocess_conf
        if args.preprocess_conf is None
        else args.preprocess_conf,
        preprocess_args={"train": False},
    )
    logging.info(f"Decoding device={device}")
    # Warn for nets with high memory consumption on long audio files
    if hasattr(model, "enc"):
        encoder_module = model.enc.__class__.__module__
    elif hasattr(model, "encoder"):
        encoder_module = model.encoder.__class__.__module__
    else:
        encoder_module = "Unknown"
    logging.info(f"Encoder module: {encoder_module}")
    logging.info(f"CTC module:     {model.ctc.__class__.__module__}")
    if "rnn" not in encoder_module:
        logging.warning(
            "No BLSTM model detected; memory consumption may be high.")
    model.to(device=device).eval()
    # read audio and text json data
    with open(args.data_json, "rb") as f:
        js = json.load(f)["utts"]
    with open(args.utt_text, "r", encoding="utf-8") as f:
        lines = f.readlines()
    # Group consecutive text lines by the audio name they start with. `i` is
    # never reset, so lines are consumed in the order of the keys in `js`.
    i = 0
    text = {}
    segment_names = {}
    for name in js.keys():
        text_per_audio = []
        segment_names_per_audio = []
        while i < len(lines) and lines[i].startswith(name):
            # Everything after the first space is the text, everything before
            # it is the segment name.
            text_per_audio.append(lines[i][lines[i].find(" ") + 1:])
            segment_names_per_audio.append(lines[i][:lines[i].find(" ")])
            i += 1
        text[name] = text_per_audio
        segment_names[name] = segment_names_per_audio
    # apply configuration
    config = CtcSegmentationParameters()
    if args.subsampling_factor is not None:
        config.subsampling_factor = args.subsampling_factor
    if args.frame_duration is not None:
        config.frame_duration_ms = args.frame_duration
    if args.min_window_size is not None:
        config.min_window_size = args.min_window_size
    if args.max_window_size is not None:
        config.max_window_size = args.max_window_size
    config.char_list = train_args.char_list
    if args.use_dict_blank and args.use_dict_blank > 0:
        config.blank = config.char_list[0]
        logging.info(f"Blank char was set to >{config.blank}<")
    else:
        logging.debug(
            f"Blank char >{config.blank}< (align) >{config.char_list[0]}< (model)"
        )
        if config.blank != config.char_list[0]:
            logging.error(
                "Blank char mismatch; this can result in an IndexError.")
            logging.error(
                "Pass the parameter --use-dict-blank 1 to asr_align.py")
    if args.scoring_length is not None:
        config.score_min_mean_over_L = args.scoring_length
    logging.info(
        f"Frame timings: {config.frame_duration_ms}ms * {config.subsampling_factor}"
    )
    # Iterate over audio files to decode and align
    for idx, name in enumerate(js.keys(), 1):
        # NOTE(review): `name` becomes part of the %-format string, so a name
        # containing '%' would break this log call.
        logging.info("(%d/%d) Aligning " + name, idx, len(js.keys()))
        batch = [(name, js[name])]
        feat, label = load_inputs_and_targets(batch)
        feat = feat[0]
        with torch.no_grad():
            # Encode input frames
            enc_output = model.encode(
                torch.as_tensor(feat).to(device)).unsqueeze(0)
            # Apply ctc layer to obtain log character probabilities
            lpz = model.ctc.log_softmax(enc_output)[0].cpu().numpy()
        # Prepare the text for aligning
        ground_truth_mat, utt_begin_indices = prepare_text(config, text[name])
        # Align using CTC segmentation
        timings, char_probs, state_list = ctc_segmentation(
            config, lpz, ground_truth_mat)
        logging.debug(f"state_list = {state_list}")
        # Obtain list of utterances with time intervals and confidence score
        segments = determine_utterance_segments(config, utt_begin_indices,
                                                char_probs, timings,
                                                text[name])
        # Write to "segments" file
        for i, boundary in enumerate(segments):
            utt_segment = (f"{segment_names[name][i]} {name} {boundary[0]:.2f}"
                           f" {boundary[1]:.2f} {boundary[2]:.9f}\n")
            args.output.write(utt_segment)
    return 0
def _prepare_batches(self): dataset_size = len(self.group_ids) # get the sampled indices from the sampler sampled_ids = torch.as_tensor(list(self.sampler)) # potentially not all elements of the dataset were sampled # by the sampler (e.g., DistributedSampler). # construct a tensor which contains -1 if the element was # not sampled, and a non-negative number indicating the # order where the element was sampled. # for example. if sampled_ids = [3, 1] and dataset_size = 5, # the order is [-1, 1, -1, 0, -1] order = torch.full((dataset_size,), -1, dtype=torch.int64) order[sampled_ids] = torch.arange(len(sampled_ids)) # get a mask with the elements that were sampled mask = order >= 0 # find the elements that belong to each individual cluster clusters = [(self.group_ids == i) & mask for i in self.groups] # get relative order of the elements inside each cluster # that follows the order from the sampler relative_order = [order[cluster] for cluster in clusters] # with the relative order, find the absolute order in the # sampled space permutation_ids = [s[s.sort()[1]] for s in relative_order] # permute each cluster so that they follow the order from # the sampler permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] # splits each cluster in batch_size, and merge as a list of tensors splits = [c.split(self.batch_size) for c in permuted_clusters] merged = tuple(itertools.chain.from_iterable(splits)) # now each batch internally has the right order, but # they are grouped by clusters. Find the permutation between # different batches that brings them as close as possible to # the order that we have in the sampler. 
For that, we will consider the # ordering as coming from the first element of each batch, and sort # correspondingly first_element_of_batch = [t[0].item() for t in merged] # get and inverse mapping from sampled indices and the position where # they occur (as returned by the sampler) inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} # from the first element in each batch, get a relative ordering first_index_of_batch = torch.as_tensor( [inv_sampled_ids_map[s] for s in first_element_of_batch] ) # permute the batches so that they approximately follow the order # from the sampler permutation_order = first_index_of_batch.sort(0)[1].tolist() # finally, permute the batches batches = [merged[i].tolist() for i in permutation_order] if self.drop_uneven: kept = [] for batch in batches: if len(batch) == self.batch_size: kept.append(batch) batches = kept return batches
batch_actions.append(action.item()) episode_rewards.append(reward) if done: episode_return = sum(episode_rewards) episode_length = len(episode_rewards) batch_episode_returns.append(episode_return) batch_episode_lengths.append(episode_length) batch_weights += [episode_return] * episode_length observation = env.reset() done = False episode_rewards = [] if len(batch_observations) > batch_size: break n.train() opt.zero_grad() logits = n(torch.as_tensor(batch_observations).type(torch.FloatTensor)) loss = -torch.mean(torch.FloatTensor(batch_weights) * dist.Categorical(logits=logits).log_prob(torch.IntTensor(batch_actions))) print(f" loss: {loss:.2f} return: {np.mean(batch_episode_returns):.2f} episode length: {np.mean(batch_episode_lengths):.2f}") loss.backward() opt.step() torch.save(n.state_dict, './gym-cartpole-torch-model.pt')
def __getitem__(self, index):
    """
    Load, normalize and spatially sample the frames of one video.

    Args:
        index (int): the video index provided by the pytorch sampler.
    Returns:
        frames: the output of `utils.pack_pathway_output` for the sampled
            frames, which are arranged as `channel` x `num frames` x
            `height` x `width` before packing.
        label: the entry of `self._labels` for this video.
        index (int): the index of the video.
        extra (dict): an empty dict.
    Raises:
        NotImplementedError: if `self.mode` is not "train", "val" or "test".
    """
    data_cfg = self.cfg.DATA

    if self.mode in ("train", "val"):
        # -1 indicates random sampling.
        spatial_sample_index = -1
        min_scale = data_cfg.TRAIN_JITTER_SCALES[0]
        max_scale = data_cfg.TRAIN_JITTER_SCALES[1]
        crop_size = data_cfg.TRAIN_CROP_SIZE
    elif self.mode in ("test",):
        # spatial_sample_index is one of 0, 1, 2: left / center / right for
        # wide videos, top / middle / bottom for tall ones.
        num_crops = self.cfg.TEST.NUM_SPATIAL_CROPS
        spatial_sample_index = self._spatial_temporal_idx[index] % num_crops
        # Testing is deterministic, so no jitter: all three sizes coincide.
        test_size = data_cfg.TEST_CROP_SIZE
        min_scale, max_scale, crop_size = [test_size] * 3
        assert len({min_scale, max_scale, crop_size}) == 1
    else:
        raise NotImplementedError("Does not support {} mode".format(
            self.mode))

    label = self._labels[index]

    frame_ids = self.get_seq_frames_new(index)
    frame_paths = [self._path_to_videos[index][frame] for frame in frame_ids]
    loaded = utils.retry_load_images(frame_paths, self._num_retries)
    frames = torch.as_tensor(loaded)

    # Color normalization.
    frames = utils.tensor_normalize(frames, data_cfg.MEAN, data_cfg.STD)

    # T H W C -> C T H W.
    frames = frames.permute(3, 0, 1, 2)

    # Data augmentation.
    frames = utils.spatial_sampling_new(
        frames,
        self.cfg,
        spatial_idx=spatial_sample_index,
        min_scale=min_scale,
        max_scale=max_scale,
        crop_size=crop_size,
        random_horizontal_flip=data_cfg.RANDOM_FLIP,
        inverse_uniform_sampling=data_cfg.INV_UNIFORM_SAMPLE,
    )

    packed = utils.pack_pathway_output(self.cfg, frames)
    return packed, label, index, {}
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.autograd.profiler as profiler
import util

# Profiling script: times util.create_fake_y on a small synthetic batch.
if __name__ == '__main__':
    n = 5    # number of response maps in the batch
    s = 121  # side length of each (square) response map
    y = torch.as_tensor(util.gaussian_shaped_labels(4, (s, s)))
    # Each response map is zero except for a single peak of 100 at a
    # different location.
    response = torch.zeros((n, 1, s, s))
    response[0, 0, 60, 60] = 100
    response[1, 0, 0, 60] = 100
    response[2, 0, 60, 0] = 100
    response[3, 0, 30, 100] = 100
    response[4, 0, 80, 90] = 100
    # NOTE(review): use_cuda=True requests CUDA timings; confirm the script
    # is only meant to run on machines with a GPU.
    with profiler.profile(use_cuda=True, record_shapes=True,
                          profile_memory=True, with_stack=True) as p:
        # with profiler.record_function('model_inference'):
        fake_y = util.create_fake_y(y, response)
    # Print every row, grouped by the top 5 stack frames and sorted by CUDA
    # self time.
    print(
        p.key_averages(group_by_stack_n=5).table(
            sort_by="self_cuda_time_total", row_limit=-1))
    fig, ax = plt.subplots(2, n)
def merge_bg(self, img, label, bg_dir):
    """Randomly replace the background of ``img`` using ``label`` as blend mask.

    One of three modes is drawn uniformly: 0 keeps ``img`` unchanged, 1 uses a
    random ``.jpg`` found one to four directory levels below ``bg_dir``, 2
    uses a randomly coloured, heavily augmented background.

    Args:
        img: Image combined as ``img * label + bg * (1 - label)`` --
            presumably a (C, H, W) tensor of side ``self.input_size``; confirm.
        label: Blend weights -- presumably a foreground mask in [0, 1];
            confirm against callers.
        bg_dir: Directory searched for background images.

    Returns:
        The (possibly re-composited) image.
    """
    sometimes = lambda aug: iaa.Sometimes(0.5, aug)
    # merge bg
    random_bg = np.random.choice([0, 1, 2])
    # NOTE(review): the directory is globbed on every call, even for modes
    # that do not need it.
    list_sample_bg = glob.glob(bg_dir+'/*.jpg') + glob.glob(bg_dir+'/*/*.jpg') + glob.glob(bg_dir+'/*/*/*.jpg') + glob.glob(bg_dir+'/*/*/*/*.jpg')
    # 0: original
    # 1: list_sample_bg
    # 2: pure bg
    if random_bg == 1:
        bg_path = np.random.choice(list_sample_bg)
        bg = cv2.imread(bg_path, cv2.IMREAD_COLOR)
        bg = cv2.resize(bg, (self.input_size, self.input_size), interpolation=cv2.INTER_LINEAR)
        # bg = bg.astype(np.float32) # [:, :, ::-1] # RGB to BGR!!!
    elif random_bg == 2:
        # Generate Bg
        # One random colour (values in [0, 255)) tiled to input_size x input_size.
        bg = np.random.randint(255, size=3)
        bg = bg.reshape(1,1,3)
        bg = np.repeat(bg, self.input_size, axis=0)
        bg = np.repeat(bg, self.input_size, axis=1)
        bg = bg.astype(np.uint8)
        # Augmentation
        # NOTE(review): the original indentation was lost; the augmentation is
        # reconstructed inside this branch because `bg` is undefined when
        # random_bg == 0. Confirm it should not also apply to mode 1.
        bg_seq = iaa.Sequential(
            [
                # execute 1 to 6 of the following (less important) augmenters per image
                # don't execute all of them, as that would often be way too strong
                iaa.SomeOf((1, 6),
                    [
                        sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation
                        iaa.OneOf([
                            iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
                            iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
                            iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11
                        ]),
                        iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
                        iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
                        # search either for all edges or for directed edges,
                        # blend the result with the original image using a blobby mask
                        iaa.SimplexNoiseAlpha(iaa.OneOf([
                            iaa.EdgeDetect(alpha=(0.5, 1.0)),
                            iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
                        ])),
                        iaa.OneOf([
                            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5), # add gaussian noise to images
                            iaa.AdditiveLaplaceNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
                            iaa.AdditivePoissonNoise(lam=(0.0, 4.0), per_channel=0.5)
                        ]),
                        iaa.OneOf([
                            iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels
                            iaa.CoarseDropout((0.03, 0.2), size_percent=(0.02, 0.05), per_channel=0.2),
                        ]),
                        iaa.Invert(0.1, per_channel=True), # invert color channels
                        iaa.AddToHueAndSaturation((-20, 20)), # change hue and saturation
                        # either change the brightness of the whole image (sometimes
                        # per channel) or change the brightness of subareas
                        iaa.OneOf([
                            iaa.Multiply((0.5, 1.5), per_channel=0.5),
                            iaa.FrequencyNoiseAlpha(
                                exponent=(-4, 0),
                                first=iaa.Multiply((0.5, 1.5), per_channel=True),
                                second=iaa.ContrastNormalization((0.5, 2.0))
                            )
                        ]),
                        iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast
                        sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths)
                        iaa.Add((-25, 25), per_channel=0.5), # change brightness of images (by -25 to 25 of original value)
                        iaa.OneOf([
                            iaa.ImpulseNoise((0.01, 0.1)),
                            iaa.SaltAndPepper((0.01, 0.1), per_channel=0.2),
                        ]),
                        iaa.JpegCompression(),
                    ],
                    random_order=True
                ),
                iaa.OneOf([
                    iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
                    iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
                    iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11
                    iaa.JpegCompression(),
                    iaa.Multiply((0.5, 1.5), per_channel=0.5),
                    iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
                ]),
            ],
            random_order=False
        )
        bg = bg_seq.augment_image(bg)
    if random_bg >= 1:
        bg = torch.as_tensor(bg.astype(np.float32))
        # Two transposes move the last axis to the front: (H, W, C) -> (C, H, W).
        bg = torch.transpose(torch.transpose(bg, 1, 2), 0, 1)
        # Keep img where label is 1 and take the new background where it is 0.
        img = img * label + bg * (1 - label)
    return img # Bacon
def torch_trilinear_interpolation(
    volume: torch.Tensor,
    coords: torch.Tensor,
) -> torch.Tensor:
    """Trilinearly interpolate a torch volume at the given coordinates.

    All work happens on the device that holds ``volume``. The module-level
    ``idx`` (offsets to the 8 surrounding corners) and ``B1`` (interpolation
    basis matrix) are used.

    Parameters
    ----------
    volume : torch.Tensor with 3D or 4D shape
        The input volume to interpolate from. For a 4D volume the last
        axis is treated as the modality axis.
    coords : torch.Tensor with shape (N,3)
        The coordinates where to interpolate. Corner indices that fall outside
        the volume are clipped to its bounds.

    Returns
    -------
    output : torch.Tensor
        Interpolated values, float32. Shape (N,) for a 3D volume and
        (N, #modalities) for a 4D volume.

    Raises
    ------
    ValueError
        If ``volume`` is neither 3D nor 4D.

    References
    ----------
    [1] https://spie.org/samples/PM159.pdf
    """
    # Both inputs must already live on the same device.
    assert volume.device == coords.device, \
        "volume on device: {}; coords on device: {}".format(
            volume.device, coords.device)

    coords = coords.type(torch.float32)
    volume = volume.type(torch.float32)
    device = volume.device

    corner_offsets = torch.as_tensor(idx, dtype=torch.float, device=device)
    basis = torch.as_tensor(B1, dtype=torch.float, device=device)

    n_dims = volume.dim()
    if n_dims not in (3, 4):
        raise ValueError("Volume must be 3D or 4D!")

    # 8 cube corners for every input coordinate.
    corners = coords[:, None, :] + corner_offsets
    if n_dims == 3:
        # Plain truncation; after the clamp at 0 below it yields the same
        # indices as an explicit floor.
        flat_corners = corners.reshape((-1, 3)).long()
    else:
        flat_corners = torch.floor(corners).reshape((-1, 3)).long()

    # Clip indices to make sure we don't go out-of-bounds.
    lowest = torch.as_tensor([0, 0, 0], device=device)
    highest = torch.as_tensor(volume.shape[:3], device=device) - 1
    clipped = torch.min(torch.max(flat_corners, lowest), highest)
    ci, cj, ck = clipped[:, 0], clipped[:, 1], clipped[:, 2]

    # Fractional position inside the cell and the 8 monomials built from it.
    frac = coords - torch.floor(coords)
    fx, fy, fz = frac[:, 0], frac[:, 1], frac[:, 2]
    monomials = torch.stack([
        torch.ones_like(fx),
        fx, fy, fz,
        fx * fy, fy * fz, fx * fz,
        fx * fy * fz,
    ], dim=0)
    corner_weights = torch.mm(basis.t(), monomials)  # (8, N)

    n_points = coords.shape[0]
    if n_dims == 3:
        corner_values = volume[ci, cj, ck].reshape((n_points, -1)).t()  # (8, N)
        return torch.sum(corner_values * corner_weights, dim=0)

    corner_values = volume[ci, cj, ck, :].reshape(
        (n_points, 8, volume.shape[-1]))
    blended = torch.sum(
        corner_values * corner_weights.t()[:, :, None], dim=1)
    return blended.type(torch.float32)
def __call__(self, dataset_dict):
    """Load one image, apply the configured transforms and build model inputs.

    The input dict is deep-copied, the image is read and transformed, and the
    result is stored under ``"image"`` as a channel-first tensor. Depending on
    the mapper's flags, annotations are turned into ``"instances"``, the
    semantic label is stored under ``"sem_seg"``, and (training with matting)
    the image background is replaced via ``self.merge_bg``.

    Args:
        dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

    Returns:
        dict: a format that builtin models in detectron2 accept
    """
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    # USER: Write your own image loading if it's not from a file
    image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
    utils.check_image_size(dataset_dict, image)

    if "annotations" not in dataset_dict:
        # No instances: the optional crop is simply prepended to the other
        # transform generators.
        image, transforms = T.apply_transform_gens(
            ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
        )
    else:
        # Crop around an instance if there are instances in the image.
        # USER: Remove if you don't use cropping
        if self.crop_gen:
            crop_tfm = utils.gen_crop_transform_with_instance(
                self.crop_gen.get_crop_size(image.shape[:2]),
                image.shape[:2],
                np.random.choice(dataset_dict["annotations"]),
            )
            image = crop_tfm.apply_image(image)
        image, transforms = T.apply_transform_gens(self.tfm_gens, image)
        if self.crop_gen:
            # Keep the crop as part of the transform list so annotations and
            # labels below receive it as well.
            transforms = crop_tfm + transforms

    image_shape = image.shape[:2]  # h, w

    # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
    # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
    # Therefore it's important to use torch.Tensor.
    dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

    # USER: Remove if you don't use pre-computed proposals.
    if self.load_proposals:
        utils.transform_proposals(
            dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
        )

    # Tin
    # fast_autoaug 1
    # Outside training, ground truth is dropped and the dict returned early,
    # unless self.autoaug is set, in which case the labels below are still built.
    if not self.is_train and not self.autoaug:
        # Bacon
        # USER: Modify this if you want to keep them for some reason.
        dataset_dict.pop("annotations", None)
        dataset_dict.pop("sem_seg_file_name", None)
        return dataset_dict

    if "annotations" in dataset_dict:
        # USER: Modify this if you want to keep them for some reason.
        for anno in dataset_dict["annotations"]:
            if not self.mask_on:
                anno.pop("segmentation", None)
            if not self.keypoint_on:
                anno.pop("keypoints", None)

        # USER: Implement additional transformations if you have other types of data
        # Crowd annotations (iscrowd != 0) are skipped.
        annos = [
            utils.transform_instance_annotations(
                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
            )
            for obj in dataset_dict.pop("annotations")
            if obj.get("iscrowd", 0) == 0
        ]
        instances = utils.annotations_to_instances(
            annos, image_shape, mask_format=self.mask_format
        )
        # Create a tight bounding box from masks, useful when image is cropped
        if self.crop_gen and instances.has("gt_masks"):
            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
        dataset_dict["instances"] = utils.filter_empty_instances(instances)

    # USER: Remove if you don't do semantic/panoptic segmentation.
    if "sem_seg_file_name" in dataset_dict:
        # Remember the label path; it is only used to name the debug dump below.
        a = dataset_dict["sem_seg_file_name"]
        with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
            sem_seg_gt = Image.open(f)
            sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")

        # Tin
        # matting 0
        # Work on a copy so the in-place edits below are safe.
        sem_seg_gt = sem_seg_gt.copy()
        if sem_seg_gt.max() == 0:
            # NOTE(review): an all-zero label aborts the sample; because this is
            # an ``assert`` it disappears entirely under ``python -O``.
            assert 0, "label max 0"
        if self.matting:
            # The matting datasets' labels are not unified: some use 1 and
            # others 255 as the maximum, so 0/1 labels are rescaled to 0/255.
            if sem_seg_gt.max() == 1:
                sem_seg_gt = sem_seg_gt * 255
        # sem_fpn 0
        if self.binary:
            sem_seg_gt[sem_seg_gt==255] = 1
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # source -- confirm that taking channel 0 belongs under
            # ``self.binary`` (it requires a 3-D label array).
            sem_seg_gt = sem_seg_gt[:, :, 0].copy()
        # Bacon

        sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
        sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))

        # Tin
        # matting 1
        if self.matting or self.binary:
            # Collapse every positive value except 255 to 1 while leaving 255
            # untouched; -1 is used as a temporary placeholder for 255.
            sem_seg_gt[sem_seg_gt==255] = -1
            sem_seg_gt[sem_seg_gt>0] = 1
            sem_seg_gt[sem_seg_gt==-1] = 255
        # Bacon
        dataset_dict["sem_seg"] = sem_seg_gt

        if not self.is_train:
            # NOTE(review): debug dump to a hard-coded absolute path. Because of
            # the early return above, this is only reachable when is_train is
            # False and self.autoaug is truthy. Consider removing it or making
            # the output directory configurable.
            import cv2
            cv2.imwrite("/home/pgding/project/LIP/detectron2_bdd/tools/output/tmp/"+a.split("/")[-1].replace(".png", "_orig.jpg"), dataset_dict["image"].numpy().transpose(1, 2, 0))
        # Disabled training-time debug dump, kept for reference:
        # if self.is_train:
        #     s = sem_seg_gt.numpy().copy()
        #     s[s==1] = 100
        #     cv2.imwrite("/home/pgding/project/LIP/detectron2_bdd/tools/output/tmp/"+a.split("/")[-1].replace(".png", "_train_mp.jpg"), s)

    # Tin
    # matting 2
    # Background replacement needs both "sem_seg" (set only in the block above)
    # and "bg_file_dir"; a missing key raises KeyError here.
    if self.is_train and self.matting:
        img = self.merge_bg(dataset_dict["image"], dataset_dict["sem_seg"], dataset_dict["bg_file_dir"])
        dataset_dict["image"] = img
    # Bacon
    return dataset_dict
def evaluate_box_proposals(
    predictions, dataset, thresholds=None, area="all", limit=None
):
    """Evaluate detection proposal recall metrics.

    This function is a much faster alternative to the official COCO API recall
    evaluation code. However, it produces slightly different results.

    Args:
        predictions: Per-image proposal box lists; position ``i`` is mapped to
            a COCO image through ``dataset.id_to_img_map[i]``. Each entry must
            carry an ``"objectness"`` field.
        dataset: Dataset exposing ``id_to_img_map`` and a ``coco`` API object.
        thresholds: IoU thresholds at which recall is computed, as a tensor or
            any sequence of numbers. Defaults to 0.50:0.05:0.95.
        area (str): Name of the ground-truth area range to evaluate.
        limit (int, optional): Keep only the ``limit`` highest-scoring
            proposals per image.

    Returns:
        dict: ``"ar"`` (mean recall over thresholds), ``"recalls"``,
        ``"thresholds"``, ``"gt_overlaps"`` (sorted best IoU per matched
        ground-truth box) and ``"num_pos"`` (number of ground-truth boxes in
        the area range). When ``num_pos`` is 0 the recalls and ``ar`` are NaN.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],  # all
        [0 ** 2, 32 ** 2],  # small
        [32 ** 2, 96 ** 2],  # medium
        [96 ** 2, 1e5 ** 2],  # large
        [96 ** 2, 128 ** 2],  # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],  # 512-inf
    ]
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        # TODO replace with get_img_info?
        image_width = dataset.coco.imgs[original_id]["width"]
        image_height = dataset.coco.imgs[original_id]["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        # Crowd annotations never count as ground truth.
        anno = [
            obj for obj in dataset.coco.loadAnns(ann_ids) if obj["iscrowd"] == 0
        ]
        gt_boxes = [obj["bbox"] for obj in anno]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height), mode="xywh").convert(
            "xyxy"
        )
        gt_areas = torch.as_tensor([obj["area"] for obj in anno])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)

    # torch.cat rejects an empty list, which happens when no image contributed
    # any overlaps (no predictions, or no ground truth in the area range).
    if gt_overlaps:
        gt_overlaps = torch.cat(gt_overlaps, dim=0)
    else:
        gt_overlaps = torch.zeros(0, dtype=torch.float32)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    elif not isinstance(thresholds, torch.Tensor):
        # Accept plain sequences; tensors are passed through untouched.
        thresholds = torch.as_tensor(thresholds, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }