Example #1
    def __init__(
        self,
        model: Model,
        best_f: Union[float, Tensor],
        objective_index: int,
        constraints: Dict[int, Tuple[Optional[float], Optional[float]]],
        maximize: bool = True,
    ) -> None:
        r"""Analytic Constrained Expected Improvement.

        Args:
            model: A fitted single-outcome model.
            best_f: Either a scalar or a `b`-dim Tensor (batch mode) representing
                the best function value observed so far (assumed noiseless).
            objective_index: The index of the objective.
            constraints: A dictionary of the form `{i: [lower, upper]}`, where
                `i` is the output index, and `lower` and `upper` are lower and upper
                bounds on that output (resp. interpreted as -Inf / Inf if None)
            maximize: If True, consider the problem a maximization problem.
        """
        super().__init__(model=model)
        self.maximize = maximize
        self.objective_index = objective_index
        self.constraints = constraints
        self.register_buffer("best_f", torch.as_tensor(best_f))
        self._preprocess_constraint_bounds(constraints=constraints)
        self.register_forward_pre_hook(convert_to_target_pre_hook)
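The `constraints` argument above is just `{output_index: (lower, upper)}` with `None` meaning an unbounded side. A minimal sketch of turning such a dict into index/bound tensors (a hypothetical helper; BoTorch's own `_preprocess_constraint_bounds` presumably does more bookkeeping):

import math
import torch

def bounds_from_constraints(constraints):
    # hypothetical helper: turn {output_index: (lower, upper)} into tensors,
    # reading a missing side (None) as -inf / +inf
    keys = sorted(constraints)
    lower = torch.tensor([-math.inf if constraints[k][0] is None else constraints[k][0] for k in keys])
    upper = torch.tensor([math.inf if constraints[k][1] is None else constraints[k][1] for k in keys])
    return torch.tensor(keys), lower, upper

# output 1 must stay below 0.5, output 2 must stay above -1.0
print(bounds_from_constraints({1: (None, 0.5), 2: (-1.0, None)}))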
Example #2
    def __getitem__(self, idx):
        img, anno = super(COCODataset, self).__getitem__(idx)

        # filter crowd annotations
        # TODO might be better to add an extra field
        anno = [obj for obj in anno if obj["iscrowd"] == 0]

        boxes = [obj["bbox"] for obj in anno]
        boxes = torch.as_tensor(boxes).reshape(-1, 4)  # guard against no boxes
        target = BoxList(boxes, img.size, mode="xywh").convert("xyxy")

        classes = [obj["category_id"] for obj in anno]
        classes = [self.json_category_id_to_contiguous_id[c] for c in classes]
        classes = torch.tensor(classes)
        target.add_field("labels", classes)

        masks = [obj["segmentation"] for obj in anno]
        masks = SegmentationMask(masks, img.size)
        target.add_field("masks", masks)

        target = target.clip_to_image(remove_empty=True)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
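The `reshape(-1, 4)` after `torch.as_tensor(boxes)` is what guards against images with no annotations: an empty list becomes a `(0, 4)` tensor rather than a 1-D empty one. In isolation:

import torch

boxes = []  # image with no annotations
t = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
print(t.shape)  # torch.Size([0, 4]) -- downstream code can still slice and concatenate it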
    def __init__(self, polygons, size, mode):
        # assert isinstance(polygons, list), '{}'.format(polygons)
        if isinstance(polygons, list):
            polygons = [torch.as_tensor(p, dtype=torch.float32) for p in polygons]
        elif isinstance(polygons, Polygons):
            polygons = polygons.polygons

        self.polygons = polygons
        self.size = size
        self.mode = mode
def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
    """
    Strategy: suppose that the model we will create has prefixes appended
    to each of its keys, for example due to an extra level of nesting that the original
    pre-trained weights from ImageNet won't contain. For example, model.state_dict()
    might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
    res2.conv1.weight. We thus want to match both parameters together.
    For that, for each model weight we look among all loaded keys for one
    that is a suffix of the current weight name, and use it if that's the case.
    If multiple matches exist, we take the one with the longest matching
    name. For example, for the same model as before, the pretrained
    weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
    we want to match backbone[0].body.conv1.weight to conv1.weight, and
    backbone[0].body.res2.conv1.weight to res2.conv1.weight.
    """
    current_keys = sorted(list(model_state_dict.keys()))
    loaded_keys = sorted(list(loaded_state_dict.keys()))
    # get a matrix of string matches, where each (i, j) entry correspond to the size of the
    # loaded_key string, if it matches
    match_matrix = [
        len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys
    ]
    match_matrix = torch.as_tensor(match_matrix).view(
        len(current_keys), len(loaded_keys)
    )
    max_match_size, idxs = match_matrix.max(1)
    # remove indices that correspond to no-match
    idxs[max_match_size == 0] = -1

    # used for logging
    max_size = max([len(key) for key in current_keys]) if current_keys else 1
    max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1
    log_str_template = "{: <{}} loaded from {: <{}} of shape {}"
    logger = logging.getLogger(__name__)
    for idx_new, idx_old in enumerate(idxs.tolist()):
        if idx_old == -1:
            continue
        key = current_keys[idx_new]
        key_old = loaded_keys[idx_old]
        model_state_dict[key] = loaded_state_dict[key_old]
        logger.info(
            log_str_template.format(
                key,
                max_size,
                key_old,
                max_size_loaded,
                tuple(loaded_state_dict[key_old].shape),
            )
        )
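The matching above boils down to: for every model key, pick the loaded key that is its longest suffix. A self-contained toy version of just that step (key names here are made up for illustration):

import torch

model_keys = ["backbone.body.res2.conv1.weight", "backbone.body.conv1.weight"]
loaded_keys = ["res2.conv1.weight", "conv1.weight"]

# entry (i, j) holds len(loaded_key j) if it is a suffix of model_key i, else 0
match = torch.tensor([[len(j) if i.endswith(j) else 0 for j in loaded_keys]
                      for i in model_keys])
best_len, best_idx = match.max(dim=1)
for i, key in enumerate(model_keys):
    if best_len[i] > 0:
        print(key, "<-", loaded_keys[best_idx[i].item()])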
    def __init__(self, sampler, group_ids, batch_size, drop_uneven=False):
        if not isinstance(sampler, Sampler):
            raise ValueError(
                "sampler should be an instance of "
                "torch.utils.data.Sampler, but got sampler={}".format(sampler)
            )
        self.sampler = sampler
        self.group_ids = torch.as_tensor(group_ids)
        assert self.group_ids.dim() == 1
        self.batch_size = batch_size
        self.drop_uneven = drop_uneven

        self.groups = torch.unique(self.group_ids).sort(0)[0]

        self._can_reuse_batches = False
    def __init__(self, bbox, image_size, mode="xyxy"):
        device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu")
        bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device)
        if bbox.ndimension() != 2:
            raise ValueError(
                "bbox should have 2 dimensions, got {}".format(bbox.ndimension())
            )
        if bbox.size(-1) != 4:
            raise ValueError(
                "last dimenion of bbox should have a "
                "size of 4, got {}".format(bbox.size(-1))
            )
        if mode not in ("xyxy", "xywh"):
            raise ValueError("mode should be 'xyxy' or 'xywh'")

        self.bbox = bbox
        self.size = image_size  # (image_width, image_height)
        self.mode = mode
        self.extra_fields = {}
def segment(stack, method='ensemble', pad_mode='reflect', seg_threshold=0.8,
            min_voxels=65, max_voxels=4168, compactness_factor=0.05):
    """ Utility function to segment a 3-d stack

    :param stack: 3-d array. Raw stack resampled to 1 mm^3.
    :param method: Either 'single' or 'ensemble'. Whether to use a single model or
        an ensemble of models for segmentation.
    :param pad_mode: How the stack will be padded. Any valid mode from np.pad.
    :param seg_threshold: Threshold used to produce instance segmentations.
    :param min_voxels: Minimum number of voxels in a valid object.
    :param max_voxels: Maximum number of voxels in a valid object.
    :param compactness_factor: Weight for the compactness objective during instance
        segmentation.

    :return: detection, segmentation, instance. Arrays of the same shape as stack:
        voxel-wise centroid probability (np.float32), voxel-wise cell probability
        (np.float32) and instance segmentation (np.int32).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cpu':
        print('Running 3-d segmentation in the CPU will take more time.')

    # Prepare input
    padded = np.pad(stack, 20, mode=pad_mode)
    lcned = utils.lcn(padded, (3, 25, 25))
    norm = (lcned - lcned.mean()) / lcned.std()
    input_ = torch.as_tensor(norm[np.newaxis, np.newaxis, ...])  # 1 x 1 x D x H x W
    del padded, lcned, norm # release memory

    # Declare models
    net = models.QCANet()
    data_path = '/data/pipeline/python/pipeline/data/'
    if method == 'single':
        model_names = ['bestndn_1-9-17026.pth']
    else:
        model_names = ['bestndn_1-9-17026.pth', 'bestndn_1-17-17206.pth',
                       'bestndn_1-3-17259.pth', 'bestndn_1-8-17261.pth']  # we'll ensemble all of these

    # Create detection and segmentation probabilities
    # accumulators start at zero because per-model probabilities are summed below
    detection_sum = np.zeros(input_.shape[-3:], dtype=np.float32)
    segmentation_sum = np.zeros(input_.shape[-3:], dtype=np.float32)
    with torch.no_grad():
        for model_name in model_names:
            # Import model from file
            net.load_state_dict(torch.load(path.join(data_path, model_name)))
            net.eval()
            net.to(device)

            # Segment
            detection, segmentation = net.forward_on_big_input(input_)
            detection_sum += torch.sigmoid(detection).squeeze().numpy()
            segmentation_sum += torch.sigmoid(segmentation).squeeze().numpy()
        detection = detection_sum / len(model_names)
        segmentation = segmentation_sum / len(model_names)
    del input_, detection_sum, segmentation_sum # release memory

    # Drop padding (added above)
    detection = detection[20:-20, 20:-20, 20:-20]
    segmentation = segmentation[20:-20, 20:-20, 20:-20]

    # Create instance segmentation
    instance = utils.prob2labels(detection, segmentation, seg_threshold=seg_threshold,
                                 min_voxels=min_voxels, max_voxels=max_voxels,
                                 compactness_factor=compactness_factor)

    return detection, segmentation, instance
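`segment` follows a pad, ensemble-average, crop pattern around the model calls. A stripped-down sketch of that pattern with a stand-in model (`toy_model` below is hypothetical and only mimics the shapes):

import numpy as np
import torch

def toy_model(x):
    # stand-in for net.forward_on_big_input: returns two logit volumes of the input shape
    return x * 2.0, x * 0.5

stack = np.random.rand(64, 64, 64).astype(np.float32)
padded = np.pad(stack, 20, mode="reflect")
input_ = torch.as_tensor(padded[np.newaxis, np.newaxis])  # 1 x 1 x D x H x W

det_sum = np.zeros(input_.shape[-3:], dtype=np.float32)
for _ in range(3):  # pretend ensemble of 3 models
    det, _ = toy_model(input_)
    det_sum += torch.sigmoid(det).squeeze().numpy()
det = det_sum / 3
det = det[20:-20, 20:-20, 20:-20]  # drop the padding added above
print(det.shape)  # (64, 64, 64)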
Example #8
 def to_image(*numbers):
     return torch.as_tensor(numbers).reshape(1, 1, 1, len(numbers))
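A quick usage check of the helper above, just to make the output shape explicit:

import torch

def to_image(*numbers):
    # definition copied from the example above for a self-contained check
    return torch.as_tensor(numbers).reshape(1, 1, 1, len(numbers))

print(to_image(0.1, 0.2, 0.3).shape)  # torch.Size([1, 1, 1, 3])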
Example #9
    def update(self, obs, action, reward, next_obs, terminal):
        if self.training:
            if hasattr(self, 'local_buffer'):
                self.add_to_local_buffer(obs, action, reward, next_obs,
                                         terminal)
            self.add_to_memory(obs, action, reward, next_obs, terminal)

        for i, done in enumerate(terminal):
            if done:
                try:
                    self.local_buffer[i].reset()
                except AttributeError:
                    pass
                if self.training:
                    try:
                        self.exploration_noise[i].reset_states()
                    except AttributeError:
                        pass

        if self.ready_to_update():
            self.online_network.eval()

            # Maybe not the best way to do a line break,
            # but I like it more than \
            (obs, action, reward, next_obs, terminal, weight,
             indices) = self.sample_from_memory(self.batch_size)

            update_target = self.update_target(obs, action, reward, next_obs,
                                               terminal)

            self.online_network.train()

            if self.n_steps > 1:
                obs = obs[:, 0]
                action = action[:, 0]

            if weight is None:
                weight = torch.ones(self.batch_size, device=self._device)
            else:
                weight = torch.as_tensor(weight,
                                         dtype=torch.float32,
                                         device=self._device)
                assert (weight.dim() == 1), 'TODO'

            Q_sa = self.online_network.critic_value(obs, action)
            td_error = Q_sa - update_target
            critic_loss = (td_error).pow(2).mul(0.5).squeeze(-1)
            critic_loss = (critic_loss * weight).mean()

            self.critic_optimizer.zero_grad()
            critic_loss.backward()
            if self.clip_gradients:
                nn.utils.clip_grad_norm_(self.online_network.critic_params,
                                         self.clip_gradients)
            self.critic_optimizer.step()

            action = self.online_network.action(obs)
            policy_loss = -self.online_network.critic_value(obs, action).mean()

            self.policy_optimizer.zero_grad()
            policy_loss.backward()
            if self.clip_gradients:
                nn.utils.clip_grad_norm_(self.online_network.policy_params,
                                         self.clip_gradients)
            self.policy_optimizer.step()

            self.soft_update()
            self.online_network.eval()

            updated_p = (np.abs(td_error.detach().cpu().numpy().squeeze()) +
                         1e-8)
            self.update_priorities(indices, updated_p)

        if self.training:
            self.current_step += 1
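The critic update above is a weighted squared-TD-error loss plus priority feedback. A tensor-only sketch of that piece with toy shapes (no networks or replay buffer):

import torch

batch_size = 4
q_sa = torch.randn(batch_size, 1)            # critic_value(obs, action)
update_target = torch.randn(batch_size, 1)   # bootstrapped target
weight = torch.ones(batch_size)              # importance-sampling weights (all ones here)

td_error = q_sa - update_target
critic_loss = td_error.pow(2).mul(0.5).squeeze(-1)   # per-sample 0.5 * TD^2
critic_loss = (critic_loss * weight).mean()
print(critic_loss)

# priorities fed back to the replay buffer, as in the snippet above
updated_p = td_error.detach().abs().squeeze(-1).numpy() + 1e-8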
Example #10
 def __init__(self, M, const_diagonal=False):
     self.mat = M = torch.as_tensor(M)
     self.m, self.n = self.shape = M.shape
     self._cache = {}
Example #11
    def forward(
        self,
        src: torch.Tensor,
        theta: torch.Tensor,
        spatial_size: Optional[Union[Sequence[int],
                                     int]] = None) -> torch.Tensor:
        """
        ``theta`` must be an affine transformation matrix with shape
        3x3 or Nx3x3 or Nx2x3 or 2x3 for spatial 2D transforms,
        4x4 or Nx4x4 or Nx3x4 or 3x4 for spatial 3D transforms,
        where `N` is the batch size. `theta` will be converted into float Tensor for the computation.

        Args:
            src (array_like): image in spatial 2D or 3D (N, C, spatial_dims),
                where N is the batch dim, C is the number of channels.
            theta (array_like): Nx3x3, Nx2x3, 3x3, 2x3 for spatial 2D inputs,
                Nx4x4, Nx3x4, 3x4, 4x4 for spatial 3D inputs. When the batch dimension is omitted,
                `theta` will be repeated N times, N is the batch dim of `src`.
            spatial_size: output spatial shape, the full output shape will be
                `[N, C, *spatial_size]` where N and C are inferred from the `src`.

        Raises:
            TypeError: When ``theta`` is not a ``torch.Tensor``.
            ValueError: When ``theta`` is not one of [Nxdxd, dxd].
            ValueError: When ``theta`` is not one of [Nx3x3, Nx4x4].
            TypeError: When ``src`` is not a ``torch.Tensor``.
            ValueError: When ``src`` spatially is not one of [2D, 3D].
            ValueError: When affine and image batch dimension differ.

        """
        # validate `theta`
        if not isinstance(theta, torch.Tensor):
            raise TypeError(
                f"theta must be torch.Tensor but is {type(theta).__name__}.")
        if theta.dim() not in (2, 3):
            raise ValueError(f"theta must be Nxdxd or dxd, got {theta.shape}.")
        if theta.dim() == 2:
            theta = theta[None]  # adds a batch dim.
        theta = theta.clone()  # no in-place change of theta
        theta_shape = tuple(theta.shape[1:])
        if theta_shape in ((2, 3), (3, 4)):  # needs padding to dxd
            pad_affine = torch.tensor([0, 0, 1] if theta_shape[0] ==
                                      2 else [0, 0, 0, 1])
            pad_affine = pad_affine.repeat(theta.shape[0], 1, 1).to(theta)
            pad_affine.requires_grad = False
            theta = torch.cat([theta, pad_affine], dim=1)
        if tuple(theta.shape[1:]) not in ((3, 3), (4, 4)):
            raise ValueError(
                f"theta must be Nx3x3 or Nx4x4, got {theta.shape}.")

        # validate `src`
        if not isinstance(src, torch.Tensor):
            raise TypeError(
                f"src must be torch.Tensor but is {type(src).__name__}.")
        sr = src.dim() - 2  # input spatial rank
        if sr not in (2, 3):
            raise ValueError(
                f"Unsupported src dimension: {sr}, available options are [2, 3]."
            )

        # set output shape
        src_size = tuple(src.shape)
        dst_size = src_size  # default to the src shape
        if self.spatial_size is not None:
            dst_size = src_size[:2] + self.spatial_size
        if spatial_size is not None:
            dst_size = src_size[:2] + ensure_tuple(spatial_size)

        # reverse and normalize theta if needed
        if not self.normalized:
            theta = to_norm_affine(
                affine=theta,
                src_size=src_size[2:],
                dst_size=dst_size[2:],
                align_corners=self.align_corners,
                zero_centered=self.zero_centered,
            )
        if self.reverse_indexing:
            rev_idx = torch.as_tensor(range(sr - 1, -1, -1), device=src.device)
            theta[:, :sr] = theta[:, rev_idx]
            theta[:, :, :sr] = theta[:, :, rev_idx]
        if (theta.shape[0] == 1) and src_size[0] > 1:
            # adds a batch dim to `theta` in order to match `src`
            theta = theta.repeat(src_size[0], 1, 1)
        if theta.shape[0] != src_size[0]:
            raise ValueError(
                f"affine and image batch dimension must match, got affine={theta.shape[0]} image={src_size[0]}."
            )

        grid = nn.functional.affine_grid(theta=theta[:, :sr],
                                         size=list(dst_size),
                                         align_corners=self.align_corners)
        dst = nn.functional.grid_sample(
            input=src.contiguous(),
            grid=grid,
            mode=self.mode,
            padding_mode=self.padding_mode,
            align_corners=self.align_corners,
        )
        return dst
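The forward pass above ultimately lands in `torch.nn.functional.affine_grid` and `grid_sample` with an `N x d x (d+1)` theta. A minimal 2-D identity-warp sketch using those two calls directly:

import torch
import torch.nn.functional as F

src = torch.arange(16.0).reshape(1, 1, 4, 4)          # N x C x H x W
theta = torch.tensor([[[1.0, 0.0, 0.0],
                       [0.0, 1.0, 0.0]]])              # N x 2 x 3 identity affine

grid = F.affine_grid(theta, size=[1, 1, 4, 4], align_corners=False)
dst = F.grid_sample(src, grid, mode="bilinear",
                    padding_mode="zeros", align_corners=False)
print(torch.allclose(src, dst))  # True: the identity affine reproduces the input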
    def learn(self, states, actions, rewards, returns, history,
              args) -> History:
        states = torch.as_tensor(states, dtype=torch.float, device=self.device)
        rlsz = self.rollouts * states.size(1)
        states = states.reshape(rlsz, states.shape[2], states.shape[3],
                                states.shape[4])
        actions = torch.as_tensor(actions,
                                  dtype=torch.long,
                                  device=self.device).reshape(rlsz, -1)
        returns = torch.as_tensor(returns,
                                  dtype=torch.float,
                                  device=self.device).reshape(rlsz, -1)

        for epoch in range(self.epochs):
            ixs = torch.randint(rlsz,
                                size=(self.batch_size, ),
                                dtype=torch.long)
            s = states[ixs]
            a = actions[ixs].reshape(-1)
            r = returns[ixs].reshape(-1)

            prepolicy, state_values = self(s)
            state_values = state_values.reshape(-1)
            policy_curr = Categorical(logits=prepolicy)

            # Compute normalized, critic-adjusted returns
            adv = r - state_values
            adv = (adv - adv.mean()) / adv.std()

            # Get log_probs for ratio -- Do not backprop through old policy!
            with torch.no_grad():
                prepolicy, _ = self.old_policy(s)
                log_probs_old = Categorical(logits=prepolicy).log_prob(a)
            log_probs_curr = policy_curr.log_prob(a)
            ratio = torch.exp(log_probs_curr - log_probs_old)

            # Get current policy's entropy
            entropy = policy_curr.entropy().mean()

            # Calculate loss
            vf_loss = nn.functional.mse_loss(state_values, r.squeeze())
            pi_loss = -torch.min(
                (adv * ratio),
                (adv *
                 ratio.clamp(1 - self.clipping, 1 + self.clipping))).mean()
            loss = pi_loss + self.critic_coeff * vf_loss - self.entropy_bonus * entropy

            # Logging
            history["writer"].add_scalar("Train/policy_loss", pi_loss.item(),
                                         history["t_learn"])
            history["writer"].add_scalar("Train/value_loss", vf_loss.item(),
                                         history["t_learn"])
            history["writer"].add_scalar("Train/policy_entropy", entropy,
                                         history["t_learn"])

            # Backprop and step with optional gradient logging
            self.optim.zero_grad()
            loss.backward()
            if self.log_gradients:
                for name, param in self.named_parameters():
                    if param.grad is not None:
                        history["writer"].add_histogram(
                            name,
                            param.grad.clone().cpu().data.numpy(),
                            history["t"])
            self.optim.step()

            history["t_learn"] += 1

        return history
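The heart of this update is the clipped surrogate ratio. The same computation on toy log-probabilities and advantages (loss math only, no networks):

import torch

clipping = 0.2
adv = torch.tensor([1.0, -0.5, 2.0])
log_probs_old = torch.tensor([-1.2, -0.7, -2.0])
log_probs_curr = torch.tensor([-1.0, -0.9, -1.8])

ratio = torch.exp(log_probs_curr - log_probs_old)
pi_loss = -torch.min(adv * ratio,
                     adv * ratio.clamp(1 - clipping, 1 + clipping)).mean()
print(pi_loss)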
def evaluate_box_proposals(predictions,
                           dataset,
                           thresholds=None,
                           area="all",
                           limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        img_info = dataset.get_img_info(image_id)
        image_width = img_info["width"]
        image_height = img_info["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(
            -1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height),
                           mode="xywh").convert("xyxy")
        gt_areas = torch.as_tensor(
            [obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <=
                                                       area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
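The recall computation at the end is simply the fraction of ground-truth boxes whose best proposal IoU clears each threshold. A toy version of that tail end:

import torch

gt_overlaps = torch.tensor([0.2, 0.55, 0.6, 0.8, 0.95])  # best proposal IoU per gt box
num_pos = len(gt_overlaps)

thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05, dtype=torch.float32)
recalls = torch.zeros_like(thresholds)
for i, t in enumerate(thresholds):
    recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
print(recalls.mean())  # average recall over the IoU thresholds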
Example #14
    def forward(self, outputs, processed_sizes, target_sizes=None):
        """ This function computes the panoptic prediction from the model's predictions.
        Parameters:
            outputs: This is a dict coming directly from the model. See the model doc for the content.
            processed_sizes: This is a list of tuples (or torch tensors) of sizes of the images that were passed to the
                             model, i.e. the size after data augmentation but before batching.
            target_sizes: This is a list of tuples (or torch tensors) corresponding to the requested final size
                          of each prediction. If left to None, it will default to the processed_sizes
            """
        if target_sizes is None:
            target_sizes = processed_sizes
        assert len(processed_sizes) == len(target_sizes)
        out_logits, raw_masks, raw_boxes = outputs["pred_logits"], outputs[
            "pred_masks"], outputs["pred_boxes"]
        assert len(out_logits) == len(raw_masks) == len(target_sizes)
        preds = []

        def to_tuple(tup):
            if isinstance(tup, tuple):
                return tup
            return tuple(tup.cpu().tolist())

        for cur_logits, cur_masks, cur_boxes, size, target_size in zip(
                out_logits, raw_masks, raw_boxes, processed_sizes,
                target_sizes):
            # we filter empty queries and detection below threshold
            scores, labels = cur_logits.softmax(-1).max(-1)
            keep = labels.ne(outputs["pred_logits"].shape[-1] -
                             1) & (scores > self.threshold)
            cur_scores, cur_classes = cur_logits.softmax(-1).max(-1)
            cur_scores = cur_scores[keep]
            cur_classes = cur_classes[keep]
            cur_masks = cur_masks[keep]
            cur_masks = interpolate(cur_masks[None],
                                    to_tuple(size),
                                    mode="bilinear").squeeze(0)
            cur_boxes = box_ops.box_cxcywh_to_xyxy(cur_boxes[keep])

            h, w = cur_masks.shape[-2:]
            assert len(cur_boxes) == len(cur_classes)

            # It may be that we have several predicted masks for the same stuff class.
            # In the following, we track the list of masks ids for each stuff class (they are merged later on)
            cur_masks = cur_masks.flatten(1)
            stuff_equiv_classes = defaultdict(lambda: [])
            for k, label in enumerate(cur_classes):
                if not self.is_thing_map[label.item()]:
                    stuff_equiv_classes[label.item()].append(k)

            def get_ids_area(masks, scores, dedup=False):
                # This helper function creates the final panoptic segmentation image
                # It also returns the area of the masks that appears on the image

                m_id = masks.transpose(0, 1).softmax(-1)

                if m_id.shape[-1] == 0:
                    # We didn't detect any mask :(
                    m_id = torch.zeros((h, w),
                                       dtype=torch.long,
                                       device=m_id.device)
                else:
                    m_id = m_id.argmax(-1).view(h, w)

                if dedup:
                    # Merge the masks corresponding to the same stuff class
                    for equiv in stuff_equiv_classes.values():
                        if len(equiv) > 1:
                            for eq_id in equiv:
                                m_id.masked_fill_(m_id.eq(eq_id), equiv[0])

                final_h, final_w = to_tuple(target_size)

                seg_img = Image.fromarray(id2rgb(
                    m_id.view(h, w).cpu().numpy()))
                seg_img = seg_img.resize(size=(final_w, final_h),
                                         resample=Image.NEAREST)

                np_seg_img = (torch.ByteTensor(
                    torch.ByteStorage.from_buffer(seg_img.tobytes())).view(
                        final_h, final_w, 3).numpy())
                m_id = torch.from_numpy(rgb2id(np_seg_img))

                area = []
                for i in range(len(scores)):
                    area.append(m_id.eq(i).sum().item())
                return area, seg_img

            area, seg_img = get_ids_area(cur_masks, cur_scores, dedup=True)
            if cur_classes.numel() > 0:
                # We now filter empty masks as long as we find some
                while True:
                    filtered_small = torch.as_tensor(
                        [area[i] <= 4 for i, c in enumerate(cur_classes)],
                        dtype=torch.bool,
                        device=keep.device)
                    if filtered_small.any().item():
                        cur_scores = cur_scores[~filtered_small]
                        cur_classes = cur_classes[~filtered_small]
                        cur_masks = cur_masks[~filtered_small]
                        area, seg_img = get_ids_area(cur_masks, cur_scores)
                    else:
                        break

            else:
                cur_classes = torch.ones(1,
                                         dtype=torch.long,
                                         device=cur_classes.device)

            segments_info = []
            for i, a in enumerate(area):
                cat = cur_classes[i].item()
                segments_info.append({
                    "id": i,
                    "isthing": self.is_thing_map[cat],
                    "category_id": cat,
                    "area": a
                })
            del cur_classes

            with io.BytesIO() as out:
                seg_img.save(out, format="PNG")
                predictions = {
                    "png_string": out.getvalue(),
                    "segments_info": segments_info
                }
            preds.append(predictions)
        return preds
Example #15
def dist_matrix(x, y, c=1.0):
    c = torch.as_tensor(c).type_as(x)
    return _dist_matrix(x, y, c)
Example #16
 def __getitem__(self, index):
     # Load data and get label
     X = torch.as_tensor(self.data[index]).long()
     y = torch.as_tensor(self.labels[index])
     return X, y
Example #17
        if continue_train:
            state = torch.load(path_name)
            self.lr = state['lr']
            self.model.load_state_dict(state['model'])
            self.optimizer.load_state_dict(state['optimizer'])
            self.histories = [[], []]
        else:
            torch.save(self.model, path_name)

    def print_model(self):
        batch_in = next(iter(self.loaders[0]))[0]
        summary(self.model, batch_in.shape)


if __name__ == "__main__":
    possibleRes = torch.as_tensor(np.array([0, 1])).float()

    tttIn = np.random.randint(0, 2, [100, 9])
    tttOut = np.array(
        [[(tttIn[i, 0] and tttIn[i, 4] and tttIn[i, 8]) or (tttIn[i, 2] and tttIn[i, 4] and tttIn[i, 6]) for i in
          range(100)]]).transpose()
    # tttOutNot = (tttOut + 1) % 2
    # tttOut = np.concatenate([tttOut, tttOutNot], axis=0).transpose()
    tttData = data.TensorDataset(torch.as_tensor(tttIn).float(), torch.as_tensor(tttOut).float())

    model = nn.Linear(9, 1)
    print(model(torch.from_numpy(np.array([i for i in range(9)])).float()))

    lr = 0.1
    optimizer = optim.SGD(model.parameters(), lr)
    loss_func = nn.MSELoss()
Example #18
def resize(
    data: dict,
    size: Union[int, Tuple[int, int], Tuple[int, int, int]],
    max_size: Optional[int] = None,
):
    # size can be min_size (scalar) or (w, h) tuple

    def get_size_with_aspect_ratio(image_size, size, max_size=None):
        w, h = image_size
        if max_size is not None:
            min_original_size = float(min(image_size))
            max_original_size = float(max(image_size))
            if max_original_size / min_original_size * size > max_size:
                size = int(
                    round(max_size * min_original_size / max_original_size))

        if max(image_size) == size:
            return size

        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)

        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)

        return (oh, ow)

    def get_size(image_size, size, max_size=None):
        if isinstance(size, (list, tuple)):
            return size[::-1]
        else:
            return get_size_with_aspect_ratio(image_size, size, max_size)

    image = data["data"]
    target = data.copy()

    size = get_size(image.size, size, max_size)
    rescaled_image = F.resize(image, size)

    if len(target) == 1:
        return {"data": rescaled_image}

    ratios = tuple(
        float(s) / float(s_orig)
        for s, s_orig in zip(rescaled_image.size, image.size))
    ratio_width, ratio_height = ratios

    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        scaled_boxes = boxes * torch.as_tensor(
            [ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = scaled_boxes

    if "area" in target:
        area = target["area"]
        scaled_area = area * (ratio_width * ratio_height)
        target["area"] = scaled_area

    target["size"] = torch.tensor(size)

    if "masks" in target:
        target["masks"] = (interpolate(target["masks"][:, None].float(),
                                       size,
                                       mode="nearest")[:, 0] > 0.5)

    target["data"] = rescaled_image

    return target
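Box rescaling in `resize` is a single broadcasted multiply by the per-axis ratios. The same step in isolation, with made-up numbers:

import torch

# xyxy boxes for an image resized from 100x100 to 200x50 (width x height)
boxes = torch.tensor([[10.0, 20.0, 40.0, 80.0]])
ratio_width, ratio_height = 200 / 100, 50 / 100

scaled_boxes = boxes * torch.as_tensor(
    [ratio_width, ratio_height, ratio_width, ratio_height])
print(scaled_boxes)  # tensor([[20., 10., 80., 40.]])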
        cmd = 'cp train_acos_regressor_24_joints.py ./{}/snapshot.py'.format(ckpt_path)
    else:
        cmd = r'copy train_acos_regressor_24_joints.py {}\snapshot.py'.format(ckpt_path)
    print(cmd)
    os.system(cmd)
    
    file = open('{}/validation.txt'.format(ckpt_path), 'w')

    trans = torch.zeros((batch_size, 3), dtype=torch.float64, device=device)

    while batch_num < max_batch_num:
        batch_num += 1
        print('Epoch %03d: training...' % batch_num)
        reg.train()
        for (i, data) in enumerate(dataloader):
            joints = torch.as_tensor(data['joints'], device=device)
            thetas = torch.as_tensor(data['thetas'], device=device)
            betas = torch.as_tensor(data['betas'], device=device)
            
            pred_thetas = reg(joints)
            _, recon_joints = smpl(betas, pred_thetas, trans)
            loss_joints = loss_op(recon_joints, joints)
            loss_thetas = loss_(pred_thetas, thetas)
            loss = loss_thetas + 5 * loss_joints
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if i % 32 == 0:
                print('batch %04d: loss joints: %10.6f loss thetas: % 10.6f' \
                    % (i, loss_joints.data.item(), loss_thetas.data.item()))
Example #20
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.
        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(
            dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"],
                                 format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        for i in range(len(dataset_dict["annotations"])):
            dataset_dict["annotations"][i]["segmentation"] = []

        ### my code ##
        image, dataset_dict = self.aug_handler(
            image=image, dataset_dict_detectron=dataset_dict)
        ### my code ##

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens,
                image)
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(
            np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(dataset_dict, image_shape, transforms,
                                      self.min_box_side_len,
                                      self.proposal_topk)

        if not self.is_train:
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj,
                    transforms,
                    image_shape,
                    keypoint_hflip_indices=self.keypoint_hflip_indices)
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format)
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)

        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"),
                                  "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            dataset_dict["sem_seg"] = sem_seg_gt

        if True:
            vis_img = image.copy()
            bbox_list = [
                BBox.from_list(vals) for vals in
                dataset_dict["instances"].gt_boxes.tensor.numpy().tolist()
            ]
            # seg_list = [Segmentation([Polygon.from_list(poly.tolist(), demarcation=False) for poly in seg_polys]) for seg_polys in dataset_dict["instances"].gt_masks.polygons]
            for bbox in (bbox_list):
                # if len(seg) > 0 and False:
                #     vis_img = draw_segmentation(img=vis_img, segmentation=seg, transparent=True)
                vis_img = draw_bbox(img=vis_img, bbox=bbox)
            aug_vis.step(vis_img)
        return dataset_dict
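The `torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))` call is the usual HWC-to-CHW handoff into the dataloader. Just that conversion:

import numpy as np
import torch

image = (np.random.rand(32, 48, 3) * 255).astype(np.uint8)   # H x W x C
tensor = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))
print(tensor.shape, tensor.dtype)  # torch.Size([3, 32, 48]) torch.uint8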
Example #21
if not args.separate_constants:
    len_constant = args.batch_size
    images = [torch.ones([1, 3, 16, 16]) * (-1 + 2 * i / (len_constant - 1))
              for i in range(len_constant)]
    constant_images = torch.cat(images)
else:
    len_constant = args.batch_size
    images = [torch.ones([1, 3, 16, 16]) * (-1 + 2 * i / (len_constant - 1))
              for i in range(len_constant)]
    constant_image_list = images
# This code creates the low-variance images

# In[13]:


len_low_variance = args.batch_size
X = get_truncated_normal(mean=0, sd=0.05, low=-1, upp=1)
lowvar_images = torch.reshape(
    torch.as_tensor(X.rvs(len_low_variance * 256 * 3), dtype=torch.float),
    (len_low_variance, 3, 16, 16))

model = PixelCNN(nr_resnet=args.nr_resnet, nr_filters=args.nr_filters, 
            input_channels=input_channels, nr_logistic_mix=args.nr_logistic_mix)
model = model.cuda()


# This code creates the SVHN images

# In[15]:

x = datasets.SVHN(root=args.data_dir, split='train', transform=ds_transforms, download=True)
train_loader_svhn = torch.utils.data.DataLoader(x, batch_size=args.batch_size, shuffle=True)
image_batch = next(iter(train_loader_svhn))[0]
print(image_batch.shape)
if args.load_params:
 def normalize(self, image):
     dtype, device = image.dtype, image.device
     mean = torch.as_tensor(self.image_mean, dtype=dtype, device=device)
     std = torch.as_tensor(self.image_std, dtype=dtype, device=device)
     return (image - mean[:, None, None]) / std[:, None, None]
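`normalize` relies on broadcasting `mean[:, None, None]` and `std[:, None, None]` over a `C x H x W` image. The same idea standalone (the mean/std values below are the common ImageNet ones, used here only as an example):

import torch

image = torch.rand(3, 4, 4)
mean = torch.as_tensor([0.485, 0.456, 0.406], dtype=image.dtype, device=image.device)
std = torch.as_tensor([0.229, 0.224, 0.225], dtype=image.dtype, device=image.device)

normalized = (image - mean[:, None, None]) / std[:, None, None]
print(normalized.shape)  # torch.Size([3, 4, 4])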
Example #23
def detect_face(imgs, minsize, pnet, rnet, onet, threshold, factor, device):
    if not isinstance(imgs, Iterable):
        imgs = [imgs]
    if any(img.shape != imgs[0].shape for img in imgs):
        raise Exception(
            "MTCNN batch processing only compatible with equal-dimension images."
        )
    if not isinstance(imgs, torch.Tensor):
        imgs_np = np.stack([np.uint8(img) for img in imgs])
        imgs = torch.as_tensor(imgs_np, device=device).permute(0, 3, 1, 2)

    imgs = imgs.to(device)
    batch_size = len(imgs)
    h, w = imgs.shape[2:4]
    m = 12.0 / minsize
    minl = min(h, w)
    minl = minl * m

    # Create scale pyramid
    scale_i = m
    scales = []
    while minl >= 12:
        scales.append(scale_i)
        scale_i = scale_i * factor
        minl = minl * factor

    # First stage
    boxes = []
    image_inds = []
    all_inds = []
    all_i = 0
    for scale in scales:
        im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1)))
        im_data = (im_data - 127.5) * 0.0078125
        reg, probs = pnet(im_data)

        boxes_scale, image_inds_scale = generateBoundingBox(
            reg, probs[:, 1], scale, threshold[0])
        boxes.append(boxes_scale)
        image_inds.append(image_inds_scale)
        all_inds.append(all_i + image_inds_scale)
        all_i += batch_size

    boxes = torch.cat(boxes, dim=0)
    image_inds = torch.cat(image_inds, dim=0)
    all_inds = torch.cat(all_inds, dim=0)

    # NMS within each scale + image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], all_inds, 0.5)
    boxes, image_inds = boxes[pick], image_inds[pick]

    # NMS within each image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
    boxes, image_inds = boxes[pick], image_inds[pick]

    regw = boxes[:, 2] - boxes[:, 0]
    regh = boxes[:, 3] - boxes[:, 1]
    qq1 = boxes[:, 0] + boxes[:, 5] * regw
    qq2 = boxes[:, 1] + boxes[:, 6] * regh
    qq3 = boxes[:, 2] + boxes[:, 7] * regw
    qq4 = boxes[:, 3] + boxes[:, 8] * regh
    boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0)
    boxes = rerec(boxes)
    y, ey, x, ex = pad(boxes, w, h)

    # Second stage
    if len(boxes) > 0:
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k],
                             (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (24, 24)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = rnet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        score = out1[1, :]
        ipass = score > threshold[1]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        # NMS within each image
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick]
        boxes = bbreg(boxes, mv)
        boxes = rerec(boxes)

    # Third stage
    points = torch.zeros(0, 5, 2, device=device)
    if len(boxes) > 0:
        y, ey, x, ex = pad(boxes, w, h)
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k],
                             (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (48, 48)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = onet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        out2 = out[2].permute(1, 0)
        score = out2[1, :]
        points = out1
        ipass = score > threshold[2]
        points = points[:, ipass]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        w_i = boxes[:, 2] - boxes[:, 0] + 1
        h_i = boxes[:, 3] - boxes[:, 1] + 1
        points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5,
                                                                         1) - 1
        points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(
            5, 1) - 1
        points = torch.stack((points_x, points_y)).permute(2, 1, 0)
        boxes = bbreg(boxes, mv)

        # NMS within each image using "Min" strategy
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        # pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7, 'Min')
        boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick]

    # boxes = boxes.cpu().numpy()
    # points = points.cpu().numpy()
    batch_boxes = []
    batch_points = []
    for b_i in range(batch_size):
        b_i_inds = torch.where(image_inds == b_i)
        batch_boxes.append(boxes[b_i_inds])
        batch_points.append(points[b_i_inds])

    # batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)
    # batch_boxes, batch_points = torch.tensor(batch_boxes), torch.tensor(batch_points)
    # print(f'batch_boxes: {batch_boxes.shape}')
    # print(f'batch_points: {batch_points.shape}')

    return batch_boxes, batch_points
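The scale pyramid at the top of `detect_face` is a geometric sequence controlled by `minsize` and `factor`. In isolation, with typical MTCNN-style values chosen only for illustration:

minsize, factor = 20, 0.709
h, w = 480, 640

m = 12.0 / minsize
minl = min(h, w) * m

scales = []
scale_i = m
while minl >= 12:
    scales.append(scale_i)
    scale_i *= factor
    minl *= factor
print(len(scales), scales[:3])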
Example #24
    def _set_power_law(self, value):
        if not torch.is_tensor(value):
            value = torch.as_tensor(value).to(self.raw_power_law)

        self.initialize(raw_power_law=self.raw_power_law_constraint.
                        inverse_transform(value))
Example #25
 def as_tensors(self, *args, **kwargs):
     "Helper that makes everything a tensor with self.X's type."
     kwargs.setdefault("device", self.X.device)
     kwargs.setdefault("dtype", self.X.dtype)
     return tuple(None if r is None else torch.as_tensor(r, **kwargs)
                  for r in args)
    def train(self) -> None:
        """
        Update policy using the currently gathered rollout buffer.
        """
        self.policy.train()
        # Update optimizer learning rate
        for i in range(self.vehicle_num):
            for param_group in self.policy.ACP[i].optimizer.param_groups:
                param_group["lr"] = self.learning_rate

        # train for n_epochs epochs
        for epoch in range(self.n_epochs):
            approx_kl_divs = []
            # Do a complete pass on the rollout buffer
            for rollout_data in self.rollout_buffer.get(self.batch_size):

                # Re-sample the noise matrix because the log_std has changed
                # if that line is commented (as in SAC)

                values, log_prob, entropy = self.policy.forward(
                    loc_features=rollout_data.loc,
                    weight_features=rollout_data.weight[:, np.newaxis],
                    actions=rollout_data.actions,
                )

                # Normalize advantage
                advantages = rollout_data.advantages
                advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

                # flatten data
                values = torch.flatten(values)
                log_prob = torch.flatten(log_prob)
                entropy = torch.flatten(entropy)
                old_log_prob = torch.flatten(torch.as_tensor(
                    rollout_data.old_log_prob, dtype=torch.float32, device=self.device))
                advantages = torch.flatten(torch.as_tensor(
                    advantages, dtype=torch.float32, device=self.device))
                old_values = rollout_data.old_values
                returns = torch.flatten(torch.as_tensor(
                    rollout_data.returns, dtype=torch.float32, device=self.device))

                # ratio between old and new policy, should be one at the first iteration
                ratio = torch.exp(log_prob - old_log_prob)

                # clipped surrogate loss
                policy_loss_1 = advantages * ratio
                policy_loss_2 = advantages * torch.clamp(ratio, 1 - self.clip_range, 1 + self.clip_range)
                policy_loss = - torch.min(policy_loss_1, policy_loss_2).mean()

                if self.clip_range_vf is None:
                    # No clipping
                    values_pred = values
                else:
                    # Clip the difference between old and new value
                    # NOTE: this depends on the reward scaling
                    old_values = torch.flatten(torch.as_tensor(old_values, dtype=torch.float32, device=self.device))
                    values_pred = old_values + torch.clamp(
                        values - old_values, - self.clip_range_vf, self.clip_range_vf
                    )
                # Value loss using the TD(gae_lambda) target
                value_loss = F.mse_loss(returns, values_pred)

                # Entropy loss favor exploration
                if entropy is None:
                    # Approximate entropy when no analytical form
                    entropy_loss = - torch.mean(- log_prob)
                else:
                    entropy_loss = - torch.mean(entropy)

                loss = policy_loss + self.ent_coef * entropy_loss + self.vf_coef * value_loss

                # Optimization step
                self.policy.optimize(
                    loss=loss,
                    max_grad_norm=self.max_grad_norm,
                )
                approx_kl_divs.append(torch.mean(old_log_prob - log_prob).detach().cpu().numpy())

            if self.target_kl is not None and np.mean(approx_kl_divs) > 1.5 * self.target_kl:
                print(f"Early stopping at step {epoch} due to reaching max kl: {np.mean(approx_kl_divs):.2f}")
                break
Example #27
    def __call__(self, image, target):
        w, h = image.size

        image_id = target["image_id"]
        image_id = torch.tensor([image_id])

        anno = target["annotations"]

        anno = [
            obj for obj in anno if 'iscrowd' not in obj or obj['iscrowd'] == 0
        ]

        boxes = [obj["bbox"] for obj in anno]
        # guard against no boxes via resizing
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        boxes[:, 2:] += boxes[:, :2]
        boxes[:, 0::2].clamp_(min=0, max=w)
        boxes[:, 1::2].clamp_(min=0, max=h)

        classes = [obj["category_id"] for obj in anno]
        classes = torch.tensor(classes, dtype=torch.int64)

        if self.return_masks:
            segmentations = [obj["segmentation"] for obj in anno]
            masks = convert_coco_poly_to_mask(segmentations, h, w)

        keypoints = None
        if anno and "keypoints" in anno[0]:
            keypoints = [obj["keypoints"] for obj in anno]
            keypoints = torch.as_tensor(keypoints, dtype=torch.float32)
            num_keypoints = keypoints.shape[0]
            if num_keypoints:
                keypoints = keypoints.view(num_keypoints, -1, 3)

        keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
        boxes = boxes[keep]
        classes = classes[keep]
        if self.return_masks:
            masks = masks[keep]
        if keypoints is not None:
            keypoints = keypoints[keep]

        target = {}
        target["boxes"] = boxes
        target["labels"] = classes
        if self.return_masks:
            target["masks"] = masks
        target["image_id"] = image_id
        if keypoints is not None:
            target["keypoints"] = keypoints

        # for conversion to coco api
        area = torch.tensor([obj["area"] for obj in anno])
        iscrowd = torch.tensor(
            [obj["iscrowd"] if "iscrowd" in obj else 0 for obj in anno])
        target["area"] = area[keep]
        target["iscrowd"] = iscrowd[keep]

        target["orig_size"] = torch.as_tensor([int(h), int(w)])
        target["size"] = torch.as_tensor([int(h), int(w)])

        return image, target
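The box handling above converts COCO `xywh` boxes to `xyxy`, clamps them to the image, and drops degenerate ones. The same three steps on a toy tensor:

import torch

w, h = 100, 100
boxes = torch.as_tensor([[10.0, 10.0, 30.0, 40.0],    # valid xywh box
                         [95.0, 95.0, 20.0, 20.0],    # spills past the border
                         [50.0, 50.0, 0.0, 0.0]],     # degenerate
                        dtype=torch.float32).reshape(-1, 4)
boxes[:, 2:] += boxes[:, :2]                          # xywh -> xyxy
boxes[:, 0::2].clamp_(min=0, max=w)
boxes[:, 1::2].clamp_(min=0, max=h)

keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0])
print(boxes[keep])  # the degenerate box is filtered out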
Example #28
    def _setup_indices(self, z_indices, cart_indices):
        n_atoms = self.dims // 3
        ind_for_atom = torch.zeros(n_atoms, 3, dtype=torch.long)
        for i in range(n_atoms):
            ind_for_atom[i, 0] = 3 * i
            ind_for_atom[i, 1] = 3 * i + 1
            ind_for_atom[i, 2] = 3 * i + 2
        self.register_buffer("inds_for_atom", ind_for_atom)

        sorted_z_indices = topological_sort(z_indices)
        sorted_z_indices = [[item[0], item[1][0], item[1][1], item[1][2]]
                            for item in sorted_z_indices]
        rev_z_indices = list(reversed(sorted_z_indices))

        mod = [item[0] for item in sorted_z_indices]
        modified_indices = []
        for index in mod:
            modified_indices.extend(self.inds_for_atom[index])
        bond_indices = list(modified_indices[0::3])
        angle_indices = list(modified_indices[1::3])
        dih_indices = list(modified_indices[2::3])

        self.register_buffer("modified_indices",
                             torch.LongTensor(modified_indices))
        self.register_buffer("bond_indices", torch.LongTensor(bond_indices))
        self.register_buffer("angle_indices", torch.LongTensor(angle_indices))
        self.register_buffer("dih_indices", torch.LongTensor(dih_indices))
        self.register_buffer("sorted_z_indices",
                             torch.LongTensor(sorted_z_indices))
        self.register_buffer("rev_z_indices", torch.LongTensor(rev_z_indices))

        #
        # Setup indexing for reverse pass.
        #
        # First, create an array that maps from an atom index into mean_bonds, std_bonds, etc.
        atom_to_stats = torch.zeros(n_atoms, dtype=torch.long)
        for i, j in enumerate(mod):
            atom_to_stats[j] = i
        self.register_buffer("atom_to_stats", atom_to_stats)

        # Next create permutation vector that is used in the reverse pass. This maps
        # from the original atom indexing to the order that the cartesian coordinates
        # will be built in. This will be filled in as we go.
        rev_perm = torch.zeros(n_atoms, dtype=torch.long)
        self.register_buffer("rev_perm", rev_perm)
        # Next create the inverse of rev_perm. This will be filled in as we go.
        rev_perm_inv = torch.zeros(n_atoms, dtype=torch.long)
        self.register_buffer("rev_perm_inv", rev_perm_inv)

        # Create the list of columns that form our initial cartesian coordinates.
        init_cart_indices = self.inds_for_atom[cart_indices].view(-1)
        self.register_buffer("init_cart_indices", init_cart_indices)

        # Update our permutation vectors for the initial cartesian atoms.
        for i, j in enumerate(cart_indices):
            self.rev_perm[i] = torch.as_tensor(j, dtype=torch.long)
            self.rev_perm_inv[j] = torch.as_tensor(i, dtype=torch.long)

        # Break Z into blocks, where all of the atoms within a block can be built
        # in parallel, because they only depend on already-cartesian atoms.
        all_cart = set(cart_indices)
        # The next cartesian slot to fill comes right after the initial
        # cartesian atoms.
        current_cart_ind = len(cart_indices)
        blocks = []
        while sorted_z_indices:
            next_z_indices = []
            next_cart = set()
            block = []
            for atom1, atom2, atom3, atom4 in sorted_z_indices:
                if all(a in all_cart for a in (atom2, atom3, atom4)):
                    # We can build this atom from existing cartesian atoms, so we add
                    # it to the list of cartesian atoms available for the next block.
                    next_cart.add(atom1)

                    # Add this atom to our permutation matrices.
                    self.rev_perm[current_cart_ind] = atom1
                    self.rev_perm_inv[atom1] = current_cart_ind
                    current_cart_ind += 1

                    # Next, we convert the indices for atoms2-4 from their normal values
                    # to the appropriate indices to index into the cartesian array.
                    atom2_mod = self.rev_perm_inv[atom2]
                    atom3_mod = self.rev_perm_inv[atom3]
                    atom4_mod = self.rev_perm_inv[atom4]

                    # Finally, we append this information to the current block.
                    block.append([atom1, atom2_mod, atom3_mod, atom4_mod])
                else:
                    # We can't build this atom from existing cartesian atoms,
                    # so put it on the list for next time.
                    next_z_indices.append([atom1, atom2, atom3, atom4])
            sorted_z_indices = next_z_indices
            all_cart = all_cart.union(next_cart)
            block = torch.as_tensor(block, dtype=torch.long)
            blocks.append(block)
        self.rev_blocks = blocks
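
The loop above groups z-matrix rows into blocks that can be converted in parallel, because every atom in a block depends only on atoms that already have cartesian coordinates. A minimal standalone sketch of that grouping idea, using a hypothetical `group_into_blocks` helper on plain lists rather than the registered buffers above:

def group_into_blocks(z_rows, initial_cart):
    # z_rows are [atom, ref1, ref2, ref3]; an atom can be placed once all of
    # its three reference atoms already have cartesian coordinates.
    available = set(initial_cart)
    remaining = list(z_rows)
    blocks = []
    while remaining:
        block = [row for row in remaining
                 if all(ref in available for ref in row[1:])]
        if not block:
            raise ValueError("unsatisfiable z-matrix dependencies")
        remaining = [row for row in remaining if row not in block]
        available.update(row[0] for row in block)
        blocks.append(block)
    return blocks

# Atoms 0-2 start cartesian; atom 3 depends on them, atom 4 depends on atom 3.
print(group_into_blocks([[3, 0, 1, 2], [4, 3, 1, 2]], initial_cart=[0, 1, 2]))
# -> [[[3, 0, 1, 2]], [[4, 3, 1, 2]]]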
Example #29
0
    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        super().__init__()
        log_std = -0.5 * np.ones(act_dim, dtype=np.float32)
        self.log_std = torch.nn.Parameter(torch.as_tensor(log_std))
        self.mu_net = mlp([obs_dim] + list(hidden_sizes) + [act_dim],
                          activation)
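
The snippet assumes an `mlp` helper that builds a feed-forward network from a list of layer sizes and an activation class. One plausible sketch consistent with how it is called above (an assumption, not necessarily the original helper):

import torch.nn as nn

def mlp(sizes, activation, output_activation=nn.Identity):
    # Stack Linear layers with `activation` between hidden layers and
    # `output_activation` after the final layer.
    layers = []
    for j in range(len(sizes) - 1):
        act = activation if j < len(sizes) - 2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j + 1]), act()]
    return nn.Sequential(*layers)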
Example #30
0
        method (str): method to determine the spike threshold
                      (relevant for surrogate gradients)
        alpha (float): hyperparameter to use in surrogate gradient computation
    """

    tau_syn_inv: torch.Tensor = torch.as_tensor(1.0 / 5e-3)
    tau_mem_inv: torch.Tensor = torch.as_tensor(1.0 / 1e-2)
    v_leak: torch.Tensor = torch.as_tensor(0.0)
    v_th: torch.Tensor = torch.as_tensor(1.0)
    v_reset: torch.Tensor = torch.as_tensor(0.0)
    method: str = "super"
    alpha: float = torch.as_tensor(100.0)


default_bio_parameters = LIFParameters(
    tau_syn_inv=torch.as_tensor(1 / 0.5),
    tau_mem_inv=torch.as_tensor(1 / 20.0),
    v_leak=torch.as_tensor(-65.0),
    v_th=torch.as_tensor(-50.0),
    v_reset=torch.as_tensor(-65.0),
)


class LIFState(NamedTuple):
    """State of a LIF neuron

    Parameters:
        z (torch.Tensor): recurrent spikes
        v (torch.Tensor): membrane potential
        i (torch.Tensor): synaptic input current
    """
Example #31
0
    def get_mvar_set_cpu(self, Y: Tensor) -> Tensor:
        r"""Find MVaR set based on the definition in [Prekopa2012MVaR]_.

        NOTE: This is much faster on CPU for large `n_w` than the alternative but it
        is significantly slower on GPU. Based on empirical evidence, this is recommended
        when running on CPU with `n_w > 64`.

        This first calculates the CDF for each point on the extended domain of the
        random variable (the grid defined by the given samples), then takes the
        values with CDF equal to (rounded if necessary) `alpha`. The non-dominated
        subset of these form the MVaR set.

        Args:
            Y: A `batch x n_w x m`-dim tensor of outcomes. This is currently
                restricted to `m = 2` objectives.
                TODO: Support `m > 2` objectives.

        Returns:
            A `batch`-length list of `k x m`-dim tensors of MVaR values, where `k`
            depends on the corresponding batch inputs. Note that MVaR values in general
            are not in-sample points.
        """
        if Y.dim() == 3:
            return [self.get_mvar_set_cpu(y_) for y_ in Y]
        m = Y.shape[-1]
        if m != 2:  # pragma: no cover
            raise ValueError(
                "`get_mvar_set_cpu` only supports `m=2` outcomes!")
        # Generate sets of all unique values in each output dimension.
        # Note that points in MVaR are bounded from above by the
        # independent VaR of each objective. Hence, we only need to
        # consider the unique outcomes that are less than or equal to
        # the VaR of the independent objectives
        var_alpha_idx = ceil(self.alpha * self.n_w) - 1
        Y_sorted = Y.topk(Y.shape[0] - var_alpha_idx, dim=0,
                          largest=False).values
        unique_outcomes_list = [
            Y_sorted[:, i].unique().tolist()[::-1] for i in range(m)
        ]
        # Convert this into a list of m dictionaries mapping values to indices.
        unique_outcomes = [
            dict(zip(outcomes, range(len(outcomes))))
            for outcomes in unique_outcomes_list
        ]
        # Initialize a tensor counting the number of points in Y that a given grid point
        # is dominated by. This will essentially be a non-normalized CDF.
        counter_tensor = torch.zeros(
            [len(outcomes) for outcomes in unique_outcomes],
            dtype=torch.long,
            device=Y.device,
        )
        # populate the tensor, counting the dominated points.
        # we only need to consider points in Y where at least one
        # objective is less than the max objective value in
        # unique_outcomes_list
        max_vals = torch.tensor([o[0] for o in unique_outcomes_list],
                                dtype=Y.dtype,
                                device=Y.device)
        mask = (Y < max_vals).any(dim=-1)
        counter_tensor += self.n_w - mask.sum()
        Y_pruned = Y[mask]
        for y_ in Y_pruned:
            starting_idcs = [
                unique_outcomes[i].get(y_[i].item(), 0) for i in range(m)
            ]
            counter_tensor[starting_idcs[0]:, starting_idcs[1]:] += 1

        # Get the count alpha-level points should have.
        alpha_count = ceil(self.alpha * self.n_w)
        # Get the alpha level indices.
        alpha_level_indices = (counter_tensor == alpha_count).nonzero(
            as_tuple=False)
        # If there are no exact alpha level points, get the smallest alpha' > alpha
        # and find the corresponding alpha level indices.
        if alpha_level_indices.numel() == 0:
            min_greater_than_alpha = counter_tensor[
                counter_tensor > alpha_count].min()
            alpha_level_indices = (
                counter_tensor == min_greater_than_alpha).nonzero(
                    as_tuple=False)
        unique_outcomes = [
            torch.as_tensor(list(outcomes.keys()),
                            device=Y.device,
                            dtype=Y.dtype) for outcomes in unique_outcomes
        ]
        alpha_level_points = torch.stack(
            [
                unique_outcomes[i][alpha_level_indices[:, i]]
                for i in range(len(unique_outcomes))
            ],
            dim=-1,
        )
        # MVaR is simply the non-dominated subset of alpha level points.
        if self.filter_dominated:
            mask = is_non_dominated(alpha_level_points)
            mvar = alpha_level_points[mask]
        else:
            mvar = alpha_level_points
        return mvar
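
The counting step above can be hard to follow from the slice-increment trick alone. A brute-force toy version of the same idea for `m = 2` (hypothetical data, not the optimized implementation above): for every grid point on the product of unique outcome values, count how many samples dominate it componentwise, then keep the grid points whose count reaches `ceil(alpha * n_w)`.

import torch
from math import ceil

Y = torch.tensor([[1.0, 4.0], [2.0, 3.0], [3.0, 1.0], [2.5, 2.0]])
alpha, n_w = 0.5, Y.shape[0]
grid0 = Y[:, 0].unique()          # ascending unique values per objective
grid1 = Y[:, 1].unique()
counts = torch.zeros(len(grid0), len(grid1), dtype=torch.long)
for i, g0 in enumerate(grid0):
    for j, g1 in enumerate(grid1):
        # number of samples that dominate the grid point (g0, g1)
        counts[i, j] = ((Y[:, 0] >= g0) & (Y[:, 1] >= g1)).sum()
alpha_count = ceil(alpha * n_w)
candidates = (counts == alpha_count).nonzero(as_tuple=False)
alpha_level_points = torch.stack(
    [grid0[candidates[:, 0]], grid1[candidates[:, 1]]], dim=-1)
print(alpha_level_points)  # MVaR is the non-dominated subset of these points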
Example #32
0
def ctc_align(args, device):
    """ESPnet-specific interface for CTC segmentation.

    Parses configuration, infers the CTC posterior probabilities,
    and then aligns start and end of utterances using CTC segmentation.
    Results are written to the output file given in the args.

    :param args: given configuration
    :param device: for inference; one of ['cuda', 'cpu']
    :return:  0 on success
    """
    model, train_args = load_trained_model(args.model)
    assert isinstance(model, ASRInterface)
    load_inputs_and_targets = LoadInputsAndTargets(
        mode="asr",
        load_output=True,
        sort_in_input_length=False,
        preprocess_conf=train_args.preprocess_conf
        if args.preprocess_conf is None else args.preprocess_conf,
        preprocess_args={"train": False},
    )
    logging.info(f"Decoding device={device}")
    # Warn for nets with high memory consumption on long audio files
    if hasattr(model, "enc"):
        encoder_module = model.enc.__class__.__module__
    elif hasattr(model, "encoder"):
        encoder_module = model.encoder.__class__.__module__
    else:
        encoder_module = "Unknown"
    logging.info(f"Encoder module: {encoder_module}")
    logging.info(f"CTC module:     {model.ctc.__class__.__module__}")
    if "rnn" not in encoder_module:
        logging.warning(
            "No BLSTM model detected; memory consumption may be high.")
    model.to(device=device).eval()
    # read audio and text json data
    with open(args.data_json, "rb") as f:
        js = json.load(f)["utts"]
    with open(args.utt_text, "r", encoding="utf-8") as f:
        lines = f.readlines()
        i = 0
        text = {}
        segment_names = {}
        for name in js.keys():
            text_per_audio = []
            segment_names_per_audio = []
            while i < len(lines) and lines[i].startswith(name):
                text_per_audio.append(lines[i][lines[i].find(" ") + 1:])
                segment_names_per_audio.append(lines[i][:lines[i].find(" ")])
                i += 1
            text[name] = text_per_audio
            segment_names[name] = segment_names_per_audio
    # apply configuration
    config = CtcSegmentationParameters()
    if args.subsampling_factor is not None:
        config.subsampling_factor = args.subsampling_factor
    if args.frame_duration is not None:
        config.frame_duration_ms = args.frame_duration
    if args.min_window_size is not None:
        config.min_window_size = args.min_window_size
    if args.max_window_size is not None:
        config.max_window_size = args.max_window_size
    config.char_list = train_args.char_list
    if args.use_dict_blank and args.use_dict_blank > 0:
        config.blank = config.char_list[0]
        logging.info(f"Blank char was set to >{config.blank}<")
    else:
        logging.debug(
            f"Blank char >{config.blank}< (align) >{config.char_list[0]}< (model)"
        )
        if config.blank != config.char_list[0]:
            logging.error(
                "Blank char mismatch; this can result in an IndexError.")
            logging.error(
                "Pass the parameter --use-dict-blank 1 to asr_align.py")
    if args.scoring_length is not None:
        config.score_min_mean_over_L = args.scoring_length
    logging.info(
        f"Frame timings: {config.frame_duration_ms}ms * {config.subsampling_factor}"
    )
    # Iterate over audio files to decode and align
    for idx, name in enumerate(js.keys(), 1):
        logging.info("(%d/%d) Aligning " + name, idx, len(js.keys()))
        batch = [(name, js[name])]
        feat, label = load_inputs_and_targets(batch)
        feat = feat[0]
        with torch.no_grad():
            # Encode input frames
            enc_output = model.encode(
                torch.as_tensor(feat).to(device)).unsqueeze(0)
            # Apply ctc layer to obtain log character probabilities
            lpz = model.ctc.log_softmax(enc_output)[0].cpu().numpy()
        # Prepare the text for aligning
        ground_truth_mat, utt_begin_indices = prepare_text(config, text[name])
        # Align using CTC segmentation
        timings, char_probs, state_list = ctc_segmentation(
            config, lpz, ground_truth_mat)
        logging.debug(f"state_list = {state_list}")
        # Obtain list of utterances with time intervals and confidence score
        segments = determine_utterance_segments(config, utt_begin_indices,
                                                char_probs, timings,
                                                text[name])
        # Write to "segments" file
        for i, boundary in enumerate(segments):
            utt_segment = (f"{segment_names[name][i]} {name} {boundary[0]:.2f}"
                           f" {boundary[1]:.2f} {boundary[2]:.9f}\n")
            args.output.write(utt_segment)
    return 0
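
The `utt_text` parsing loop above expects one line per segment of the form `<segment_name> <transcription>`, with segment names prefixed by the audio name from the data json, and it relies on the lines appearing in the same order as the json keys. A toy run of that loop on in-memory data (hypothetical names):

lines = [
    "audio1_0001 HELLO WORLD\n",
    "audio1_0002 GOOD MORNING\n",
    "audio2_0001 ANOTHER UTTERANCE\n",
]
js = {"audio1": {}, "audio2": {}}  # stands in for the data json
i, text, segment_names = 0, {}, {}
for name in js.keys():
    text_per_audio, segment_names_per_audio = [], []
    while i < len(lines) and lines[i].startswith(name):
        text_per_audio.append(lines[i][lines[i].find(" ") + 1:])
        segment_names_per_audio.append(lines[i][:lines[i].find(" ")])
        i += 1
    text[name] = text_per_audio
    segment_names[name] = segment_names_per_audio
print(segment_names)
# {'audio1': ['audio1_0001', 'audio1_0002'], 'audio2': ['audio2_0001']}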
Example #33
0
    def _prepare_batches(self):
        dataset_size = len(self.group_ids)
        # get the sampled indices from the sampler
        sampled_ids = torch.as_tensor(list(self.sampler))
        # potentially not all elements of the dataset were sampled
        # by the sampler (e.g., DistributedSampler).
        # construct a tensor which contains -1 if the element was
        # not sampled, and a non-negative number indicating the
        # order where the element was sampled.
        # for example, if sampled_ids = [3, 1] and dataset_size = 5,
        # the order is [-1, 1, -1, 0, -1]
        order = torch.full((dataset_size,), -1, dtype=torch.int64)
        order[sampled_ids] = torch.arange(len(sampled_ids))

        # get a mask with the elements that were sampled
        mask = order >= 0

        # find the elements that belong to each individual cluster
        clusters = [(self.group_ids == i) & mask for i in self.groups]
        # get relative order of the elements inside each cluster
        # that follows the order from the sampler
        relative_order = [order[cluster] for cluster in clusters]
        # with the relative order, find the absolute order in the
        # sampled space
        permutation_ids = [s[s.sort()[1]] for s in relative_order]
        # permute each cluster so that they follow the order from
        # the sampler
        permuted_clusters = [sampled_ids[idx] for idx in permutation_ids]

        # splits each cluster in batch_size, and merge as a list of tensors
        splits = [c.split(self.batch_size) for c in permuted_clusters]
        merged = tuple(itertools.chain.from_iterable(splits))

        # now each batch internally has the right order, but
        # they are grouped by clusters. Find the permutation between
        # different batches that brings them as close as possible to
        # the order that we have in the sampler. For that, we will consider the
        # ordering as coming from the first element of each batch, and sort
        # correspondingly
        first_element_of_batch = [t[0].item() for t in merged]
        # get an inverse mapping from sampled indices and the position where
        # they occur (as returned by the sampler)
        inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())}
        # from the first element in each batch, get a relative ordering
        first_index_of_batch = torch.as_tensor(
            [inv_sampled_ids_map[s] for s in first_element_of_batch]
        )

        # permute the batches so that they approximately follow the order
        # from the sampler
        permutation_order = first_index_of_batch.sort(0)[1].tolist()
        # finally, permute the batches
        batches = [merged[i].tolist() for i in permutation_order]

        if self.drop_uneven:
            kept = []
            for batch in batches:
                if len(batch) == self.batch_size:
                    kept.append(batch)
            batches = kept
        return batches
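
A quick standalone check of the ordering trick described in the comments: with `sampled_ids = [3, 1]` and `dataset_size = 5`, `order` comes out as `[-1, 1, -1, 0, -1]`.

import torch

dataset_size = 5
sampled_ids = torch.as_tensor([3, 1])
order = torch.full((dataset_size,), -1, dtype=torch.int64)
order[sampled_ids] = torch.arange(len(sampled_ids))
print(order)        # tensor([-1,  1, -1,  0, -1])
mask = order >= 0   # elements that the sampler actually produced
print(mask)         # tensor([False,  True, False,  True, False])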
Example #34
0
        batch_actions.append(action.item())
        episode_rewards.append(reward)

        if done:
            episode_return = sum(episode_rewards)
            episode_length = len(episode_rewards)

            batch_episode_returns.append(episode_return)
            batch_episode_lengths.append(episode_length)

            batch_weights += [episode_return] * episode_length

            observation = env.reset()
            done = False
            episode_rewards = []

            if len(batch_observations) > batch_size:
                break
            
    n.train()
    opt.zero_grad()

    logits = n(torch.as_tensor(batch_observations).type(torch.FloatTensor))
    loss = -torch.mean(torch.FloatTensor(batch_weights) * dist.Categorical(logits=logits).log_prob(torch.IntTensor(batch_actions)))
    print(f" loss: {loss:.2f} return: {np.mean(batch_episode_returns):.2f} episode length: {np.mean(batch_episode_lengths):.2f}")
    loss.backward()

    opt.step()

torch.save(n.state_dict(), './gym-cartpole-torch-model.pt')
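
The loss in this snippet is the usual return-weighted negative log-likelihood of the actions taken. A self-contained sketch of that computation with dummy data, assuming `dist` refers to `torch.distributions`:

import torch
from torch import distributions as dist

logits = torch.randn(4, 2, requires_grad=True)   # 4 observations, 2 actions
actions = torch.tensor([0, 1, 1, 0])
weights = torch.tensor([5.0, 5.0, 3.0, 3.0])     # episode return repeated per step
loss = -torch.mean(weights * dist.Categorical(logits=logits).log_prob(actions))
loss.backward()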
Example #35
0
    def __getitem__(self, index):
        """
        Given the video index, return the list of frames, label, and video
        index if the video frames can be fetched.
        Args:
            index (int): the video index provided by the pytorch sampler.
        Returns:
            frames (tensor): the frames sampled from the video. The dimension
                is `channel` x `num frames` x `height` x `width`.
            label (int): the label of the current video.
            index (int): the index of the video.
        """

        if self.mode in ["train", "val"]:
            # -1 indicates random sampling.
            spatial_sample_index = -1
            min_scale = self.cfg.DATA.TRAIN_JITTER_SCALES[0]
            max_scale = self.cfg.DATA.TRAIN_JITTER_SCALES[1]
            crop_size = self.cfg.DATA.TRAIN_CROP_SIZE
        elif self.mode in ["test"]:
            # spatial_sample_index is in [0, 1, 2]. Corresponding to left,
            # center, or right if width is larger than height, and top, middle,
            # or bottom if height is larger than width.
            spatial_sample_index = (self._spatial_temporal_idx[index] %
                                    self.cfg.TEST.NUM_SPATIAL_CROPS)
            min_scale, max_scale, crop_size = [self.cfg.DATA.TEST_CROP_SIZE] * 3
            # The testing is deterministic and no jitter should be performed.
            # min_scale, max_scale, and crop_size are expected to be the same.
            assert len({min_scale, max_scale, crop_size}) == 1
        else:
            raise NotImplementedError("Does not support {} mode".format(
                self.mode))

        label = self._labels[index]

        #         seq = self.get_seq_frames(index)
        seq = self.get_seq_frames_new(index)

        frames = torch.as_tensor(
            utils.retry_load_images(
                [self._path_to_videos[index][frame] for frame in seq],
                self._num_retries,
            ))

        # Perform color normalization.
        frames = utils.tensor_normalize(frames, self.cfg.DATA.MEAN,
                                        self.cfg.DATA.STD)

        # T H W C -> C T H W.
        frames = frames.permute(3, 0, 1, 2)
        # Perform data augmentation.
        frames = utils.spatial_sampling_new(
            frames,
            self.cfg,
            spatial_idx=spatial_sample_index,
            min_scale=min_scale,
            max_scale=max_scale,
            crop_size=crop_size,
            random_horizontal_flip=self.cfg.DATA.RANDOM_FLIP,
            inverse_uniform_sampling=self.cfg.DATA.INV_UNIFORM_SAMPLE,
        )
        frames = utils.pack_pathway_output(self.cfg, frames)
        return frames, label, index, {}
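
A quick shape check of the `T H W C -> C T H W` permute above, with dummy sizes:

import torch

frames = torch.zeros(8, 224, 224, 3)   # T x H x W x C
frames = frames.permute(3, 0, 1, 2)    # -> C x T x H x W
print(frames.shape)                    # torch.Size([3, 8, 224, 224])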
Example #36
0
import torch
import numpy as np
import matplotlib.pyplot as plt
import torch.autograd.profiler as profiler

import util

if __name__ == '__main__':
    n = 5
    s = 121
    y = torch.as_tensor(util.gaussian_shaped_labels(4, (s, s)))

    response = torch.zeros((n, 1, s, s))
    response[0, 0, 60, 60] = 100
    response[1, 0, 0, 60] = 100
    response[2, 0, 60, 0] = 100
    response[3, 0, 30, 100] = 100
    response[4, 0, 80, 90] = 100

    with profiler.profile(use_cuda=True,
                          record_shapes=True,
                          profile_memory=True,
                          with_stack=True) as p:
        # with profiler.record_function('model_inference'):
        fake_y = util.create_fake_y(y, response)

    print(
        p.key_averages(group_by_stack_n=5).table(
            sort_by="self_cuda_time_total", row_limit=-1))

    fig, ax = plt.subplots(2, n)
Example #37
0
    def merge_bg(self, img, label, bg_dir):
        
        sometimes = lambda aug: iaa.Sometimes(0.5, aug)

        # merge bg
        random_bg = np.random.choice([0, 1, 2])
        list_sample_bg = glob.glob(bg_dir+'/*.jpg') + glob.glob(bg_dir+'/*/*.jpg') + glob.glob(bg_dir+'/*/*/*.jpg')  + glob.glob(bg_dir+'/*/*/*/*.jpg') 
        
        # 0: original 
        # 1: list_sample_bg
        # 2: pure bg
        if random_bg == 1:
            bg_path = np.random.choice(list_sample_bg)
            bg = cv2.imread(bg_path, cv2.IMREAD_COLOR)
            bg = cv2.resize(bg, (self.input_size, self.input_size), interpolation=cv2.INTER_LINEAR)
            # bg = bg.astype(np.float32) # [:, :, ::-1] # RGB to BGR!!!
        elif random_bg == 2:
            # Generate Bg
            bg = np.random.randint(255, size=3)
            bg = bg.reshape(1,1,3)
            bg = np.repeat(bg, self.input_size, axis=0)
            bg = np.repeat(bg, self.input_size, axis=1)
            bg = bg.astype(np.uint8)
            # Augmentation
            bg_seq = iaa.Sequential(
                [
                    # execute 1 to 6 of the following (less important) augmenters per image
                    # don't execute all of them, as that would often be way too strong
                    iaa.SomeOf((1, 6),
                        [
                            sometimes(iaa.Superpixels(p_replace=(0, 1.0), n_segments=(20, 200))), # convert images into their superpixel representation
                            iaa.OneOf([
                                iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
                                iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
                                iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11
                            ]),
                            iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)), # sharpen images
                            iaa.Emboss(alpha=(0, 1.0), strength=(0, 2.0)), # emboss images
                            # search either for all edges or for directed edges,
                            # blend the result with the original image using a blobby mask
                            iaa.SimplexNoiseAlpha(iaa.OneOf([
                                iaa.EdgeDetect(alpha=(0.5, 1.0)),
                                iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
                            ])),
                            iaa.OneOf([
                                iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5), # add gaussian noise to images
                                iaa.AdditiveLaplaceNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
                                iaa.AdditivePoissonNoise(lam=(0.0, 4.0), per_channel=0.5)
                            ]),
                            iaa.OneOf([
                                iaa.Dropout((0.01, 0.1), per_channel=0.5), # randomly remove up to 10% of the pixels
                                iaa.CoarseDropout((0.03, 0.2), size_percent=(0.02, 0.05), per_channel=0.2),
                            ]),
                            iaa.Invert(0.1, per_channel=True), # invert color channels
                            iaa.AddToHueAndSaturation((-20, 20)), # change hue and saturation
                            # either change the brightness of the whole image (sometimes
                            # per channel) or change the brightness of subareas
                            iaa.OneOf([
                                iaa.Multiply((0.5, 1.5), per_channel=0.5),
                                iaa.FrequencyNoiseAlpha(
                                    exponent=(-4, 0),
                                    first=iaa.Multiply((0.5, 1.5), per_channel=True),
                                    second=iaa.ContrastNormalization((0.5, 2.0))
                                )
                            ]),
                            iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), # improve or worsen the contrast
                            sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25)), # move pixels locally around (with random strengths)
                            iaa.Add((-25, 25), per_channel=0.5), # change brightness of images (by -25 to 25 of original value)
                            iaa.OneOf([
                                iaa.ImpulseNoise((0.01, 0.1)),
                                iaa.SaltAndPepper((0.01, 0.1), per_channel=0.2),
                            ]),
                            iaa.JpegCompression(),

                        ],
                        random_order=True
                    ),
                    iaa.OneOf([
                        iaa.GaussianBlur((0, 3.0)), # blur images with a sigma between 0 and 3.0
                        iaa.AverageBlur(k=(2, 7)), # blur image using local means with kernel sizes between 2 and 7
                        iaa.MedianBlur(k=(3, 11)), # blur image using local medians with kernel sizes between 3 and 11
                        iaa.JpegCompression(),
                        iaa.Multiply((0.5, 1.5), per_channel=0.5),
                        iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),
                    ]),
                ],
                random_order=False
            )
            bg = bg_seq.augment_image(bg)

        if random_bg >= 1:
            bg = torch.as_tensor(bg.astype(np.float32))
            bg = torch.transpose(torch.transpose(bg, 1, 2), 0, 1)
            img = img * label + bg * (1 - label)
        
        return img
    # Bacon
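
The final compositing step in `merge_bg` blends foreground and background with the label acting as a soft alpha matte. A minimal sketch with hypothetical shapes:

import torch

img = torch.rand(3, 64, 64)     # foreground image, C x H x W
bg = torch.rand(3, 64, 64)      # background image, C x H x W
label = torch.rand(1, 64, 64)   # foreground matte in [0, 1], broadcast over channels
merged = img * label + bg * (1 - label)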
Example #38
0
def torch_trilinear_interpolation(
    volume: torch.Tensor,
    coords: torch.Tensor,
) -> torch.Tensor:
    """Evaluates the data volume at given coordinates using trilinear
    interpolation on a torch tensor.

    Interpolation is done using the device on which the volume is stored.

    Parameters
    ----------
    volume : torch.Tensor with 3D or 4D shape
        The input volume to interpolate from
    coords : torch.Tensor with shape (N,3)
        The coordinates where to interpolate

    Returns
    -------
    output : torch.Tensor with shape (N, #modalities)
        The list of interpolated values

    References
    ----------
    [1] https://spie.org/samples/PM159.pdf
    """
    # Get device, and make sure volume and coords are using the same one
    assert volume.device == coords.device, "volume on device: {}; " \
                                           "coords on device: {}".format(
                                               volume.device,
                                               coords.device)
    coords = coords.type(torch.float32)
    volume = volume.type(torch.float32)

    device = volume.device
    # NOTE: `idx` (the 8 cube-corner offsets) and `B1` (the trilinear basis
    # matrix) are module-level constants defined outside this snippet.
    local_idx = idx[:]

    # Send data to device
    idx_torch = torch.as_tensor(local_idx, dtype=torch.float, device=device)
    B1_torch = torch.as_tensor(B1, dtype=torch.float, device=device)

    if volume.dim() <= 2 or volume.dim() >= 5:
        raise ValueError("Volume must be 3D or 4D!")

    if volume.dim() == 3:
        # torch needs indices to be cast to long
        indices_unclipped = (coords[:, None, :] + idx_torch).reshape(
            (-1, 3)).long()

        # Clip indices to make sure we don't go out-of-bounds
        lower = torch.as_tensor([0, 0, 0]).to(device)
        upper = (torch.as_tensor(volume.shape) - 1).to(device)
        indices = torch.min(torch.max(indices_unclipped, lower), upper)

        # Fetch volume data at indices
        P = volume[indices[:, 0], indices[:, 1], indices[:, 2]].reshape(
            (coords.shape[0], -1)).t()

        d = coords - torch.floor(coords)
        dx, dy, dz = d[:, 0], d[:, 1], d[:, 2]
        Q1 = torch.stack([
            torch.ones_like(dx), dx, dy, dz, dx * dy, dy * dz, dx * dz,
            dx * dy * dz
        ],
                         dim=0)
        output = torch.sum(P * torch.mm(B1_torch.t(), Q1), dim=0)

        return output

    if volume.dim() == 4:
        # 8 coordinates of the corners of the cube, for each input coordinate
        indices_unclipped = torch.floor(coords[:, None, :] +
                                        idx_torch).reshape((-1, 3)).long()

        # Clip indices to make sure we don't go out-of-bounds
        lower = torch.as_tensor([0, 0, 0], device=device)
        upper = torch.as_tensor(volume.shape[:3], device=device) - 1
        indices = torch.min(torch.max(indices_unclipped, lower), upper)

        # Fetch volume data at indices
        P = volume[indices[:, 0], indices[:, 1], indices[:, 2], :].reshape(
            (coords.shape[0], 8, volume.shape[-1]))

        d = coords - torch.floor(coords)
        dx, dy, dz = d[:, 0], d[:, 1], d[:, 2]
        Q1 = torch.stack([
            torch.ones_like(dx), dx, dy, dz, dx * dy, dy * dz, dx * dz,
            dx * dy * dz
        ],
                         dim=0)
        output = torch.sum(P * torch.mm(B1_torch.t(), Q1).t()[:, :, None],
                           dim=1)

        return output.type(torch.float32)

    raise ValueError(
        "There was a problem with the volume's number of dimensions!")
Example #39
0
    def __call__(self, dataset_dict):
        """
        Args:
            dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format.

        Returns:
            dict: a format that builtin models in detectron2 accept
        """
        dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
        # USER: Write your own image loading if it's not from a file
        image = utils.read_image(dataset_dict["file_name"], format=self.img_format)
        utils.check_image_size(dataset_dict, image)

        if "annotations" not in dataset_dict:
            image, transforms = T.apply_transform_gens(
                ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image
            )
        else:
            # Crop around an instance if there are instances in the image.
            # USER: Remove if you don't use cropping
            if self.crop_gen:
                crop_tfm = utils.gen_crop_transform_with_instance(
                    self.crop_gen.get_crop_size(image.shape[:2]),
                    image.shape[:2],
                    np.random.choice(dataset_dict["annotations"]),
                )
                image = crop_tfm.apply_image(image)
            image, transforms = T.apply_transform_gens(self.tfm_gens, image)
            if self.crop_gen:
                transforms = crop_tfm + transforms

        image_shape = image.shape[:2]  # h, w

        # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory,
        # but not efficient on large generic data structures due to the use of pickle & mp.Queue.
        # Therefore it's important to use torch.Tensor.
        dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1)))

        # USER: Remove if you don't use pre-computed proposals.
        if self.load_proposals:
            utils.transform_proposals(
                dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk
            )

        # Tin
        # fast_autoaug 1
        if not self.is_train and not self.autoaug:
        # Bacon
            # USER: Modify this if you want to keep them for some reason.
            dataset_dict.pop("annotations", None)
            dataset_dict.pop("sem_seg_file_name", None)
            return dataset_dict

        if "annotations" in dataset_dict:
            # USER: Modify this if you want to keep them for some reason.
            for anno in dataset_dict["annotations"]:
                if not self.mask_on:
                    anno.pop("segmentation", None)
                if not self.keypoint_on:
                    anno.pop("keypoints", None)

            # USER: Implement additional transformations if you have other types of data
            annos = [
                utils.transform_instance_annotations(
                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
                )
                for obj in dataset_dict.pop("annotations")
                if obj.get("iscrowd", 0) == 0
            ]
            instances = utils.annotations_to_instances(
                annos, image_shape, mask_format=self.mask_format
            )
            # Create a tight bounding box from masks, useful when image is cropped
            if self.crop_gen and instances.has("gt_masks"):
                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
            dataset_dict["instances"] = utils.filter_empty_instances(instances)
        
        # USER: Remove if you don't do semantic/panoptic segmentation.
        if "sem_seg_file_name" in dataset_dict:
            a = dataset_dict["sem_seg_file_name"]
            with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f:
                sem_seg_gt = Image.open(f)
                sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8")
                # Tin
                # matting 0
                sem_seg_gt = sem_seg_gt.copy()
                if sem_seg_gt.max() == 0:
                    assert 0, "label max 0"
                if self.matting:
                    # the matting datasets' labels are not unification
                    if sem_seg_gt.max() == 1:
                        sem_seg_gt = sem_seg_gt * 255
                # sem_fpn 0
                if self.binary:
                    sem_seg_gt[sem_seg_gt==255] = 1
                    sem_seg_gt = sem_seg_gt[:, :, 0].copy()
                # Bacon
            sem_seg_gt = transforms.apply_segmentation(sem_seg_gt)
            sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long"))
            # Tin
            # matting 1
            if self.matting or self.binary:
                sem_seg_gt[sem_seg_gt==255] = -1
                sem_seg_gt[sem_seg_gt>0] = 1
                sem_seg_gt[sem_seg_gt==-1] = 255
            # Bacon
            dataset_dict["sem_seg"] = sem_seg_gt

            if not self.is_train:
                import cv2
                cv2.imwrite("/home/pgding/project/LIP/detectron2_bdd/tools/output/tmp/"+a.split("/")[-1].replace(".png", "_orig.jpg"), dataset_dict["image"].numpy().transpose(1, 2, 0))
            # if self.is_train:
            #     s = sem_seg_gt.numpy().copy()
            #     s[s==1] = 100
            #     cv2.imwrite("/home/pgding/project/LIP/detectron2_bdd/tools/output/tmp/"+a.split("/")[-1].replace(".png", "_train_mp.jpg"), s)
        # Tin
        # matting 2
        if self.is_train and self.matting:

            img = self.merge_bg(dataset_dict["image"], dataset_dict["sem_seg"], dataset_dict["bg_file_dir"])
            dataset_dict["image"] = img
        # Bacon
        return dataset_dict
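
A toy check of the matting/binary label remap above: the ignore value 255 is preserved while every other non-zero label collapses to 1.

import torch

sem_seg_gt = torch.tensor([0, 3, 128, 255], dtype=torch.long)
sem_seg_gt[sem_seg_gt == 255] = -1   # park the ignore label
sem_seg_gt[sem_seg_gt > 0] = 1       # binarize everything else
sem_seg_gt[sem_seg_gt == -1] = 255   # restore the ignore label
print(sem_seg_gt)                    # tensor([  0,   1,   1, 255])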
Example #40
0
def evaluate_box_proposals(
    predictions, dataset, thresholds=None, area="all", limit=None
):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        "all": 0,
        "small": 1,
        "medium": 2,
        "large": 3,
        "96-128": 4,
        "128-256": 5,
        "256-512": 6,
        "512-inf": 7,
    }
    area_ranges = [
        [0 ** 2, 1e5 ** 2],  # all
        [0 ** 2, 32 ** 2],  # small
        [32 ** 2, 96 ** 2],  # medium
        [96 ** 2, 1e5 ** 2],  # large
        [96 ** 2, 128 ** 2],  # 96-128
        [128 ** 2, 256 ** 2],  # 128-256
        [256 ** 2, 512 ** 2],  # 256-512
        [512 ** 2, 1e5 ** 2],
    ]  # 512-inf
    assert area in areas, "Unknown area range: {}".format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = []
    num_pos = 0

    for image_id, prediction in enumerate(predictions):
        original_id = dataset.id_to_img_map[image_id]

        # TODO replace with get_img_info?
        image_width = dataset.coco.imgs[original_id]["width"]
        image_height = dataset.coco.imgs[original_id]["height"]
        prediction = prediction.resize((image_width, image_height))

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        inds = prediction.get_field("objectness").sort(descending=True)[1]
        prediction = prediction[inds]

        ann_ids = dataset.coco.getAnnIds(imgIds=original_id)
        anno = dataset.coco.loadAnns(ann_ids)
        gt_boxes = [obj["bbox"] for obj in anno if obj["iscrowd"] == 0]
        gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)  # guard against no boxes
        gt_boxes = BoxList(gt_boxes, (image_width, image_height), mode="xywh").convert(
            "xyxy"
        )
        gt_areas = torch.as_tensor([obj["area"] for obj in anno if obj["iscrowd"] == 0])

        if len(gt_boxes) == 0:
            continue

        valid_gt_inds = (gt_areas >= area_range[0]) & (gt_areas <= area_range[1])
        gt_boxes = gt_boxes[valid_gt_inds]

        num_pos += len(gt_boxes)

        if len(gt_boxes) == 0:
            continue

        if len(prediction) == 0:
            continue

        if limit is not None and len(prediction) > limit:
            prediction = prediction[:limit]

        overlaps = boxlist_iou(prediction, gt_boxes)

        _gt_overlaps = torch.zeros(len(gt_boxes))
        for j in range(min(len(prediction), len(gt_boxes))):
            # find which proposal box maximally covers each gt box
            # and get the iou amount of coverage for each gt box
            max_overlaps, argmax_overlaps = overlaps.max(dim=0)

            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ovr, gt_ind = max_overlaps.max(dim=0)
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1

        # append recorded iou coverage level
        gt_overlaps.append(_gt_overlaps)
    gt_overlaps = torch.cat(gt_overlaps, dim=0)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)
    recalls = torch.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }
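
A toy check of the recall computation at the end of `evaluate_box_proposals`: with the overlaps below and `num_pos = 4`, recall at IoU 0.5 is 0.75.

import torch

gt_overlaps = torch.tensor([0.2, 0.55, 0.7, 0.9])
num_pos = 4
thresholds = torch.arange(0.5, 0.95 + 1e-5, 0.05, dtype=torch.float32)
recalls = torch.zeros_like(thresholds)
for i, t in enumerate(thresholds):
    recalls[i] = (gt_overlaps >= t).float().sum() / float(num_pos)
print(recalls[0])   # tensor(0.7500)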