Example #1
    def step(self, grad, index=None):
        # State initialization
        if not hasattr(self, 'ea'):
            self.init(grad.size())

        exp_avg = get_index(self.ea, index)
        exp_avg_sq = get_index(self.eas, index)
        beta1, beta2 = self.betas[0], self.betas[1]

        self.epoch += 1

        if self.amsgrad:
            max_exp_avg_sq = get_index(self.meas, index)

        bias_correction1 = 1 - beta1**self.epoch
        bias_correction2 = 1 - beta2**self.epoch

        # Decay the first and second moment running average coefficient
        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
        if self.amsgrad:
            # Maintains the maximum of all 2nd moment running avg. till now
            torch.maximum(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
            # Use the max. for normalizing running avg. of gradient
            denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(
                self.eps)
        else:
            denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(
                self.eps)

        step_size = self.lr / bias_correction1

        return -step_size * exp_avg / denom
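
For reference, a self-contained sketch of the same update rule (the plain-Adam path; the AMSGrad branch is omitted). The function name, learning rate, and the quadratic toy objective are illustrative, not from the original snippet:

import math
import torch

def adam_update(grad, m, v, t, lr=1e-1, beta1=0.9, beta2=0.999, eps=1e-8):
    # Same moment updates as step() above, for a single tensor.
    m.mul_(beta1).add_(grad, alpha=1 - beta1)
    v.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
    denom = (v.sqrt() / math.sqrt(1 - beta2 ** t)).add_(eps)
    return -(lr / (1 - beta1 ** t)) * m / denom

target = torch.ones(10)
param, m, v = torch.zeros(10), torch.zeros(10), torch.zeros(10)
for t in range(1, 201):
    grad = param - target                # gradient of 0.5 * ||param - target||^2
    param += adam_update(grad, m, v, t)
# param should now be close to target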
Example #2
 def s_update(self, tau, iota, zeta, alpha, z, a, X):
     pi1 = -tau + iota - 1 + zeta
     pi2 = -alpha - z.reshape(-1, 1) + z.reshape(1, -1) + th.sum(
         a * X, dim=1).reshape(-1, 1) - th.matmul(a, X.T)
     s = 1 / 2 * (pi2 + iota * pi1 - iota * th.maximum(
         pi1 - iota * pi2, th.zeros(1, device=self.device)))
     return th.maximum(s, th.zeros(1, device=self.device))
Example #3
    def forward(self, batch_titems, batch_citems):
        if self.weights is not None:
            batch_nitems = t.multinomial(
                self.weights, batch_titems.size()[0] * self.n_negs,
                replacement=True).view(batch_titems.size()[0], -1)
        else:
            batch_nitems = FT(batch_titems.size()[0], self.n_negs).uniform_(0, self.vocab_size - 1).long()
        if next(self.parameters()).is_cuda:
            batch_nitems = batch_nitems.to(self.device)

        batch_titems = t.cat([batch_titems.reshape(-1, 1), batch_nitems], 1)
        mask_pad_ids = (batch_citems == self.ai2v.pad_idx)
        batch_sub_users, _ = self.ai2v(batch_titems, batch_citems, mask_pad_ids)
        batch_tvecs = self.ai2v.Bt(self.ai2v.forward_t(batch_titems))

        sim = self.similarity(batch_sub_users, batch_tvecs, batch_titems)

        if self.loss_method == 'CCE':  # default option
            soft = sim.softmax(dim=1) + 1e-6
            return -soft[:, 0].log().sum()

        if self.loss_method == 'BCE':
            soft_pos = sim[:, 0].sigmoid() + 1e-6
            soft_neg = sim[:, 1:].neg().sigmoid() + 1e-6
            return (-soft_pos.log().sum()) + (-soft_neg.log().sum())

        if self.loss_method == 'Hinge':
            soft_pos = t.maximum((t.ones_like(sim[:, 0]) - sim[:, 0]), t.zeros_like(sim[:, 0])) + 1e-6
            soft_neg = t.maximum((t.ones_like(sim[:, 1:]) - (-sim[:, 1:])), t.zeros_like(sim[:, 1:])) + 1e-6
            return soft_pos.sum() + soft_neg.sum()
Example #4
 def forward(self, input: Tensor, target: Tensor) -> Tensor:
     label = torch.maximum(target, self.cutoff * torch.ones_like(target))
     pred = torch.maximum(input, self.cutoff * torch.ones_like(input))
     if self.sine_weight_max is None:
         return torch.nn.functional.smooth_l1_loss(
             pred / label,
             torch.ones_like(target),
             reduction=self.reduction,
             beta=self.beta) + torch.nn.functional.smooth_l1_loss(
                 label / pred,
                 torch.ones_like(target),
                 reduction=self.reduction,
                 beta=self.beta)
     else:
         wgt = torch.sin(
             np.pi / (2 * self.sine_weight_max) *
             torch.clip(label, self.cutoff, self.sine_weight_max))
         loss = torch.nn.functional.smooth_l1_loss(
             pred / label,
             torch.ones_like(target),
             reduction='none',
             beta=self.beta) + torch.nn.functional.smooth_l1_loss(
                 label / pred,
                 torch.ones_like(target),
                 reduction='none',
                 beta=self.beta)
         loss *= wgt
         if self.reduction == 'none':
             return loss
         elif self.reduction == 'mean':
             return loss.mean()
         elif self.reduction == 'sum':
             return loss.sum()
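
A minimal functional sketch of the branch without sine weighting, to make the symmetric ratio form explicit; the cutoff and beta values here are illustrative assumptions:

import torch
import torch.nn.functional as F

def ratio_smooth_l1(input, target, cutoff=1e-3, beta=0.1):
    # Clamp both sides at the cutoff, then penalize pred/label and label/pred
    # symmetrically against 1.
    label = target.clamp(min=cutoff)
    pred = input.clamp(min=cutoff)
    ones = torch.ones_like(target)
    return (F.smooth_l1_loss(pred / label, ones, beta=beta)
            + F.smooth_l1_loss(label / pred, ones, beta=beta))

loss = ratio_smooth_l1(torch.tensor([1.0, 2.0]), torch.tensor([1.1, 1.9]))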
Example #5
    def merge(self, color_feat, depth_feat):

        feat = {}

        if self.merge_type == 'conv':
            feat['layer2'] = self.merge_layer2(torch.cat((color_feat['layer2'], depth_feat['layer2']), 1))
            feat['layer3'] = self.merge_layer3(torch.cat((color_feat['layer3'], depth_feat['layer3']), 1))

        elif self.merge_type == 'max':
            feat['layer2'] = torch.maximum(color_feat['layer2'], depth_feat['layer2'])
            feat['layer3'] = torch.maximum(color_feat['layer3'], depth_feat['layer3'])

        elif self.merge_type == 'mul':
            feat['layer2'] = torch.mul(color_feat['layer2'], depth_feat['layer2'])
            feat['layer3'] = torch.mul(color_feat['layer3'], depth_feat['layer3'])

        elif self.merge_type == 'mean':
            feat['layer2'] = 0.5 * color_feat['layer2'] + 0.5 * depth_feat['layer2']
            feat['layer3'] = 0.5 * color_feat['layer3'] + 0.5 * depth_feat['layer3']

        elif self.merge_type == 'weightedSum':
            feat['layer2'] = self.W_rgb * color_feat['layer2'] + self.W_depth * depth_feat['layer2']
            feat['layer3'] = self.W_rgb * color_feat['layer3'] + self.W_depth * depth_feat['layer3']

        return feat
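
For the 'max' branch, an equivalent standalone call; shapes and layer names are illustrative:

import torch

color_feat = {'layer2': torch.randn(1, 64, 32, 32), 'layer3': torch.randn(1, 128, 16, 16)}
depth_feat = {'layer2': torch.randn(1, 64, 32, 32), 'layer3': torch.randn(1, 128, 16, 16)}
# Element-wise max keeps the stronger response of the two modalities.
merged = {k: torch.maximum(color_feat[k], depth_feat[k]) for k in color_feat}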
Example #6
    def small_IoU(box_i, box_j):

        device = box_i.device
        dtype = box_i.dtype

        pw_a = box_i[2] - box_i[0]
        ph_a = box_i[3] - box_i[1]
        area_p = (pw_a * ph_a)

        bw_a = box_j[2] - box_j[0]
        bh_a = box_j[3] - box_j[1]
        area_b = (bw_a * bh_a)

        area_u = area_p + area_b

        x_val = torch.minimum(box_i[2], box_j[2]) - torch.maximum(
            box_i[0], box_j[0])
        x_val_zero = torch.zeros(x_val.shape, device=device, dtype=dtype)
        y_val = torch.minimum(box_i[3], box_j[3]) - torch.maximum(
            box_i[1], box_j[1])
        y_val_zero = torch.zeros(y_val.shape, device=device, dtype=dtype)

        area_i = torch.maximum(x_val, x_val_zero) * torch.maximum(
            y_val, y_val_zero)
        area_u -= area_i

        return area_i / area_u
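
A quick numeric check, assuming small_IoU is reachable at call time: two unit-offset 2x2 squares intersect in area 1 with union 7:

import torch

box_i = torch.tensor([0., 0., 2., 2.])
box_j = torch.tensor([1., 1., 3., 3.])
print(small_IoU(box_i, box_j))  # tensor(0.1429), i.e. 1/7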
Example #7
def broadcast_ioutf(box1, box2):
    # (batch, 507, 1, 4)
    box_1 = torch.unsqueeze(box1, dim=-2)
    # (batch, 1, 507, 4)
    box_2 = torch.unsqueeze(box2, dim=-3)
    # (batch, 507, 507, 4)
    boxa, boxb = torch.broadcast_tensors(box_1, box_2)

    al, at, ar, ab = torch.chunk(boxa, 4, dim=-1)
    bl, bt, br, bb = torch.chunk(boxb, 4, dim=-1)

    # Intersection edges: max of the left/top edges, min of the right/bottom edges
    left = torch.maximum(al, bl)
    right = torch.minimum(ar, br)
    top = torch.maximum(at, bt)
    bottom = torch.minimum(ab, bb)

    iw = torch.clamp(right - left, min=0, max=1)
    ih = torch.clamp(bottom - top, min=0, max=1)

    intersect = iw * ih
    area_a = (ar - al) * (ab - at)
    area_b = (br - bl) * (bb - bt)
    union = area_a + area_b - intersect

    iou = torch.squeeze(intersect / (union + 1e-7), dim=-1)

    return iou
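
With the intersection edges corrected above (min of the right/bottom edges), a box compared against itself gives IoU of roughly 1; coordinates are assumed normalized to [0, 1] because of the clamp:

import torch

boxes = torch.tensor([[[0.1, 0.1, 0.4, 0.5]]])  # (batch, n_boxes, 4) as [l, t, r, b]
print(broadcast_ioutf(boxes, boxes))  # approximately tensor([[[1.]]])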
Example #8
def compute_iou(pred, gt):
    """ Calculates IoU (Jaccard index) of two sets of bboxes:
            IoU = area(pred ∩ gt) / (area(pred) + area(gt) - area(pred ∩ gt))
        Parameters:
            Coordinates of bboxes are supposed to be in the following form: [x1, y1, x2, y2]
            pred (torch.tensor): predicted bboxes
            gt (torch.tensor): ground truth bboxes
        Return value:
            iou (torch.tensor): intersection over union
    """
    def get_box_area(box):
        return (box[:, 2] - box[:, 0] + 1.) * (box[:, 3] - box[:, 1] + 1.)

    #_gt = torch.tile(gt, (pred.shape[0], 1))
    _gt = gt.repeat(pred.shape[0], 1)
    _pred = torch.repeat_interleave(pred, gt.shape[0], dim=0)

    ixmin = torch.maximum(_gt[:, 0], _pred[:, 0])
    iymin = torch.maximum(_gt[:, 1], _pred[:, 1])
    ixmax = torch.minimum(_gt[:, 2], _pred[:, 2])
    iymax = torch.minimum(_gt[:, 3], _pred[:, 3])

    width = torch.maximum(ixmax - ixmin + 1., torch.tensor(0.))
    height = torch.maximum(iymax - iymin + 1., torch.tensor(0.))

    intersection_area = width * height
    union_area = get_box_area(_gt) + get_box_area(_pred) - intersection_area
    iou = (intersection_area / union_area).reshape(pred.shape[0], gt.shape[0])
    return iou
Example #9
def AlignTripLoss(fea1, fea2, mask1, mask2):
    # fea1_size (bs, max_len1, dim)  mask1_size (bs, max_len1)
    # fea2_size (bs, max_len2, dim)  mask2_size (bs, max_len2)
    # '-Inf' for padded item, '0' for others
    fea1 = F.normalize(fea1, p=2, dim=-1)
    fea2 = F.normalize(fea2, p=2, dim=-1)

    # match fea1 to fea2
    sim_pos1 = cal_sim(fea1, fea2, mask1, mask2)  # (bs)
    # (bs, 1, max_len1, dim)  (1, bs, max_len2, dim)
    sim_neg1_all = cal_sim_all(fea1.unsqueeze(1), fea2.unsqueeze(0), mask1,
                               mask2)  # (bs,bs)
    unmask = torch.eye(sim_pos1.size(0),
                       dtype=torch.float32,
                       device=sim_pos1.device)
    unmask = torch.where(unmask == 1,
                         torch.tensor([float('-Inf')], device=unmask.device),
                         unmask)

    sim_neg1, _ = torch.max(sim_neg1_all + unmask, 1)
    loss1 = -sim_pos1 + sim_neg1 + 0.2
    loss1 = torch.maximum(loss1, torch.zeros_like(loss1)).mean()

    # match fea2 to fea1
    sim_pos2 = cal_sim(fea2, fea1, mask2, mask1)  # (bs)
    # (bs, 1, max_len1, dim)  (1, bs, max_len2, dim)
    sim_neg2_all = cal_sim_all(fea2.unsqueeze(1), fea1.unsqueeze(0), mask2,
                               mask1)  # (bs,bs)
    sim_neg2, _ = torch.max(sim_neg2_all + unmask, 1)
    loss2 = -sim_pos2 + sim_neg2 + 0.2
    loss2 = torch.maximum(loss2, torch.zeros_like(loss2)).mean()

    loss = loss1 + loss2

    return loss
Example #10
File: amsgrad.py Project: wx-b/nn
    def get_mv(self, state: Dict[str, any], group: Dict[str, any],
               grad: torch.Tensor):
        """
        ### Calculate $m_t$ and $v_t$ or $\max(v_1, v_2, ..., v_{t-1}, v_t)$

        * `state` is the optimizer state of the parameter (tensor)
        * `group` stores optimizer attributes of the parameter group
        * `grad` is the current gradient tensor $g_t$ for the parameter $\theta_{t-1}$
        """

        # Get $m_t$ and $v_t$ from *Adam*
        m, v = super().get_mv(state, group, grad)

        # If this parameter group is using `amsgrad`
        if group['amsgrad']:
            # Get $\max(v_1, v_2, ..., v_{t-1})$.
            #
            # 🗒 The paper uses the notation $\hat{v}_t$ for this, which we
            # avoid here because it clashes with Adam's use of the same
            # notation for the bias-corrected exponential moving average.
            v_max = state['max_exp_avg_sq']
            # Calculate $\max(v_1, v_2, ..., v_{t-1}, v_t)$.
            #
            # 🤔 Arguably the max should be taken over the bias-corrected
            # second moment estimates, but this is also how it's
            # [implemented in PyTorch](https://github.com/pytorch/pytorch/blob/19f4c5110e8bcad5e7e75375194262fca0a6293a/torch/optim/functional.py#L90).
            # In practice it hardly matters: bias correction only scales the
            # value up, and the difference is noticeable only during the first
            # few training steps.
            torch.maximum(v_max, v, out=v_max)

            return m, v_max
        else:
            # Fall back to *Adam* if the parameter group is not using `amsgrad`
            return m, v
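
A tiny demonstration of the in-place running maximum this maintains: v_max never decreases, even when the current v does:

import torch

v_max = torch.zeros(3)
for v in (torch.tensor([1., 4., 2.]), torch.tensor([0.5, 3., 5.])):
    torch.maximum(v_max, v, out=v_max)
print(v_max)  # tensor([1., 4., 5.])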
Example #11
 def linear_to_log_scale_with_dynamic_range(self, spectrogram):
     # TODO remove? Dynamic range might be a bad idea for VAEs... (need to reconstruct the 'floor' value)
     assert self.dynamic_range_dB is not None  # Dynamic range not provided? It might be counterproductive anyway
     spectrogram = torch.maximum(spectrogram, torch.ones_like(spectrogram) * 10 ** (self.min_dB / 20.0))
     spectrogram = 20.0 * torch.log10(spectrogram)
     return torch.maximum(spectrogram,
                          torch.ones_like(spectrogram) * (torch.max(spectrogram) - self.dynamic_range_dB))
Example #12
        def compute_crop_pad_image_location(
            bbox_tight: "torch.Tensor", image: "torch.Tensor"
        ) -> Tuple["torch.Tensor", "torch.Tensor", "torch.Tensor",
                   "torch.Tensor"]:
            """
            Get the valid image coordinates of the context region around the target, or of the search region, within the full image

            :param bbox_tight: Coordinates of bounding box [x1, y1, x2, y2].
            :param image: Frame to be cropped and padded.
            :return: Valid (x1, y1, x2, y2) coordinates of the padded ROI within the image.
            """

            # Center of the bounding box
            # bbox_center_x = bbox_tight.get_center_x()
            # bbox_center_y = bbox_tight.get_center_y()
            bbox_center_x = get_center_x_f(bbox_tight)
            bbox_center_y = get_center_y_f(bbox_tight)

            image_height = image.shape[0]
            image_width = image.shape[1]

            # Padded output width and height
            # output_width = bbox_tight.compute_output_width()
            # output_height = bbox_tight.compute_output_height()
            output_width = compute_output_width_f(bbox_tight)
            output_height = compute_output_height_f(bbox_tight)

            roi_left = torch.maximum(
                torch.tensor(0.0).to(self.device),
                bbox_center_x - (output_width / 2.0))
            roi_bottom = torch.maximum(
                torch.tensor(0.0).to(self.device),
                bbox_center_y - (output_height / 2.0))

            # New ROI width
            # -------------
            # 1. left_half should not go out of bound on the left side of the
            # image
            # 2. right_half should not go out of bound on the right side of the
            # image
            left_half = torch.minimum(output_width / 2.0, bbox_center_x)
            right_half = torch.minimum(output_width / 2.0,
                                       image_width - bbox_center_x)
            roi_width = torch.maximum(
                torch.tensor(1.0).to(self.device), left_half + right_half)

            # New ROI height
            # Similar logic applied that is applied for 'New ROI width'
            top_half = torch.minimum(output_height / 2.0, bbox_center_y)
            bottom_half = torch.minimum(output_height / 2.0,
                                        image_height - bbox_center_y)
            roi_height = torch.maximum(
                torch.tensor(1.0).to(self.device), top_half + bottom_half)

            # Padded image location in the original image
            # objPadImageLocation = BoundingBox(roi_left, roi_bottom, roi_left + roi_width, roi_bottom + roi_height)
            #
            # return objPadImageLocation
            return roi_left, roi_bottom, roi_left + roi_width, roi_bottom + roi_height
Example #13
 def forward(self, p):
     q = (self.transform(p.reshape(-1, 3).unsqueeze(0)) -
          self.centers.unsqueeze(1)).abs() - self.b.unsqueeze(1)
     up = q.clamp(min=1e-7).norm(p=2, dim=-1, keepdim=True)
     x, y, z = q.split(1, dim=-1)
     down = torch.maximum(x, torch.maximum(y, z)).clamp(max=-1e-7)
     sd = up + down
     return smooth_min(sd, k=16.).reshape(p.shape[:-1])
Example #14
def limiter(cr):
    return torch.maximum(
        torch.tensor([0.0], device=cr.device),
        torch.maximum(
            torch.minimum(torch.tensor([1.0], device=cr.device), 2 * cr),
            torch.minimum(torch.tensor([2.0], device=cr.device), cr),
        ),
    )
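
This matches the superbee flux limiter, phi(r) = max(0, min(1, 2r), min(2, r)); a few sample values:

import torch

cr = torch.tensor([-1.0, 0.25, 0.5, 1.0, 3.0])
print(limiter(cr))  # tensor([0.0000, 0.5000, 1.0000, 1.0000, 2.0000])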
Example #15
    def no_object_loss(truth_boxes_grid,
                       truth_mask,
                       pred_xy,
                       pred_wh,
                       pred_conf,
                       noobj_iou_thres=0.5):
        # Predictions
        # [b,GSZ,GSZ,N_anchor,2] => [b,GSZ,GSZ,N_anchor, 1, 2]
        pred_xy = torch.unsqueeze(pred_xy, dim=4)
        # [b,GSZ,GSZ,N_anchor,2] => [b,GSZ,GSZ,N_anchor, 1, 2]
        pred_wh = torch.unsqueeze(pred_wh, dim=4)
        pred_wh_half = pred_wh / 2.
        pred_xymin = pred_xy - pred_wh_half  # [b,GSZ,GSZ,N_anchor, 1, 2]
        pred_xymax = pred_xy + pred_wh_half  # [b,GSZ,GSZ,N_anchor, 1, 2]

        # Ground Truth
        # [b, n_labels, 5] => [b, 1, 1, 1, n_labels, 5]
        b, n_lbs, len_box = truth_boxes_grid.shape
        true_boxes_grid = truth_boxes_grid.view(b, 1, 1, 1, n_lbs, len_box)
        true_xy = true_boxes_grid[..., 0:2]  # [b, 1, 1, 1, n_labels, 2]
        true_wh = true_boxes_grid[..., 2:4]  # [b, 1, 1, 1, n_labels, 2]
        true_wh_half = true_wh / 2.
        true_xymin = true_xy - true_wh_half
        true_xymax = true_xy + true_wh_half

        # Compute non object loss from predxymin, predxymax, true_xymin, true_xymax
        # [b,GSZ,GSZ,N_anchor,1,2] vs [b,1,1,1,296,2] =>[b,GSZ,GSZ,N_anchor,296,2]
        intersectxymin = torch.maximum(pred_xymin, true_xymin)
        # [b,GSZ,GSZ,N_anchor,1,2] vs [b,1,1,1,296,2] =>[b,GSZ,GSZ,N_anchor,296,2]
        intersectxymax = torch.minimum(pred_xymax, true_xymax)
        # [b,GSZ,GSZ,N_anchor,296,2]
        intersect_wh = torch.maximum(intersectxymax - intersectxymin,
                                     torch.zeros_like(intersectxymax))
        # [b,GSZ,GSZ,N_anchor,296]*[b,GSZ,GSZ,N_anchor,296] =>[b,GSZ,GSZ,N_anchor,296]
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        # [b,GSZ,GSZ,N_anchor] * [b,GSZ,GSZ,N_anchor]
        pred_area = pred_wh[..., 0] * pred_wh[..., 1]
        # [b,1,1,1,296] * [b,1,1,1,296]
        true_area = true_wh[..., 0] * true_wh[..., 1]
        # [b,GSZ,GSZ,N_anchor,1]+[b,1,1,1,296] -[b,GSZ,GSZ,N_anchor,296]=>[b,GSZ,GSZ,N_anchor,296]
        union_area = pred_area + true_area - intersect_area
        # [b,GSZ,GSZ,N_anchor,296]
        iou_score = intersect_area / union_area
        # [b,GSZ,GSZ,N_anchor] => [b,GSZ,GSZ,N_anchor,1]
        best_iou = torch.amax(iou_score, dim=4).unsqueeze(-1)

        noobj_detection = (best_iou < noobj_iou_thres).float()
        noobj_mask = noobj_detection * (1 - truth_mask)

        # noobj counter
        n_noobj = torch.sum((noobj_mask > 0.).to(torch.float32))
        noobj_loss = -torch.sum(noobj_mask * torch.log(1 - pred_conf)) / (
            n_noobj + 1e-6)
        return noobj_loss
Example #16
def convert_box(bbox):
    ans = torch.zeros_like(bbox)
    x = bbox[..., 0]
    y = bbox[..., 1]
    w = bbox[..., 2]
    h = bbox[..., 3]
    ans[..., 0] = torch.maximum(x - w / 2, torch.zeros_like(x))
    ans[..., 1] = torch.maximum(y - h / 2, torch.zeros_like(y))
    ans[..., 2] = x + w / 2
    ans[..., 3] = y + h / 2
    return ans
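
A worked example of the conversion from center format [cx, cy, w, h] to corner format [x1, y1, x2, y2], with the left/top edges clipped at zero:

import torch

print(convert_box(torch.tensor([5., 5., 4., 4.])))  # tensor([3., 3., 7., 7.])
print(convert_box(torch.tensor([1., 1., 4., 4.])))  # tensor([0., 0., 3., 3.]), x1/y1 clipped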
Example #17
    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            # Extra values for clipping
            clipping = group['clipping']
            eps = group['eps']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad

                # =========================
                # Gradient clipping
                if clipping is not None:
                    param_norm = torch.maximum(unitwise_norm(p),
                                               torch.tensor(eps).to(p.device))
                    grad_norm = unitwise_norm(d_p)
                    max_norm = param_norm * group['clipping']

                    trigger_mask = grad_norm > max_norm
                    clipped_grad = p.grad * (max_norm / torch.maximum(
                        grad_norm,
                        torch.tensor(1e-6).to(p.device)))
                    d_p = torch.where(trigger_mask, clipped_grad, d_p)
                # =========================

                if weight_decay != 0:
                    d_p = d_p.add(p, alpha=weight_decay)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(
                            d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                p.add_(d_p, alpha=-group['lr'])

        return loss
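
This step implements SGD with adaptive gradient clipping (AGC). The snippet relies on an external unitwise_norm helper; a common definition, following widely used AGC implementations (an assumption here, not taken from this snippet's repository):

import torch

def unitwise_norm(x: torch.Tensor, norm_type: float = 2.0) -> torch.Tensor:
    # Norm per "unit": the whole tensor for scalars/vectors, and one norm per
    # output row/filter for weight matrices and conv kernels.
    if x.ndim <= 1:
        return x.norm(norm_type)
    return x.norm(norm_type, dim=tuple(range(1, x.ndim)), keepdim=True)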
Example #18
def adamp(
    params: List[Tensor],
    grads: List[Tensor],
    exp_avgs: List[Tensor],
    exp_avg_sqs: List[Tensor],
    max_exp_avg_sqs: List[Tensor],
    state_steps: List[int],
    amsgrad: bool,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    eps: float,
    delta: float,
) -> None:
    r"""Functional API that performs AdamP algorithm computation.
    See :class:`~holocron.optim.AdamP` for details.
    """

    for i, param in enumerate(params):

        grad = grads[i]
        exp_avg = exp_avgs[i]
        exp_avg_sq = exp_avg_sqs[i]
        step = state_steps[i]

        bias_correction1 = 1 - beta1**step
        bias_correction2 = 1 - beta2**step

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        # Decay the first and second moment running average coefficient
        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
        if amsgrad:
            # Maintains the maximum of all 2nd moment running avg. till now
            torch.maximum(max_exp_avg_sqs[i],
                          exp_avg_sq,
                          out=max_exp_avg_sqs[i])
            # Use the max. for normalizing running avg. of gradient
            denom = (max_exp_avg_sqs[i].sqrt() /
                     math.sqrt(bias_correction2)).add_(eps)
        else:
            denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(eps)

        # Extra step
        pt = exp_avg / bias_correction1 / denom
        if F.cosine_similarity(param.data.view(1, -1), grad.view(
                1, -1)).max() < delta / math.sqrt(param.data.numel()):
            normalized_param = param.data / param.data.norm().add_(eps)
            pt -= (normalized_param * pt).sum() * normalized_param.data

        param.add_(pt, alpha=-lr)
Example #19
def tadam(params: List[Tensor],
          grads: List[Tensor],
          exp_avgs: List[Tensor],
          exp_avg_sqs: List[Tensor],
          max_exp_avg_sqs: List[Tensor],
          W_ts: List[Tensor],
          state_steps: List[int],
          amsgrad: bool,
          beta1: float,
          beta2: float,
          lr: float,
          weight_decay: float,
          eps: float,
          dof: float):
    r"""Functional API that performs TAdam algorithm computation.
    See :class:`~holocron.optim.TAdam` for details.
    """

    for i, param in enumerate(params):

        grad = grads[i]
        exp_avg = exp_avgs[i]
        exp_avg_sq = exp_avg_sqs[i]
        W_t = W_ts[i]
        _dof = param.data.numel() if dof is None else dof
        step = state_steps[i]
        if amsgrad:
            max_exp_avg_sq = max_exp_avg_sqs[i]

        bias_correction1 = 1 - beta1 ** step
        bias_correction2 = 1 - beta2 ** step

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        # Decay the first and second moment running average coefficient
        w_t = grad.sub(exp_avg).pow_(2).div_(exp_avg_sq.add(eps)).sum()
        w_t.add_(_dof).pow_(-1).mul_(_dof + param.data.numel())
        exp_avg.mul_(W_t / (W_t + w_t)).addcdiv_(grad, W_t + w_t, value=w_t)
        W_t.mul_((2 * beta1 - 1) / beta1)
        W_t.add_(w_t)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
        if amsgrad:
            # Maintains the maximum of all 2nd moment running avg. till now
            torch.maximum(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
            # Use the max. for normalizing running avg. of gradient
            denom = (max_exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(eps)
        else:
            denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(eps)

        step_size = lr / bias_correction1

        param.addcdiv_(exp_avg, denom, value=-step_size)
Example #20
def cos_sim(
    A,
    B,
    eps=1e-8,
):
    A_norm = A.norm(dim=-1, keepdim=True)
    B_norm = B.norm(dim=-1, keepdim=True)
    A_norm = torch.clamp(A_norm, min=eps)  # clamp keeps device/dtype consistent
    B_norm = torch.clamp(B_norm, min=eps)
    A = torch.div(A, A_norm)
    B = torch.div(B, B_norm)
    return torch.mm(A, B.T)
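
Usage sketch: pairwise cosine similarity between two batches of row vectors:

import torch

A, B = torch.randn(5, 16), torch.randn(7, 16)
sim = cos_sim(A, B)  # shape (5, 7), values in [-1, 1]
print(torch.allclose(cos_sim(A, A).diagonal(), torch.ones(5), atol=1e-5))  # True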
Example #21
def bbox_overlaps_ciou(bboxes1, bboxes2):
    bboxes1 = convert_box(bboxes1)
    bboxes2 = convert_box(bboxes2)
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    cious = torch.zeros((rows, cols))
    if rows * cols == 0:
        return cious
    exchange = False
    if bboxes1.shape[0] > bboxes2.shape[0]:
        bboxes1, bboxes2 = bboxes2, bboxes1
        cious = torch.zeros((cols, rows))
        exchange = True

    w1 = bboxes1[:, 2] - bboxes1[:, 0]
    h1 = bboxes1[:, 3] - bboxes1[:, 1]
    w2 = bboxes2[:, 2] - bboxes2[:, 0]
    h2 = bboxes2[:, 3] - bboxes2[:, 1]

    area1 = w1 * h1
    area2 = w2 * h2

    center_x1 = (bboxes1[:, 2] + bboxes1[:, 0]) / 2
    center_y1 = (bboxes1[:, 3] + bboxes1[:, 1]) / 2
    center_x2 = (bboxes2[:, 2] + bboxes2[:, 0]) / 2
    center_y2 = (bboxes2[:, 3] + bboxes2[:, 1]) / 2
    inter_max_xy = torch.minimum(bboxes1[:, 2:], bboxes2[:, 2:])
    inter_min_xy = torch.maximum(bboxes1[:, :2], bboxes2[:, :2])
    out_max_xy = torch.maximum(bboxes1[:, 2:], bboxes2[:, 2:])
    out_min_xy = torch.minimum(bboxes1[:, :2], bboxes2[:, :2])

    inter = torch.clamp((inter_max_xy - inter_min_xy), min=0)
    inter_area = inter[:, 0] * inter[:, 1]
    inter_diag = (center_x2 - center_x1)**2 + (center_y2 - center_y1)**2
    outer = torch.clamp((out_max_xy - out_min_xy), min=0)
    outer_diag = (outer[:, 0]**2) + (outer[:, 1]**2)
    union = area1 + area2 - inter_area
    u = (inter_diag) / outer_diag
    iou = inter_area / union
    with torch.no_grad():
        arctan = torch.atan(w2 / h2) - torch.atan(w1 / h1)
        v = (4 / (math.pi**2)) * torch.pow(
            (torch.atan(w2 / h2) - torch.atan(w1 / h1)), 2)
        S = 1 - iou
        alpha = v / (S + v)
        w_temp = 2 * w1
    ar = (8 / (math.pi**2)) * arctan * ((w1 - w_temp) * h1)
    cious = iou - (u + alpha * ar)
    cious = torch.clamp(cious, min=-1.0, max=1.0)
    if exchange:
        cious = cious.T
    return cious
Example #22
    def update(self, elite_val):
        stalled = abs(elite_val - self.best_val) <= self.crit
        self.num_i = torch.where(stalled, self.num_i + 1, self.zeros)
        new_plateau = (self.num_i % 100 == 0) & (self.num_i != 0)
        self.num_plateaus = torch.where(new_plateau, self.num_plateaus + 1,
                                        self.num_plateaus)

        # update alpha and rho
        self.rho = torch.maximum(self.rho_min,
                                 self.rho_init * self.decay**self.num_plateaus)
        self.alpha = torch.maximum(
            self.alpha_min, self.alpha_init * self.decay**self.num_plateaus)

        self.best_val = torch.maximum(elite_val, self.best_val)
Example #23
def adam(
    params: List[Tensor],
    grads: List[Tensor],
    exp_avgs: List[Tensor],
    exp_avg_sqs: List[Tensor],
    max_exp_avg_sqs: List[Tensor],
    state_steps: List[int],
    amsgrad: bool,
    beta1: float,
    beta2: float,
    lr: float,
    weight_decay: float,
    eps: float,
    factor: float = 1.,
):
    r"""Functional API that performs Adam algorithm computation.

    See :class:`~torch.optim.Adam` for details.
    """

    for i, param in enumerate(params):

        grad = grads[i]
        exp_avg = exp_avgs[i]
        exp_avg_sq = exp_avg_sqs[i]
        step = state_steps[i]
        if amsgrad:
            max_exp_avg_sq = max_exp_avg_sqs[i]

        bias_correction1 = 1 - beta1**step
        bias_correction2 = 1 - beta2**step

        if weight_decay != 0:
            grad = grad.add(param, alpha=weight_decay)

        # Decay the first and second moment running average coefficient
        exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
        exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
        if amsgrad:
            # Maintains the maximum of all 2nd moment running avg. till now
            torch.maximum(max_exp_avg_sq, exp_avg_sq, out=max_exp_avg_sq)
            # Use the max. for normalizing running avg. of gradient
            denom = (max_exp_avg_sq.sqrt() /
                     math.sqrt(bias_correction2)).add_(eps)
        else:
            denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(eps)

        step_size = lr / bias_correction1

        param.addcdiv_(exp_avg, denom, value=-step_size * factor)
Example #24
def evaluate_final_model(model, loader, device):
    """
    Calculate and return the evaluation metrics (average relative error, RMS,
    log10 error, threshold accuracies) and loss of the model on the validation
    or test set.

    model: the model to evaluate.
    loader: the dataloader of test or validation set
    device: either CPU or CUDA
    """
    model.eval()
    model = model.to(device)
    rel = []
    rms = []
    log10 = []
    theta1 = []
    theta2 = []
    theta3 = []
    losses = []
    with torch.no_grad():
        for i, batch in enumerate(loader):
            X = torch.Tensor(batch["image"]).to(device)
            y = torch.Tensor(batch["depth"]).to(device)
            outputs = model(X)
            rel.append(torch.mean(torch.abs(outputs - y) / y).item())
            rms.append((torch.mean((outputs / y - 1)**2)**0.5).item())
            log10.append(
                torch.mean(torch.abs(torch.log10(outputs) -
                                     torch.log10(y))).item())
            ratio = torch.maximum(outputs / y, y / outputs)
            theta1.append((torch.sum(ratio < 1.25) / y.nelement()).item())
            theta2.append((torch.sum(ratio < 1.25 ** 2) / y.nelement()).item())
            theta3.append((torch.sum(ratio < 1.25 ** 3) / y.nelement()).item())
            loss = DepthLoss(0.1).to(device)
            losses.append(loss(outputs, y).item())
        rel = sum(rel) / len(rel)
        rms = sum(rms) / len(rms)
        log10 = sum(log10) / len(log10)
        theta1 = sum(theta1) / len(theta1)
        theta2 = sum(theta2) / len(theta2)
        theta3 = sum(theta3) / len(theta3)
        loss = sum(losses) / len(losses)
    return rel, rms, log10, theta1, theta2, theta3, loss
Example #25
        def calculate_iou(best_pred, preds, areas):
            # Remove bboxes with IOU >= Thresh
            max_min_x = torch.maximum(best_pred[1], preds[1:, 1])
            max_min_y = torch.maximum(best_pred[2], preds[1:, 2])

            min_max_x = torch.minimum(best_pred[3], preds[1:, 3])
            min_max_y = torch.minimum(best_pred[4], preds[1:, 4])

            intersection_x = (min_max_x - max_min_x).clamp(min=0)
            intersection_y = (min_max_y - max_min_y).clamp(min=0)
            intersection_area = intersection_x * intersection_y

            iou = intersection_area / (areas[0] + areas[1:] -
                                       intersection_area)
            return iou
Example #26
    def get_mean_std(self):
        mean = self.mean
        std = torch.sqrt(
            torch.maximum(torch.zeros(1, device=self.device), self.mean_sq -
                          self.mean**2))  # clip to zero to avoid NaN

        return mean, std
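
The identity used here is Var[x] = E[x^2] - (E[x])^2, clamped at zero so floating-point error cannot push sqrt over a negative number; a quick check against torch.std:

import torch

x = torch.randn(10_000)
mean, mean_sq = x.mean(), (x ** 2).mean()
std = torch.sqrt(torch.clamp(mean_sq - mean ** 2, min=0.0))
print(torch.allclose(std, x.std(unbiased=False), atol=1e-5))  # True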
Example #27
 def comparison_ops(self):
     a = torch.randn(4)
     b = torch.randn(4)
     return (
         torch.allclose(a, b),
         torch.argsort(a),
         torch.eq(a, b),
         torch.equal(a, b),
         torch.ge(a, b),
         torch.greater_equal(a, b),
         torch.gt(a, b),
         torch.greater(a, b),
         torch.isclose(a, b),
         torch.isfinite(a),
         torch.isin(a, b),
         torch.isinf(a),
         torch.isposinf(a),
         torch.isneginf(a),
         torch.isnan(a),
         torch.isreal(a),
         torch.kthvalue(a, 1),
         torch.le(a, b),
         torch.less_equal(a, b),
         torch.lt(a, b),
         torch.less(a, b),
         torch.maximum(a, b),
         torch.minimum(a, b),
         torch.fmax(a, b),
         torch.fmin(a, b),
         torch.ne(a, b),
         torch.not_equal(a, b),
         torch.sort(a),
         torch.topk(a, 1),
         torch.msort(a),
     )
Example #28
    def orthogonalized_raised_cosines(cls,
                                      dt,
                                      last_time_peak,
                                      n,
                                      b,
                                      a=1e0,
                                      weight=None):

        range_locs = torch.log(torch.tensor([0, last_time_peak]) + b)
        delta = (range_locs[1] - range_locs[0]) / (n - 1)
        locs = torch.linspace(range_locs[0], range_locs[1], n)

        last_time = torch.exp(range_locs[1] + 2 * delta / a) - b
        t = torch.arange(0, last_time, dt)
        support = torch.tensor([t[0], t[-1] + dt])

        pi_torch = torch.tensor([pi])
        raised_cosines = torch.minimum(
            a * (torch.log(t[:, None] + b) - locs[None, :]) * pi / delta / 2,
            pi_torch)
        raised_cosines = (
            1 + torch.cos(torch.maximum(-pi_torch, raised_cosines))) / 2
        raised_cosines = raised_cosines / torch.sqrt(
            torch.sum(raised_cosines**2, 0))
        u, s, v = torch.linalg.svd(raised_cosines)
        basis = u[:, :n]

        return cls(basis=basis, support=support, weight=weight)
Example #29
    def quantile_spline(
        self,
        alpha: torch.Tensor,
        dim: Optional[int] = None,
    ) -> torch.Tensor:
        # Refer to the description in quantile_internal

        qk_y = self.qk_y
        sk_x, delta_sk_x, delta_sk_y = (
            self.sk_x,
            self.delta_sk_x,
            self.delta_sk_y,
        )

        if dim is not None:
            qk_y = qk_y.unsqueeze(dim=0 if dim == 0 else -1)
            sk_x = sk_x.unsqueeze(dim=dim)
            delta_sk_x = delta_sk_x.unsqueeze(dim=dim)
            delta_sk_y = delta_sk_y.unsqueeze(dim=dim)

        if dim is None or dim == 0:
            alpha = alpha.unsqueeze(dim=-1)

        alpha = alpha.unsqueeze(dim=-1)

        spline_val = (alpha - sk_x) / delta_sk_x
        spline_val = torch.maximum(
            torch.minimum(spline_val, torch.ones_like(spline_val)),
            torch.zeros_like(spline_val),
        )

        return qk_y + torch.sum(spline_val * delta_sk_y, dim=-1)
Example #30
 def reduce_relu(self, nodes):
     w = torch.exp(self.w)
     R = torch.clamp(self.R, 0.000001, 0.999999)
     msg = w * nodes.mailbox['m'] + self.b
     fsum = torch.sum(torch.maximum(msg, R * msg), dim=1)
     out_h = (torch.minimum(fsum, fsum / R) - self.b) / w
     return {'sum_sigma_h': out_h}