def updateGradInput(self, input, y):
        v1 = input[0]
        v2 = input[1]

        gw1 = self.gradInput[0]
        gw2 = self.gradInput[1]
        gw1.resize_as_(v1).copy_(v2)
        gw2.resize_as_(v1).copy_(v1)

        torch.mul(self.w1, self.w22, out=self.buffer)
        gw1.addcmul_(-1, self.buffer.expand_as(v1), v1)
        gw1.mul_(self.w.expand_as(v1))

        torch.mul(self.w1, self.w32, out=self.buffer)
        gw2.addcmul_(-1, self.buffer.expand_as(v1), v2)
        gw2.mul_(self.w.expand_as(v1))

        # self._idx = self._outputs <= 0
        torch.le(self._outputs, 0, out=self._idx)
        self._idx = self._idx.view(-1, 1).expand(gw1.size())
        gw1[self._idx] = 0
        gw2[self._idx] = 0

        torch.eq(y, 1, out=self._idx)
        self._idx = self._idx.view(-1, 1).expand(gw2.size())
        gw1[self._idx] = gw1[self._idx].mul_(-1)
        gw2[self._idx] = gw2[self._idx].mul_(-1)

        if self.sizeAverage:
            gw1.div_(y.size(0))
            gw2.div_(y.size(0))

        return self.gradInput
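
For reference, a minimal standalone sketch of the torch.le-based gradient masking used above, with fabricated outputs and gradients rather than the criterion's real buffers:

import torch

outputs = torch.tensor([0.2, -0.1, 0.0])   # fabricated per-sample hinge values
grad = torch.ones(3, 4)
idx = torch.le(outputs, 0)                 # zero the gradient wherever the output is <= 0
idx = idx.view(-1, 1).expand(grad.size())
grad[idx] = 0
print(grad)  # rows 1 and 2 are zeroed
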
Example n. 2
def smooth_L1(pred,targets,alpha_in,alpha_out,beta=1.0):
    x=(pred-targets)*alpha_in
    xabs=torch.abs(x)
    y1=0.5*x**2/beta
    y2=xabs-0.5*beta
    case1=torch.le(xabs,beta).float()
    case2=1-case1
    return torch.sum((y1*case1+y2*case2)*alpha_out)/pred.size(0)
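
A hedged usage sketch of smooth_L1 with made-up tensors; alpha_in and alpha_out are passed as plain floats here, assuming they broadcast as in the function body:

import torch

pred = torch.randn(4, 4)
targets = torch.randn(4, 4)
loss = smooth_L1(pred, targets, alpha_in=1.0, alpha_out=1.0, beta=1.0)
print(loss)  # scalar loss averaged over the batch dimension
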
Example n. 3
    def backward(self, grad_output):
        v1, v2, y = self.saved_tensors

        buffer = v1.new()
        _idx = v1.new().byte()

        gw1 = grad_output.new()
        gw2 = grad_output.new()
        gw1.resize_as_(v1).copy_(v2)
        gw2.resize_as_(v1).copy_(v1)

        torch.mul(self.w1, self.w22, out=buffer)
        gw1.addcmul_(-1, buffer.expand_as(v1), v1)
        gw1.mul_(self.w.expand_as(v1))

        torch.mul(self.w1, self.w32, out=buffer)
        gw2.addcmul_(-1, buffer.expand_as(v1), v2)
        gw2.mul_(self.w.expand_as(v1))

        torch.le(self._outputs, 0, out=_idx)
        _idx = _idx.view(-1, 1).expand(gw1.size())
        gw1[_idx] = 0
        gw2[_idx] = 0

        torch.eq(y, 1, out=_idx)
        _idx = _idx.view(-1, 1).expand(gw2.size())
        gw1[_idx] = gw1[_idx].mul_(-1)
        gw2[_idx] = gw2[_idx].mul_(-1)

        if self.size_average:
            gw1.div_(y.size(0))
            gw2.div_(y.size(0))

        grad_output_val = grad_output[0]
        if grad_output_val != 1:
            gw1.mul_(grad_output_val)
            gw2.mul_(grad_output_val)

        return gw1, gw2, None
Example n. 4
def pck(source_points,warped_points,L_pck,alpha=0.1):
    # compute percentage of correct keypoints
    batch_size=source_points.size(0)
    pck=torch.zeros((batch_size))
    for i in range(batch_size):
        p_src = source_points[i,:]
        p_wrp = warped_points[i,:]
        N_pts = torch.sum(torch.ne(p_src[0,:],-1)*torch.ne(p_src[1,:],-1))
        point_distance = torch.pow(torch.sum(torch.pow(p_src[:,:N_pts]-p_wrp[:,:N_pts],2),0),0.5)
        L_pck_mat = L_pck[i].expand_as(point_distance)
        correct_points = torch.le(point_distance,L_pck_mat*alpha)
        pck[i]=torch.mean(correct_points.float())
    return pck
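
A small usage sketch under assumed shapes (batch of 2, 2 coordinates, 5 points per instance); the L_pck values are invented reference lengths:

import torch

src = torch.rand(2, 2, 5)                    # [batch, xy, num_points]
warped = src + 0.01 * torch.randn_like(src)  # small perturbation of the source points
L_pck = torch.full((2,), 1.0)                # fabricated per-instance reference lengths
print(pck(src, warped, L_pck, alpha=0.1))    # close to 1.0: most distances fall under 0.1 * L_pck
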
Example n. 5
def get_reward_fn(env, states_tensor, actions_tensor):
    if (env == 'lin_dyn') or (env.spec.id == 'lin-dyn-v0'):
        #set actions multiplier to 0 to try with reinforce
        rewards = -(
            torch.einsum('ijk,ijk->ij', [states_tensor, states_tensor]) +
            torch.einsum('ijk,ijk->ij', [actions_tensor, actions_tensor]))
        #rewards = torch.clamp(states_tensor[:,0]**2, min=0., max=1.0)
        return rewards

    if env.spec.id == 'Pendulum-v0':
        thcos = states_tensor[:, :, 0]
        thsin = states_tensor[:, :, 1]
        thdot = states_tensor[:, :, 2]

        #pdb.set_trace()
        #tanth = thsin/thcos
        #tanth[torch.isnan(tanth)] = 0
        th = torch.atan2(thsin, thcos)

        if torch.isnan(th).any():
            pdb.set_trace()

        #u = torch.clamp(actions_tensor, min=-MAX_TORQUE, max=MAX_TORQUE).squeeze()
        u = actions_tensor.squeeze().unsqueeze(1)

        costs = angle_normalize(th)**2 + .1 * thdot**2 + .001 * (u**2)

        return -costs  #.unsqueeze(2)

    elif env.spec.id == 'HalfCheetah-v2':
        dt = 0.05  #from stepping through env
        xposbefore = states_tensor[:, 0]
        # xposbefore = self.sim.data.qpos[0]
        # self.do_simulation(action, self.frame_skip) #can't do this step because this is also for stepping through environment, but I actually HAVE the next states, and can compare them directly here
        xposafter = states_tensor  #self.sim.data.qpos[0]
        # ob = self._get_obs()  # env internals, not available in this standalone function

        reward_ctrl = -0.1 * torch.square(actions_tensor).sum()

        reward_run = (xposafter - xposbefore) / dt

        reward = reward_ctrl + reward_run

        return reward

        # for reference, the env computes observations as:
        # def _get_obs(self):
        #     return np.concatenate([
        #         self.sim.data.qpos.flat[1:],
        #         self.sim.data.qvel.flat,
        #     ])

    elif env.spec.id == 'dm-Pendulum-v0':
        COS_BND = np.cos(np.deg2rad(8))

        rewards = (torch.le(states_tensor[:, :, 0],
                            1) == torch.ge(states_tensor[:, :, 0], COS_BND))
        return rewards.double()

    elif env.spec.id == 'dm-Cartpole-swingup-v0':  #TAKES NEXT STATE FOR REWARD NOT CURRENT STATE
        rewards_to_return = torch.zeros(states_tensor.shape[0])
        for d in range(states_tensor.shape[0]):
            pole_angle_cosine = states_tensor[d, 1]
            upright = (pole_angle_cosine + 1) / 2
            centered = tolerance(states_tensor[d, 0], margin=2)
            centered = (1 + centered) / 2
            small_control = tolerance(actions_tensor[d, :],
                                      margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic')[0]
            small_control = (4 + small_control) / 5
            small_velocity = tolerance(states_tensor[d, 4], margin=5).min()
            small_velocity = (1 + small_velocity) / 2

            rewards_to_return[d] = upright.mean(
            ) * small_control * small_velocity * centered  #torch.from_numpy(centered)
            # OrderedDict([('position', array([ 0.01871485, -0.99999419, -0.00340747])), ('velocity', array([0.04293839, 0.06518433]))])

        return rewards_to_return

    elif env.spec.id == 'CartPole-v0':
        theta_threshold_radians = 12 * 2 * np.pi / 360
        x_threshold = 2.4
        x = states_tensor[:, :, 0]
        #x_dot = states_tensor[:,:,1]
        theta = states_tensor[:, :, 2]
        #theta_dot = states_tensor[:,:,3]

        #this is a problem because ITS A BIG MATRIX WITH BATCHES AND DIFFERENT TIME STEPS!!!!!!
        done =  (x < -x_threshold) \
          | (x > x_threshold) \
          | (theta < -theta_threshold_radians) \
          | (theta > theta_threshold_radians)
        #done = bool(done)
        return done.transpose(1, 0)
        # if not done:
        #           reward = 1.0
        #       elif self.steps_beyond_done is None:
        #           # Pole just fell!
        #           self.steps_beyond_done = 0
        #           reward = 1.0
        #       else:
        #           if self.steps_beyond_done == 0:
        #               logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        #           self.steps_beyond_done += 1
        #           reward = 0.0

        # else:
        # 	raise NotImplementedError

    # elif env.spec.id == 'dm_cartpole_balance':

    # 	states = states_tensor.cpu().detach().numpy()
    # 	print(states)
    # 	states = np.swapaxes(np.atleast_3d(states), 1,2)
    # 	pole_angle_cosine = states[:,:,1]
    # 	cart_position = states[:,:,0]
    # 	angular_vel = states[:,:,]

    # 	control = actions_tensor.cpu().detach().numpy().squeeze()

    # 	upright = (pole_angle_cosine + 1) / 2

    # 	centered = tolerance(cart_position, margin=2)
    # 	centered = (1 + centered) / 2
    # 	small_control = tolerance(actions_tensor, margin=1,
    # 					value_at_margin=0.000000001,
    # 					sigmoid='quadratic')[0]
    # 	small_control = (4 + small_control) / 5

    # 	small_velocity = tolerance(angular_vel, margin=5).min()
    # 	small_velocity = (1 + small_velocity) / 2

    # 	return torch.FloatTensor(np.expand_dims(upright.mean(axis=0),axis=1) * small_control * small_velocity * centered.T)

    return 0
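
As a side note, a toy sketch of the dm-Pendulum "within angle band" check above, with fabricated cos(theta) values; COS_BND is recomputed exactly as in that branch:

import numpy as np
import torch

COS_BND = np.cos(np.deg2rad(8))
costheta = torch.tensor([[0.999, 0.90, 1.0]])          # fabricated cos(theta) values
in_band = torch.le(costheta, 1) == torch.ge(costheta, COS_BND)
print(in_band.double())  # tensor([[1., 0., 1.]], dtype=torch.float64)
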
Example n. 6
File: losses.py Project: Don98/Loss
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []
        anchor = anchors[0, :, :]

        anchor_widths  = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x   = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y   = anchor[:, 1] + 0.5 * anchor_heights
        # print("Batch size : ",batch_size)
        # print("Class size : ",classifications.shape)
        # print("Ressg size : ",regressions.shape)
        # print(annotations)
        num = len(os.listdir("./"))
        f = open("record" + str(num) + ".txt","w")
        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]
            # print("bbox_annotation shape is : ",bbox_annotation.shape)
            # print(bbox_annotation)
            for i in range(bbox_annotation.shape[0]):
                f.write(str(bbox_annotation[i])[7:-18] + "\n")
            f.write("="*50 + "\n")

            if bbox_annotation.shape[0] == 0:
                # print(annotations)
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                    classification_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                    classification_losses.append(torch.tensor(0).float())
                f.write("0 0\n")
                continue
            
            if torch.cuda.is_available():
                each_bbox_loss = torch.zeros(bbox_annotation.shape[0]).cuda()
            else:
                each_bbox_loss = torch.zeros(bbox_annotation.shape[0])
            
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations
            # print("IOU shape is : " , IoU.shape)
            # print(IoU)
            IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1
            # print("IOU_max shape is : ",IoU_max.shape)
            # print(IoU_max)
            # print("IOU_argmax shape is : " ,IoU_argmax.shape)
            # print(IoU_argmax)
            
            #import pdb
            #pdb.set_trace()

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1

            # print("Target shape is : ",targets.shape)
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)
            
            # print("positive_indices shape is ", positive_indices.shape)
            # print(positive_indices)
            num_positive_anchors = positive_indices.sum()
            # print(num_positive_anchors)
            assigned_annotations = bbox_annotation[IoU_argmax, :]
            # print("assigned_annotations shape is : " ,assigned_annotations.shape)
            # print(assigned_annotations)
            # classP   True_CLASS
            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
            # print("target shape is : " ,targets.shape)
            # print(targets)
            if torch.cuda.is_available():
                alpha_factor = torch.ones(targets.shape).cuda() * alpha
            else:
                alpha_factor = torch.ones(targets.shape) * alpha
            # = -(aplha)^gamma * log(classification) - (1 - alpha)^gamma * log(1 - classification) 
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # print("focal shape is ",focal_weight.shape)
            # print(focal_weight)
            # print("BCE shape is ",bce.shape)
            # print(bce)
            
            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce
            # cls_loss = bce
            # print("CLS loss shape is : " , cls_loss.shape)
            # print(cls_loss)

            # print("cls_loss[0] : " ,cls_loss[0])
            if torch.cuda.is_available():
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            else:
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape))
            tmp1 = bce[positive_indices]
            # print("Tmp1 shape is : " ,tmp1.shape)
            # tmp5 = classification[positive_indices,assigned_annotations[positive_indices, 4].long()]
            tmp = classification[positive_indices,:]
            # print("Tmp shape is : " , tmp.shape)
            # P bbox_annotation clss_loss
            tmp2 = cls_loss[positive_indices]
            # print("Tmp2 shape is : " , tmp2.shape)
            
            classification_losses.append(cls_loss.sum()/torch.clamp(num_positive_anchors.float(), min=1.0))
            
            f.write(str(classification_losses) + "\n")    
            # print("clss_loss sum is : ",cls_loss.sum())
            # print("num_positive_anchors is : ",cls_loss.sum())
            # print("classification_losses shape is : " , len(classification_losses))
            # print(classification_losses)
            
            # print("final:")
            # print(alpha_factor[positive_indices][0])
            # print(focal_weight[positive_indices][0])
            # print(bce[positive_indices][0])
            # print(cls_loss[positive_indices][0])
            # compute the loss for regression
            # start = 0;end = 0;start1 = 0;end1 = 0
            # if tmp.shape[0] != 0:
                # start  = str(tmp1[0]).index("[")
                # end    = str(tmp1[0]).index("]")
                # start1 = str(tmp[0]).index("[")
                # end1   = str(tmp[0]).index("]")
            # else:
                # print(tmp.shape,positive_indices.sum())
                # print(classification.shape)
                # print(bce.shape)
                # print(cls_loss.shape)
                # print(cls_loss.sum())
                # print(torch.clamp(num_positive_anchors.float(), min=1.0))
            the_Iou_argmax = IoU_argmax[positive_indices]
            for i in range(tmp.shape[0]):
                # print('{}'.format(tmp[i].data))
                # length1 = len(str(tmp1[i]))
                # length  = len(str(tmp[i]))
                # if(start >=  length1 or str(tmp1[i])[start] != "["):
                    # start  = str(tmp1[i]).index("[")
                # if(end >= length1 or str(tmp1[i])[end] != "]"):
                    # end    = str(tmp1[i]).index("]")
                # if(start1 >= length or str(tmp[i])[start1] != "["):
                    # start1 = str(tmp[i]).index("[")
                # if(end1 >= length or str(tmp[i])[end1] != "]"):
                    # end1   = str(tmp[i]).index("]")
                f.write(str(tmp[i])+ " "+ str(the_Iou_argmax[i].item()) + " " + str(tmp1[i]) + " " + str(tmp2[i].sum().item()) + "\n") 
            f.write("-"*50+"\n")
            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths  = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x   = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y   = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths  = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()
                # print("New Target shape is ",targets.shape)
                # print(targets)
                if torch.cuda.is_available():
                    targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()
                else:
                    targets = targets/torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                negative_indices = 1 + (~positive_indices)
                # print("~positive_indices shape is : ")
                # print(~positive_indices)
                # print("negative_indices shape is :" ,negative_indices.shape)
                # print(negative_indices)
                regression_diff = torch.abs(targets - regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                # print("regression_loss shape is : ", regression_loss.shape)
                # print(regression_loss)
                regression_losses.append(regression_loss.mean())
                f.write(str(regression_losses[-1].item()) + "\n")
                # start = str(regression_loss[0]).index("[")
                # end = str(regression_loss[0]).index("]")
                for i in range(regression_loss.shape[0]):
                    # if(str(regression_loss[i])[start] != "["):
                        # start = str(regression_loss[i]).index("[")
                    # if(str(regression_loss[i])[end] != "["):
                        # end = str(regression_loss[i]).index("]")
                    f.write(str(IoU_argmax[positive_indices][i].item()) + " " + str(regression_loss[i]) + " " + str(IoU_max[positive_indices][i].item()) + " " + str(anchor[positive_indices][i]) + "\n")
                
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                f.write("0")
        result = torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True)
        f.write(str((classification_losses[-1] + regression_losses[-1]).item()) + "\n")
        f.close()
        return result
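
For illustration, the smooth-L1 branch selection used in the regression loss above, isolated on a fabricated regression_diff tensor:

import torch

regression_diff = torch.tensor([0.01, 0.5])
regression_loss = torch.where(
    torch.le(regression_diff, 1.0 / 9.0),      # quadratic branch for small errors
    0.5 * 9.0 * torch.pow(regression_diff, 2),
    regression_diff - 0.5 / 9.0)               # linear branch otherwise
print(regression_loss)  # tensor([0.0005, 0.4444]) approximately
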
Example n. 7
def get_mel_banks(num_bins, window_length_padded, sample_freq, low_freq,
                  high_freq, vtln_low, vtln_high, vtln_warp_factor):
    # type: (int, int, float, float, float, float, float, float) -> Tuple[torch.Tensor, torch.Tensor]
    """
    Returns:
        Tuple[torch.Tensor, torch.Tensor]: The tuple consists of ``bins`` (which is
        melbank of size (``num_bins``, ``num_fft_bins``)) and ``center_freqs`` (which is
        center frequencies of bins of size (``num_bins``)).
    """
    assert num_bins > 3, 'Must have at least 3 mel bins'
    assert window_length_padded % 2 == 0
    num_fft_bins = window_length_padded / 2
    nyquist = 0.5 * sample_freq

    if high_freq <= 0.0:
        high_freq += nyquist

    assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \
        ('Bad values in options: low-freq %f and high-freq %f vs. nyquist %f' % (low_freq, high_freq, nyquist))

    # fft-bin width [think of it as Nyquist-freq / half-window-length]
    fft_bin_width = sample_freq / window_length_padded
    mel_low_freq = mel_scale_scalar(low_freq)
    mel_high_freq = mel_scale_scalar(high_freq)

    # divide by num_bins+1 in next line because of end-effects where the bins
    # spread out to the sides.
    mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1)

    if vtln_high < 0.0:
        vtln_high += nyquist

    assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and
                                       (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \
        ('Bad values in options: vtln-low %f and vtln-high %f, versus low-freq %f and high-freq %f' %
            (vtln_low, vtln_high, low_freq, high_freq))

    bin = torch.arange(num_bins).unsqueeze(1)
    left_mel = mel_low_freq + bin * mel_freq_delta  # size(num_bins, 1)
    center_mel = mel_low_freq + (bin +
                                 1.0) * mel_freq_delta  # size(num_bins, 1)
    right_mel = mel_low_freq + (bin +
                                2.0) * mel_freq_delta  # size(num_bins, 1)

    if vtln_warp_factor != 1.0:
        left_mel = vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq,
                                      vtln_warp_factor, left_mel)
        center_mel = vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
                                        high_freq, vtln_warp_factor,
                                        center_mel)
        right_mel = vtln_warp_mel_freq(vtln_low, vtln_high, low_freq,
                                       high_freq, vtln_warp_factor, right_mel)

    center_freqs = inverse_mel_scale(center_mel)  # size (num_bins)
    # size(1, num_fft_bins)
    mel = mel_scale(fft_bin_width * torch.arange(num_fft_bins)).unsqueeze(0)

    # size (num_bins, num_fft_bins)
    up_slope = (mel - left_mel) / (center_mel - left_mel)
    down_slope = (right_mel - mel) / (right_mel - center_mel)

    if vtln_warp_factor == 1.0:
        # left_mel < center_mel < right_mel so we can min the two slopes and clamp negative values
        bins = torch.max(torch.zeros(1), torch.min(up_slope, down_slope))
    else:
        # warping can move the order of left_mel, center_mel, right_mel anywhere
        bins = torch.zeros_like(up_slope)
        up_idx = torch.gt(mel, left_mel) & torch.le(
            mel, center_mel)  # left_mel < mel <= center_mel
        down_idx = torch.gt(mel, center_mel) & torch.lt(
            mel, right_mel)  # center_mel < mel < right_mel
        bins[up_idx] = up_slope[up_idx]
        bins[down_idx] = down_slope[down_idx]

    return bins, center_freqs
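
A toy sketch of the unwarped triangular-filter formula above, with hypothetical mel breakpoints instead of the computed left/center/right mel tensors:

import torch

left_mel, center_mel, right_mel = 10.0, 20.0, 30.0   # hypothetical mel breakpoints
mel = torch.linspace(5.0, 35.0, 7)
up_slope = (mel - left_mel) / (center_mel - left_mel)
down_slope = (right_mel - mel) / (right_mel - center_mel)
bins = torch.max(torch.zeros(1), torch.min(up_slope, down_slope))
print(bins)  # zero outside [left_mel, right_mel], peaking at 1 at center_mel
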
Example n. 8
 def calculate(self, sample_list, model_output, k, *args, **kwargs):
     ranks = self.get_ranks(sample_list, model_output)
     recall = float(torch.sum(torch.le(ranks, k))) / ranks.size(0)
     return recall
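
A standalone sketch of the same recall@k computation on a fabricated ranks tensor:

import torch

ranks = torch.tensor([1, 3, 7, 2, 12])
k = 5
recall_at_k = float(torch.sum(torch.le(ranks, k))) / ranks.size(0)
print(recall_at_k)  # 0.6: three of the five ranks are <= k
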
Example n. 9
def less_equal(x, y, **kwargs):
    if not torch.is_tensor(x):
        x = torch.tensor(x)
    if not torch.is_tensor(y):
        y = torch.tensor(y)
    return torch.le(x, y, **kwargs)
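
A couple of hedged usage lines for the wrapper above, showing that scalars are converted to tensors before torch.le broadcasts:

import torch

print(less_equal(torch.tensor([1, 2, 3]), 2))     # tensor([ True,  True, False])
print(less_equal(0.5, torch.tensor([0.1, 0.9])))  # tensor([False,  True])
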
Example n. 10
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                    classification_losses.append(
                        torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())
                    classification_losses.append(torch.tensor(0).float())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(
                anchors[0, :, :],
                bbox_annotation[:, :4])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            #import pdb
            #pdb.set_trace()

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1

            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            if torch.cuda.is_available():
                alpha_factor = torch.ones(targets.shape).cuda() * alpha
            else:
                alpha_factor = torch.ones(targets.shape) * alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            if torch.cuda.is_available():
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                       torch.zeros(cls_loss.shape).cuda())
            else:
                cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                       torch.zeros(cls_loss.shape))

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:,
                                                 2] - assigned_annotations[:,
                                                                           0]
                gt_heights = assigned_annotations[:,
                                                  3] - assigned_annotations[:,
                                                                            1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                if torch.cuda.is_available():
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]
                                                      ]).cuda()
                else:
                    targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]])

                negative_indices = 1 + (~positive_indices)

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).float().cuda())
                else:
                    regression_losses.append(torch.tensor(0).float())

        return torch.stack(classification_losses).mean(
            dim=0,
            keepdim=True), torch.stack(regression_losses).mean(dim=0,
                                                               keepdim=True)
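
An isolated sketch of the IoU-threshold target assignment performed above, using fabricated per-anchor IoU maxima and two classes:

import torch

IoU_max = torch.tensor([0.30, 0.45, 0.80])   # fabricated best-overlap IoUs per anchor
targets = torch.ones(3, 2) * -1               # -1 marks anchors to ignore
targets[torch.lt(IoU_max, 0.4), :] = 0        # clear negatives (IoU < 0.4)
positive_indices = torch.ge(IoU_max, 0.5)     # positives (IoU >= 0.5)
print(positive_indices)                       # tensor([False, False,  True])
print(targets)                                # row 0 is zeros, row 1 stays -1, row 2 gets its one-hot label next
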
Example n. 11
y.scatter_(-1, input_y.unsqueeze(1), 1)
# Parameters:
# 1st: dim, the dimension along which to scatter.
# 2nd: the column tensor indicating the indices of the elements to scatter.
# - this tensor must have the same number of dimensions as y_onehot (2), so
#   we use unsqueeze to add an extra dimension (from (4898,) to (4898, 1)).
# 3rd: tensor containing the elements to scatter.
print(y[:10])

# Normalize the input data (Using z-norm/standardization):
x_mean = torch.mean(input_x, dim=0)
x_variance = torch.var(input_x, dim=0)
x = (input_x - x_mean) / torch.sqrt(x_variance)
print(x[:10])

# Determine which wines are bad:
# wines with a score <= 3 are bad:
bad_index = torch.le(input_y, 3)
print(bad_index.shape, bad_index[:10], bad_index.sum())

bad_data = data[torch.le(input_y, 3)]
mid_data = data[torch.lt(input_y, 7) & torch.gt(input_y, 3)]
good_data = data[torch.ge(input_y, 7)]

bad_mean = torch.mean(bad_data, dim=0)
mid_mean = torch.mean(mid_data, dim=0)
good_mean = torch.mean(good_data, dim=0)

for i, args in enumerate(zip(next(csv.reader(open(file_path), delimiter=';')), bad_mean, mid_mean, good_mean)):
    print('{:2} {:20} {:6.2f} {:6.2f} {:6.2f}'.format(i, *args))
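
A toy sketch of the same torch.le masking idea on an invented score vector:

import torch

scores = torch.tensor([2, 5, 8, 3, 7])
bad_mask = torch.le(scores, 3)
print(bad_mask)          # tensor([ True, False, False,  True, False])
print(scores[bad_mask])  # tensor([2, 3])
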
Example n. 12
    def ppo_update(self,
                   states,
                   actions,
                   log_probs,
                   returns,
                   advantages,
                   sg_returns,
                   sg_advantage,
                   c_q_returns,
                   c_costs,
                   clip_param=0.2):
        """
        does the actual PPO update here
        """
        for _ in range(self.ppo_epochs):
            for state, action, old_log_probs, return_, advantage, sg_adv, sg_return_, c_q_return_, c_cost_ in self.ppo_iter(
                    states, actions, log_probs, returns, advantages,
                    sg_returns, sg_advantage, c_q_returns, c_costs):

                val, mu_safe, dist = self.safe_ac(state, current_cost=c_cost_)

                cost_q_val = self.cost_critic(state, mu_safe.detach())

                # for actor
                entropy = dist.entropy().mean()
                new_log_probs = dist.log_prob(action)

                ratio = (new_log_probs - old_log_probs).exp()
                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1.0 - self.clip_param,
                                    1.0 + self.clip_param) * advantage

                actor_loss = -torch.min(surr1, surr2)

                if self.args.cost_sg_coeff:
                    # safeguard policy here, without baseline
                    _, sg_mu, sg_std = self.ac_model(state)
                    sg_val = self.sg_model(state)
                    unconst_dist = torch.distributions.Normal(sg_mu, sg_std)
                    sg_new_log_probs = unconst_dist.log_prob(action)

                    sg_ratio = (sg_new_log_probs - old_log_probs).exp()

                    sg_1 = sg_ratio * sg_adv
                    sg_2 = torch.clamp(sg_ratio, 1.0 - self.clip_param,
                                       1.0 + self.clip_param) * sg_adv
                    sg_loss = -torch.min(sg_1, sg_2)

                    violate_mask = torch.le(c_q_return_ + c_q_return_,
                                            self.args.d0).float().detach()

                    actor_loss = violate_mask * actor_loss + (
                        1. - violate_mask) * self.args.cost_sg_coeff * sg_loss
                #--------------------------------------------------------------

                actor_loss = actor_loss.mean()

                # add to the final  ac loss
                critic_loss = (return_ - val).pow(2).mean()

                ac_loss = (self.args.value_loss_coef * critic_loss) + \
                        (actor_loss) - (self.args.beta * entropy)

                self.ac_optimizer.zero_grad()
                ac_loss.backward()
                self.ac_optimizer.step()

                # for costs
                # for reviewer
                self.cost_critic.zero_grad()

                cost_critic_loss = (c_q_return_ - cost_q_val).pow(2).mean()

                self.critic_optimizer.zero_grad()
                cost_critic_loss.backward()

                self.critic_optimizer.step()

                # clean everything just in case
                self.clear_models_grad()

                # extra step
                if self.args.cost_sg_coeff:
                    sg_val_loss = self.args.value_loss_coef * (
                        sg_return_ - sg_val).pow(2).mean()
                    sg_val_loss.backward()
                    self.sg_optimizer.step()

                    # clean everything just in case
                    self.clear_models_grad()
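
A minimal sketch of the safeguard mask above, with fabricated cost returns and a stand-in for self.args.d0; it mirrors the code's c_q_return_ + c_q_return_ expression:

import torch

c_q_return_ = torch.tensor([0.2, 0.9])   # fabricated cost returns
d0 = 1.0                                 # stands in for self.args.d0
violate_mask = torch.le(c_q_return_ + c_q_return_, d0).float().detach()
print(violate_mask)  # tensor([1., 0.]): 1 keeps the PPO surrogate, 0 falls back to the safeguard loss
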
Example n. 13
    def negLogLikelihoodLoss(self, batchInput):
        wordSeqTensor, tagSeqTensor, wordSeqLengths, charSeqTensor, charSeqLengths, seq2NodeTensor, node2SeqTensor, adjMatrixTensor, gazNode2Idxs, gazNodeLengths, nodeNums, gazBlankState, fwbigramTensor, bwbigramTensor = batchInput
        batchSize = wordSeqTensor.shape[0]
        sentLength = wordSeqTensor.shape[1]
        maskTemp = torch.arange(1, sentLength + 1, dtype=torch.int64).view(
            1, sentLength).expand(batchSize, sentLength)
        if self.useGpu:
            maskTemp = move2cuda(maskTemp)
        mask = torch.le(
            maskTemp,
            wordSeqLengths.view(batchSize, 1).expand(batchSize, sentLength))
        if self.useGpu:
            mask = move2cuda(mask)
        if self.useChar:
            wordSeqEmbedding = self.dropout(
                self.wordEmbedding(wordSeqTensor, charSeqTensor,
                                   charSeqLengths))
        else:
            if self.useBigram:
                wordSeqEmbedding = self.dropout(
                    torch.cat([
                        self.wordEmbedding(wordSeqTensor),
                        self.fwbigramEmbedding(fwbigramTensor),
                        self.bwbigramEmbedding(bwbigramTensor)
                    ], 2))
            else:
                wordSeqEmbedding = self.dropout(
                    self.wordEmbedding(wordSeqTensor))

        wordStateEmbedding = self.embStateLinear(wordSeqEmbedding)
        maxMainNodeLength = node2SeqTensor.shape[1]
        mainNodeState = torch.gather(
            wordStateEmbedding, 1,
            node2SeqTensor.expand(batchSize, maxMainNodeLength,
                                  wordStateEmbedding.shape[2]))
        if self.gaNum > 0:
            initNodeStateEmbedding = torch.cat([
                mainNodeState,
                gazBlankState.view(batchSize, -1, 1).expand(
                    batchSize, -1, self.stateDim)
            ],
                                               dim=1)
        else:
            initNodeStateEmbedding = mainNodeState
        startNodeIdx = nodeNums.clone()
        for gazIdx in range(self.gaNum):
            gazState = self.gaLinear[gazIdx](self.gaEmb[gazIdx](
                gazNode2Idxs[gazIdx]))
            gazMaskRaw = torch.arange(0, gazState.shape[1],
                                      dtype=torch.int64).view(
                                          1, gazState.shape[1],
                                          1).expand(batchSize,
                                                    gazState.shape[1],
                                                    self.stateDim)
            if self.useGpu:
                gazMaskRaw = move2cuda(gazMaskRaw)
            gazMask = torch.where(
                gazMaskRaw < gazNodeLengths[gazIdx].view(batchSize, 1, 1),
                gazMaskRaw, gazNodeLengths[gazIdx].view(batchSize, 1, 1))
            if self.useGpu:
                gazMask = move2cuda(gazMask)
            gazMask = gazMask + startNodeIdx.view(batchSize, 1, 1).expand(
                batchSize, gazState.shape[1], self.stateDim)
            initNodeStateEmbedding.scatter_(1, gazMask, gazState)
            startNodeIdx = startNodeIdx + gazNodeLengths[gazIdx]

        nodeGraphEmbeddings = [initNodeStateEmbedding]
        for i in range(self.nLayer):
            nodeGraphEmbeddings.append(
                self.graphEmb[i](nodeGraphEmbeddings[i], adjMatrixTensor,
                                 adjMatrixTensor.shape[1]))
        nodeGraphEmbedding = nodeGraphEmbeddings[self.nLayer]
        wordGraphEmbedding = torch.gather(
            nodeGraphEmbedding, 1,
            seq2NodeTensor.expand(
                [batchSize, sentLength, nodeGraphEmbedding.shape[2]]))

        if self.useRnn:
            rnnEmbedding = self.encoder(wordGraphEmbedding, wordSeqLengths)
            wordFeatures = self.logsoftmax(self.embFeatureLinear(rnnEmbedding))
        else:
            wordFeatures = self.logsoftmax(
                self.embFeatureLinear(wordGraphEmbedding))
        totalScore, scores = self.crf(wordFeatures, wordSeqLengths, mask)
        goldScore = self.crf.scoreSentence(tagSeqTensor, wordSeqLengths,
                                           scores, mask)

        return totalScore - goldScore
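
A standalone sketch of the sequence-length mask that torch.le builds at the top of this method, with fabricated lengths:

import torch

wordSeqLengths = torch.tensor([3, 5])    # fabricated sentence lengths
batchSize, sentLength = 2, 5
maskTemp = torch.arange(1, sentLength + 1, dtype=torch.int64).view(1, sentLength).expand(batchSize, sentLength)
mask = torch.le(maskTemp, wordSeqLengths.view(batchSize, 1).expand(batchSize, sentLength))
print(mask)
# tensor([[ True,  True,  True, False, False],
#         [ True,  True,  True,  True,  True]])
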
Example n. 14
    def test_comparison_ops_with_type_promotion(self, device):
        value_for_type = {
            torch.uint8: (1 << 5),
            torch.int8: (1 << 5),
            torch.int16: (1 << 10),
            torch.int32: (1 << 20),
            torch.int64: (1 << 35),
            torch.float16: (1 << 10),
            torch.float32: (1 << 20),
            torch.float64: (1 << 35)
        }
        comparison_ops = [
            dict(
                name="lt",
                out_op=lambda x, y, d: torch.lt(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.lt(x, y),
                compare_op=lambda x, y: x < y,
            ),
            dict(
                name="le",
                out_op=lambda x, y, d: torch.le(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.le(x, y),
                compare_op=lambda x, y: x <= y,
            ),
            dict(
                name="gt",
                out_op=lambda x, y, d: torch.gt(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.gt(x, y),
                compare_op=lambda x, y: x > y,
            ),
            dict(
                name="ge",
                out_op=lambda x, y, d: torch.ge(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.ge(x, y),
                compare_op=lambda x, y: x >= y,
            ),
            dict(
                name="eq",
                out_op=lambda x, y, d: torch.eq(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.eq(x, y),
                compare_op=lambda x, y: x == y,
            ),
            dict(
                name="ne",
                out_op=lambda x, y, d: torch.ne(
                    x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
                ret_op=lambda x, y: torch.ne(x, y),
                compare_op=lambda x, y: x != y,
            ),
        ]
        for op in comparison_ops:
            for dt1 in torch.testing.get_all_math_dtypes(device):
                for dt2 in torch.testing.get_all_math_dtypes(device):
                    val1 = value_for_type[dt1]
                    val2 = value_for_type[dt2]
                    t1 = torch.tensor([val1], dtype=dt1, device=device)
                    t2 = torch.tensor([val2], dtype=dt2, device=device)
                    expected = torch.tensor([op["compare_op"](val1, val2)],
                                            dtype=torch.bool)

                    out_res = op["out_op"](t1, t2, device)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)

                    out_res = op["ret_op"](t1, t2)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)

                    # test that comparing a zero dim tensor with another zero dim tensor has type promotion behavior
                    t1 = torch.tensor(val1, dtype=dt1, device=device)
                    t2 = torch.tensor(val2, dtype=dt2, device=device)
                    expected = torch.tensor(op["compare_op"](val1, val2),
                                            dtype=torch.bool)

                    out_res = op["out_op"](t1, t2, device)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)

                    out_res = op["ret_op"](t1, t2)
                    self.assertEqual(out_res, expected)
                    self.assertTrue(out_res.dtype == torch.bool)
                    self.assertTrue(t1.dtype == dt1)
                    self.assertTrue(t2.dtype == dt2)
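
A quick standalone sketch of the promotion behavior this test checks: the comparison result is always torch.bool while the inputs keep their dtypes:

import torch

t1 = torch.tensor([1 << 5], dtype=torch.uint8)
t2 = torch.tensor([1 << 20], dtype=torch.float32)
res = torch.le(t1, t2)
print(res, res.dtype)       # tensor([True]) torch.bool
print(t1.dtype, t2.dtype)   # torch.uint8 torch.float32: inputs keep their original dtypes
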
Example n. 15
    def infer(self, memory, memory_lengths):
        """ Decoder inference
        PARAMS
        ------
        memory: Encoder outputs
        memory_lengths: lengths of the encoder outputs, used to build the attention mask

        RETURNS
        -------
        mel_outputs: mel outputs from the decoder
        gate_outputs: gate outputs from the decoder
        alignments: sequence of attention weights from the decoder
        """
        decoder_input = self.get_go_frame(memory)

        mask = get_mask_from_lengths(memory_lengths)
        (attention_hidden, attention_cell, decoder_hidden, decoder_cell,
         attention_weights, attention_weights_cum, attention_context,
         processed_memory) = self.initialize_decoder_states(memory)

        mel_lengths = torch.zeros([memory.size(0)],
                                  dtype=torch.int32,
                                  device=memory.device)
        not_finished = torch.ones([memory.size(0)],
                                  dtype=torch.int32,
                                  device=memory.device)

        mel_outputs, gate_outputs, alignments = (torch.zeros(1),
                                                 torch.zeros(1),
                                                 torch.zeros(1))
        first_iter = True
        while True:
            decoder_input = self.prenet(decoder_input)
            (mel_output, gate_output, attention_hidden, attention_cell,
             decoder_hidden, decoder_cell, attention_weights,
             attention_weights_cum, attention_context) = self.decode(
                 decoder_input, attention_hidden, attention_cell,
                 decoder_hidden, decoder_cell, attention_weights,
                 attention_weights_cum, attention_context, memory,
                 processed_memory, mask)

            if first_iter:
                mel_outputs = mel_output.unsqueeze(0)
                gate_outputs = gate_output
                alignments = attention_weights
                first_iter = False
            else:
                mel_outputs = torch.cat((mel_outputs, mel_output.unsqueeze(0)),
                                        dim=0)
                gate_outputs = torch.cat((gate_outputs, gate_output), dim=0)
                alignments = torch.cat((alignments, attention_weights), dim=0)

            dec = torch.le(torch.sigmoid(gate_output),
                           self.gate_threshold).to(torch.int32).squeeze(1)

            not_finished = not_finished * dec
            mel_lengths += not_finished

            if self.early_stopping and torch.sum(not_finished) == 0:
                break
            if len(mel_outputs) == self.max_decoder_steps:
                print("Warning! Reached max decoder steps")
                break

            decoder_input = mel_output

        mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
            mel_outputs, gate_outputs, alignments)

        return mel_outputs, gate_outputs, alignments, mel_lengths
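
A minimal sketch of the stopping criterion above, with fabricated gate logits; items whose sigmoid stays at or below gate_threshold are counted as still decoding:

import torch

gate_output = torch.tensor([[2.0], [-3.0], [0.1]])   # fabricated gate logits, one per batch item
gate_threshold = 0.5
dec = torch.le(torch.sigmoid(gate_output), gate_threshold).to(torch.int32).squeeze(1)
print(dec)  # tensor([0, 1, 0], dtype=torch.int32): 1 means the item is still decoding
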
Example n. 16
def percentage_correct_keypoints(keypoints: np.ndarray,
                                 predictions: np.ndarray,
                                 thresh: float = 0.5,
                                 pck_type: str = "object",
                                 image_size: float = None):
    """

    Args:
        keypoints: Keypoints with shape [B, N, 2] or [N,2]
        predictions: Predicted keypoints with shape [B, N, 2] or [N,2]
        image_size (optional): indicates the size of the image, necessary when pck_type == "image"
        thresh: threshold for pck
        pck_type (optional): default object, indicates which way to compute the pck, e.g. via image size
                            or max object distance
                            "object": take the max of the object * alpha
                            "image": take the image width/height * alpha

    Returns: pck mean, pck per joint

    """
    if pck_type == "image" and image_size == None:
        raise ValueError(f"When using pck_type='image', then you need to pass the image_size!")
    assert pck_type in ["image", "object"], f"Got wrong pck_type, got {pck_type}"
    assert len(keypoints.shape) == 3, f"Only implemented for a batch got shape of keypoints: {keypoints.shape}"
    keypoints = sure_to_torch(keypoints).cpu()
    predictions = sure_to_torch(predictions).cpu()
    assert len(keypoints) == len(predictions), "Keypoints and predictions tensor need to have the same size."
    batch_size = keypoints.size(0)
    pck = torch.zeros(batch_size)
    num_pts = torch.zeros(batch_size)
    num_joints = torch.zeros((batch_size, keypoints.size(1)))
    correct_index = -torch.ones((batch_size, len(keypoints[0])))
    l2distance = torch.zeros((batch_size, len(keypoints[0])))
    for idx in range(batch_size):
        # computes pck for all keypoint pairs of once instance
        p_src = keypoints[idx, :]
        p_pred = predictions[idx, :]
        # True values in mask indicate the keypoint was present in the dataset
        # Zero coordinates indicate the keypoint was not in the dataset
        mask = torch.ne(p_src[:, 0], 0) * torch.ne(p_src[:, 1], 0)
        # if fewer than two valid points are present
        if len(p_src[mask]) < 2:
            pck[idx] = 0
            correct_index[idx, :] = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
            continue

        num_joints[idx] = mask
        if pck_type == 'object':
            l_pck = row_pairwise_distances(p_src[mask])
            # l_pck = torch.Tensor([torch.max(p_src.max(1)[0] - p_src.min(1)[0])])
        elif pck_type == 'image':
            l_pck = torch.Tensor([image_size])

        # Sum all available keypoints in the dataset
        N_pts = torch.sum(mask)
        # Set points not present in the dataset to false in source and target points
        p_src[~mask, :] = 0
        p_pred[~mask, :] = 0
        num_pts[idx] = N_pts
        point_distance = torch.pow(torch.sum(torch.pow(p_src - p_pred, 2), 1), 0.5)  # exponent 0.5 takes the square root, i.e. the Euclidean distance
        point_distance[~mask] = 0
        l2distance[idx, :] = point_distance.view(-1)
        L_pck_mat = l_pck.expand_as(point_distance)  # val -> val, val
        correct_points = torch.le(point_distance, L_pck_mat * thresh).type(torch.uint8)

        correct_points[~mask] = 0

        # C_pts = torch.sum(correct_points)
        correct_index[idx, :] = correct_points.view(-1)
        # PCK for the image is divided by the number of valid points in GT
        # correct_not_found = sum(p_pred[~mask][:,0] == 0)
        pck[idx] = torch.sum(correct_points.float()) / torch.clamp(N_pts.float(), min=1e-6)
        assert pck[idx] >= 0

    # Reduce to joint granularity
    correct_per_joint = torch.sum(correct_index, dim=0)
    sum_available_joint = torch.sum(num_joints, dim=0)
    l2_average = torch.sum(l2distance) / torch.sum(num_joints)
    l2_average_joint = torch.sum(l2distance, dim=0) / torch.clamp(sum_available_joint, min=1e-6)
    # clamp the tensor, sometimes we have zero available joints and then we have NaN values
    pck_joints = correct_per_joint / torch.clamp(sum_available_joint, min=1e-6)
    pck_average = torch.sum(correct_index) / torch.sum(num_joints)
    return pck_average.numpy(), pck_joints.numpy(), l2_average.detach().numpy(), l2_average_joint.detach().numpy()
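
A toy sketch of the per-point correctness test used above, on fabricated distances and a fabricated reference length:

import torch

point_distance = torch.tensor([0.10, 0.40, 0.05])   # fabricated keypoint distances
l_pck = torch.tensor([1.0])                          # fabricated reference length
thresh = 0.2
correct_points = torch.le(point_distance, l_pck.expand_as(point_distance) * thresh).type(torch.uint8)
print(correct_points)  # tensor([1, 0, 1], dtype=torch.uint8)
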
Example n. 17
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]  # shape [5*K*A, 4]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]  # (5*H*W*A)*K
            regression = regressions[j, :, :]  # (5*H*W*A)*4

            bbox_annotation = annotations[j, :, :]  # num_annots * 5
            bbox_annotation = bbox_annotation[
                bbox_annotation[:, 4] != -1]  # keep only validly annotated samples, valid_num_annots * 5

            if bbox_annotation.shape[0] == 0:
                regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())

                continue

            classification = torch.clamp(classification, 1e-4,
                                         1.0 - 1e-4)  # (5*H*W*A)*K

            IoU = calc_iou(
                anchor,
                bbox_annotation[:, :4])  # num_anchors x valid_num_annots

            # IoU_max is, for each anchor, the IoU with its best-overlapping annotation box
            # IoU_argmax is the index of that best-overlapping annotation box
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # (num_anchors, )

            # import pdb
            # pdb.set_trace()

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1  # (5*H*W*A)*K
            targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0  # set to 0 where IoU < 0.4

            positive_indices = torch.ge(
                IoU_max, 0.5)  # mask of anchors with IoU >= 0.5, (num_anchors, )

            num_positive_anchors = positive_indices.sum()  # number of positive anchors

            assigned_annotations = bbox_annotation[
                IoU_argmax, :]  # (num_anchors, 5), the annotation box assigned to each anchor

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[
                positive_indices,
                4].long()] = 1  # one-hot class targets for positives, (num_anchors, K)

            alpha_factor = torch.full(targets.shape, fill_value=alpha).cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                                   torch.zeros(cls_loss.shape).cuda())

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if num_positive_anchors > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]    # widths of the positive anchors
                anchor_heights_pi = anchor_heights[positive_indices]  # heights of the positive anchors
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]      # center x of the positive anchors
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]      # center y of the positive anchors

                gt_widths = assigned_annotations[:,
                                                 2] - assigned_annotations[:,
                                                                           0]
                gt_heights = assigned_annotations[:,
                                                  3] - assigned_annotations[:,
                                                                            1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # clip widths to 1
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi
                              ) / anchor_widths_pi  # (num_anchors, )
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()

                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

                negative_indices = 1 + (~positive_indices)

                regression_diff = torch.abs(
                    targets -
                    regression[positive_indices, :])  # (num_positives, 4)

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0).float().cuda())

        return torch.stack(classification_losses).mean(
            dim=0,
            keepdim=True), torch.stack(regression_losses).mean(dim=0,
                                                               keepdim=True)
Example 18
def infer_tacotron2_trt(encoder, decoder_iter, postnet,
                        encoder_context, decoder_context, postnet_context,
                        sequences, sequence_lengths, measurements, fp16):

    memory = torch.zeros((len(sequence_lengths), sequence_lengths[0], 512)).cuda()
    if fp16:
        memory = memory.half()
    device = memory.device
    dtype = memory.dtype

    processed_memory = torch.zeros((len(sequence_lengths), sequence_lengths[0], 128), device=device, dtype=dtype)
    lens = torch.zeros_like(sequence_lengths)

    encoder_tensors = {
        # inputs
        'sequences': sequences, 'sequence_lengths': sequence_lengths,
        # outputs
        'memory': memory, 'lens': lens, 'processed_memory': processed_memory
    }

    print("Running Tacotron2 Encoder")
    with MeasureTime(measurements, "tacotron2_encoder_time"):
        run_trt_engine(encoder_context, encoder, encoder_tensors)

    device = memory.device
    mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device=device)
    not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device=device)
    mel_outputs, gate_outputs, alignments = (torch.zeros(1, device=device),
                                             torch.zeros(1, device=device),
                                             torch.zeros(1, device=device))
    gate_threshold = 0.5
    max_decoder_steps = 1664
    first_iter = True

    decoder_inputs = init_decoder_inputs(memory, processed_memory, sequence_lengths)
    decoder_outputs = init_decoder_outputs(memory, sequence_lengths)

    print("Running Tacotron2 Decoder")
    measurements_decoder = {}
    while True:
        decoder_tensors = init_decoder_tensors(decoder_inputs, decoder_outputs)
        with MeasureTime(measurements_decoder, "step"):
            run_trt_engine(decoder_context, decoder_iter, decoder_tensors)

        if first_iter:
            mel_outputs = torch.unsqueeze(decoder_outputs[7], 2)
            gate_outputs = torch.unsqueeze(decoder_outputs[8], 2)
            alignments = torch.unsqueeze(decoder_outputs[4], 2)
            measurements['tacotron2_decoder_time'] = measurements_decoder['step']
            first_iter = False
        else:
            mel_outputs = torch.cat((mel_outputs, torch.unsqueeze(decoder_outputs[7], 2)), 2)
            gate_outputs = torch.cat((gate_outputs, torch.unsqueeze(decoder_outputs[8], 2)), 2)
            alignments = torch.cat((alignments, torch.unsqueeze(decoder_outputs[4], 2)), 2)
            measurements['tacotron2_decoder_time'] += measurements_decoder['step']

        dec = torch.le(torch.sigmoid(decoder_outputs[8]), gate_threshold).to(torch.int32).squeeze(1)
        not_finished = not_finished * dec
        mel_lengths += not_finished

        if torch.sum(not_finished) == 0:
            print("Stopping after", mel_outputs.size(2), "decoder steps")
            break
        if mel_outputs.size(2) == max_decoder_steps:
            print("Warning! Reached max decoder steps")
            break

        decoder_inputs, decoder_outputs = swap_inputs_outputs(decoder_inputs, decoder_outputs)

    mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device, dtype=dtype)

    postnet_tensors = {
        # inputs
        'mel_outputs': mel_outputs,
        # outputs
        'mel_outputs_postnet': mel_outputs_postnet
    }
    print("Running Tacotron2 Postnet")
    with MeasureTime(measurements, "tacotron2_postnet_time"):
        run_trt_engine(postnet_context, postnet, postnet_tensors)

    print("Tacotron2 Postnet done")

    return mel_outputs_postnet, mel_lengths
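
The stopping logic above keeps a per-sequence not_finished flag: a sequence keeps decoding while sigmoid(gate) stays at or below gate_threshold, and mel_lengths only grows for sequences that are still running. A small sketch on made-up gate logits:

import torch

# hypothetical gate logits for a batch of three sequences at one decoder step
gate_output = torch.tensor([[-2.0], [0.5], [3.0]])
gate_threshold = 0.5

not_finished = torch.ones(3, dtype=torch.int32)
mel_lengths = torch.zeros(3, dtype=torch.int32)

dec = torch.le(torch.sigmoid(gate_output), gate_threshold).to(torch.int32).squeeze(1)
not_finished = not_finished * dec   # sequences whose gate fired are switched off
mel_lengths += not_finished         # only still-running sequences accumulate length
# not_finished -> tensor([1, 0, 0]), mel_lengths -> tensor([1, 0, 0])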
Example 19
def infer_tacotron2_trt(encoder, decoder_iter, postnet, encoder_context,
                        decoder_context, postnet_context, sequences,
                        sequence_lengths, measurements, fp16, loop):

    batch_size = len(sequence_lengths)
    max_sequence_len = sequence_lengths[0]
    memory = torch.zeros((batch_size, max_sequence_len, 512)).cuda()
    if fp16:
        memory = memory.half()
    device = memory.device
    dtype = memory.dtype

    processed_memory = torch.zeros((batch_size, max_sequence_len, 128),
                                   device=device,
                                   dtype=dtype)
    lens = torch.zeros_like(sequence_lengths)
    print(f"batch_size: {batch_size}, max sequence length: {max_sequence_len}")

    encoder_tensors = {
        "inputs": {
            'sequences': sequences,
            'sequence_lengths': sequence_lengths
        },
        "outputs": {
            'memory': memory,
            'lens': lens,
            'processed_memory': processed_memory
        }
    }

    print("Running Tacotron2 Encoder")
    with MeasureTime(measurements, "tacotron2_encoder_time"):
        run_trt_engine(encoder_context, encoder, encoder_tensors)
    max_decoder_steps = 1024
    device = memory.device
    mel_lengths = torch.zeros([memory.size(0)],
                              dtype=torch.int32,
                              device=device)
    not_finished = torch.ones([memory.size(0)],
                              dtype=torch.int32,
                              device=device)
    mel_outputs = torch.ones((batch_size, 80, max_decoder_steps),
                             device=device,
                             dtype=dtype).cuda()
    gate_threshold = 0.5
    first_iter = True

    decoder_inputs = init_decoder_inputs(memory, processed_memory,
                                         sequence_lengths)
    decoder_outputs = init_decoder_outputs(memory, sequence_lengths)

    if loop:
        if decoder_context is None:
            print("Running Tacotron2 Decoder with loop with ONNX-RT")
            decoder_inputs_onnxrt = [
                x.cpu().numpy().copy() for x in decoder_inputs
            ]
            import onnx
            import onnxruntime
            sess = onnxruntime.InferenceSession(decoder_iter)

            with MeasureTime(measurements, "tacotron2_decoder_time"):
                result = sess.run(
                    ["mel_outputs", "mel_lengths_t"], {
                        'decoder_input_0': decoder_inputs_onnxrt[0],
                        'attention_hidden_0': decoder_inputs_onnxrt[1],
                        'attention_cell_0': decoder_inputs_onnxrt[2],
                        'decoder_hidden_0': decoder_inputs_onnxrt[3],
                        'decoder_cell_0': decoder_inputs_onnxrt[4],
                        'attention_weights_0': decoder_inputs_onnxrt[5],
                        'attention_weights_cum_0': decoder_inputs_onnxrt[6],
                        'attention_context_0': decoder_inputs_onnxrt[7],
                        'memory': decoder_inputs_onnxrt[8],
                        'processed_memory': decoder_inputs_onnxrt[9],
                        'mask': decoder_inputs_onnxrt[10]
                    })

            mel_outputs = torch.tensor(result[0], device=device)
            mel_lengths = torch.tensor(result[1], device=device)
        else:
            print("Running Tacotron2 Decoder with loop")
            decoder_tensors = {
                "inputs": {
                    'decoder_input_0': decoder_inputs[0],
                    'attention_hidden_0': decoder_inputs[1],
                    'attention_cell_0': decoder_inputs[2],
                    'decoder_hidden_0': decoder_inputs[3],
                    'decoder_cell_0': decoder_inputs[4],
                    'attention_weights_0': decoder_inputs[5],
                    'attention_weights_cum_0': decoder_inputs[6],
                    'attention_context_0': decoder_inputs[7],
                    'memory': decoder_inputs[8],
                    'processed_memory': decoder_inputs[9],
                    'mask': decoder_inputs[10]
                },
                "outputs": {
                    'mel_outputs': mel_outputs,
                    'mel_lengths_t': mel_lengths
                }
            }

            with MeasureTime(measurements, "tacotron2_decoder_time"):
                run_trt_engine(decoder_context, decoder_iter, decoder_tensors)
            mel_outputs = mel_outputs[:, :, :torch.max(mel_lengths)]

    else:
        print("Running Tacotron2 Decoder")
        measurements_decoder = {}
        while True:
            decoder_tensors = init_decoder_tensors(decoder_inputs,
                                                   decoder_outputs)
            with MeasureTime(measurements_decoder, "step"):
                run_trt_engine(decoder_context, decoder_iter, decoder_tensors)

            if first_iter:
                mel_outputs = torch.unsqueeze(decoder_outputs[7], 2)
                gate_outputs = torch.unsqueeze(decoder_outputs[8], 2)
                alignments = torch.unsqueeze(decoder_outputs[4], 2)
                measurements['tacotron2_decoder_time'] = measurements_decoder[
                    'step']
                first_iter = False
            else:
                mel_outputs = torch.cat(
                    (mel_outputs, torch.unsqueeze(decoder_outputs[7], 2)), 2)
                gate_outputs = torch.cat(
                    (gate_outputs, torch.unsqueeze(decoder_outputs[8], 2)), 2)
                alignments = torch.cat(
                    (alignments, torch.unsqueeze(decoder_outputs[4], 2)), 2)
                measurements['tacotron2_decoder_time'] += measurements_decoder[
                    'step']

            dec = torch.le(torch.sigmoid(decoder_outputs[8]),
                           gate_threshold).to(torch.int32).squeeze(1)
            not_finished = not_finished * dec
            mel_lengths += not_finished

            if torch.sum(not_finished) == 0:
                print("Stopping after", mel_outputs.size(2), "decoder steps")
                break
            if mel_outputs.size(2) == max_decoder_steps:
                print("Warning! Reached max decoder steps")
                break

            decoder_inputs, decoder_outputs = swap_inputs_outputs(
                decoder_inputs, decoder_outputs)

    mel_outputs = mel_outputs.clone().detach()
    mel_outputs_postnet = torch.zeros_like(mel_outputs,
                                           device=device,
                                           dtype=dtype)

    postnet_tensors = {
        "inputs": {
            'mel_outputs': mel_outputs
        },
        "outputs": {
            'mel_outputs_postnet': mel_outputs_postnet
        }
    }
    print("Running Tacotron2 Postnet")
    with MeasureTime(measurements, "tacotron2_postnet_time"):
        run_trt_engine(postnet_context, postnet, postnet_tensors)

    print("Tacotron2 Postnet done")

    return mel_outputs_postnet, mel_lengths
Example 20
    def forward(self, img_batch_shape, attention_mask, bboxs):

        h, w = img_batch_shape[2], img_batch_shape[3]

        mask_losses = []

        batch_size = bboxs.shape[0]
        for j in range(batch_size):

            bbox_annotation = bboxs[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            cond1 = torch.le(bbox_annotation[:, 0], w)
            cond2 = torch.le(bbox_annotation[:, 1], h)
            cond3 = torch.le(bbox_annotation[:, 2], w)
            cond4 = torch.le(bbox_annotation[:, 3], h)
            cond = cond1 & cond2 & cond3 & cond4  # logical AND of the boolean masks

            bbox_annotation = bbox_annotation[cond, :]

            if bbox_annotation.shape[0] == 0:
                mask_losses.append(torch.tensor(0).float().cuda())
                continue

            bbox_area = (bbox_annotation[:, 2] - bbox_annotation[:, 0]) * (
                bbox_annotation[:, 3] - bbox_annotation[:, 1])

            mask_loss = []
            for id in range(len(attention_mask)):

                attention_map = attention_mask[id][j, 0, :, :]

                min_area = (2**(id + 5))**2 * 0.5
                max_area = (2**(id + 5) * 1.58)**2 * 2

                level_bbox_indice1 = torch.ge(bbox_area, min_area)
                level_bbox_indice2 = torch.le(bbox_area, max_area)

                level_bbox_indice = level_bbox_indice1 & level_bbox_indice2

                level_bbox_annotation = bbox_annotation[
                    level_bbox_indice, :].clone()

                #level_bbox_annotation = bbox_annotation.clone()

                attention_h, attention_w = attention_map.shape

                if level_bbox_annotation.shape[0]:
                    level_bbox_annotation[:, 0] *= attention_w / w
                    level_bbox_annotation[:, 1] *= attention_h / h
                    level_bbox_annotation[:, 2] *= attention_w / w
                    level_bbox_annotation[:, 3] *= attention_h / h

                mask_gt = torch.zeros(attention_map.shape)
                mask_gt = mask_gt.cuda()

                for i in range(level_bbox_annotation.shape[0]):

                    x1 = max(int(level_bbox_annotation[i, 0]), 0)
                    y1 = max(int(level_bbox_annotation[i, 1]), 0)
                    x2 = min(
                        math.ceil(level_bbox_annotation[i, 2]) + 1,
                        attention_w)
                    y2 = min(
                        math.ceil(level_bbox_annotation[i, 3]) + 1,
                        attention_h)

                    mask_gt[y1:y2, x1:x2] = 1

                mask_gt = mask_gt[mask_gt >= 0]
                mask_predict = attention_map[attention_map >= 0]

                mask_loss.append(F.binary_cross_entropy(mask_predict, mask_gt))
            mask_losses.append(torch.stack(mask_loss).mean())

        return torch.stack(mask_losses).mean(dim=0, keepdim=True)
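
The min_area/max_area gating above assigns each ground-truth box to a pyramid level by its area (the window roughly quadruples from one level to the next). A rough standalone sketch of that gating; the box areas are made up:

import torch

bbox_area = torch.tensor([500.0, 4000.0, 60000.0])  # hypothetical GT box areas in pixels^2

level_masks = []
for level in range(5):
    min_area = (2 ** (level + 5)) ** 2 * 0.5
    max_area = (2 ** (level + 5) * 1.58) ** 2 * 2
    # same gating as above: keep boxes whose area falls inside this level's window
    level_masks.append(torch.ge(bbox_area, min_area) & torch.le(bbox_area, max_area))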
Example 21
    def forward(self, center_maps, scale_maps, annotations, stride=4):
        batch_size = center_maps.size()[0]
        scale_losses = []
        center_losses = []
        for i in range(batch_size):
            boxes = annotations[i]
            center_map = center_maps[i]
            scale_map = scale_maps[i]
            boxes = (boxes // stride).long()
            center_map = torch.clamp(center_map, 1e-4, 1.0 - 1e-4)
            x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
            center_x, center_y, width, height = (x1 + x2) / 2, (
                y1 + y2) / 2, x2 - x1, y2 - y1
            center_gt = torch.zeros(center_map.shape).cuda()
            #
            #print(center_gt.size())
            scale_gt = torch.zeros(scale_map.shape).cuda()
            center_gt[:, center_y, center_x] = 1.0
            region_x = torch.cat([
                center_x - 2, center_x - 1, center_x, center_x + 1,
                center_x + 2
            ])
            region_y = torch.cat([
                center_y - 2, center_y - 1, center_y, center_y + 1,
                center_y + 2
            ])
            scale_gt[:, region_y.cuda(),
                     region_x.cuda()] = (torch.log(height.float())).repeat(
                         5, ).cuda()
            Gauss_map = torch.zeros(center_map.shape).cuda()
            pos_map = torch.zeros(center_map.shape).cuda()
            K = boxes.size()[0]

            # use a separate index so the outer batch index i is not shadowed
            for n in range(K):
                c_x, c_y, w, h = center_x[n], center_y[n], width[n], height[n]
                k_Gauss = get_mask(w, h, c_x, c_y)
                Gauss_map[:, y1[n]:y2[n], x1[n]:x2[n]] = torch.max(
                    k_Gauss.unsqueeze(0), Gauss_map[:, y1[n]:y2[n], x1[n]:x2[n]])
                pos_map[:, y1[n]:y2[n], x1[n]:x2[n]] = 1

            Gauss_map = torch.pow(1.0 - Gauss_map, self.beta)
            Gauss_map = Gauss_map * pos_map
            #ipdb.set_trace()

            alpha_factor = torch.ones(center_map.shape).cuda() * self.alpha
            alpha_factor = torch.where(torch.eq(center_gt, 1.), alpha_factor,
                                       Gauss_map)
            focal_weight = torch.where(torch.eq(center_gt, 1.),
                                       1.0 - center_map, center_map)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)
            bce = -(center_gt * torch.log(center_map) +
                    (1.0 - center_gt) * torch.log(1.0 - center_map))
            center_loss = focal_weight * bce
            center_loss = center_loss.sum() / max(1.0, K)

            center_losses.append(center_loss)

            scale_diff = torch.abs(scale_gt - scale_map)
            scale_loss = torch.where(torch.le(scale_diff, 1.0),
                                     0.5 * torch.pow(scale_diff, 2),
                                     scale_diff - 0.5)

            scale_loss = torch.where(torch.ne(scale_gt, 0.), scale_loss,
                                     torch.zeros(scale_loss.shape).cuda())

            scale_losses.append(scale_loss.sum() / max(1.0, K))

        # return after accumulating the losses over the whole batch
        return torch.stack(center_losses).mean(dim=0, keepdim=True), \
               torch.stack(scale_losses).mean(dim=0, keepdim=True)
Example 22
    def forward(ctx, classifications, regressions, anchors, annotations):
 
        batch_size = classifications.shape[0]
        regression_losses = []

        regression_grads=torch.zeros(regressions.shape).cuda()
        p_num=torch.zeros(1).cuda()
        labels_b=[]

        anchor = anchors[0, :, :].type(torch.cuda.FloatTensor)

        anchor_widths  = anchor[:, 2] - anchor[:, 0]+1.0
        anchor_heights = anchor[:, 3] - anchor[:, 1]+1.0
        anchor_ctr_x   = anchor[:, 0] + 0.5 * (anchor_widths-1.0)
        anchor_ctr_y   = anchor[:, 1] + 0.5 * (anchor_heights-1.0)

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            if bbox_annotation.shape[0] == 0:
                regression_losses.append(torch.tensor(0).float().cuda())
                labels_b.append(torch.zeros(classification.shape).cuda())
                continue

            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            targets = targets.cuda()

            # force the best-matching anchor of every GT box to be positive,
            # even if its IoU falls below the 0.5 threshold
            gt_IoU_max, gt_IoU_argmax = torch.max(IoU, dim=0)
            gt_IoU_argmax = torch.where(IoU == gt_IoU_max)[0]
            positive_indices = torch.ge(torch.zeros(IoU_max.shape).cuda(), 1)  # all-False mask
            positive_indices[gt_IoU_argmax.long()] = True

            positive_indices = positive_indices | torch.ge(IoU_max, 0.5)
            negative_indices = torch.lt(IoU_max, 0.4)

            p_num+=positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[negative_indices, :] = 0
            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1
            labels_b.append(targets)

            # compute the loss for regression
            if positive_indices.sum() > 0:

                assigned_annotations = assigned_annotations[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths  = assigned_annotations[:, 2] - assigned_annotations[:, 0]+1.0
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]+1.0
                gt_ctr_x   = assigned_annotations[:, 0] + 0.5 * (gt_widths-1.0)
                gt_ctr_y   = assigned_annotations[:, 1] + 0.5 * (gt_heights-1.0)

                # clip widths to 1
                gt_widths  = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets2 = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh))
                targets2 = targets2.t()

                targets2 = targets2/torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

                #negative_indices = ~ positive_indices

                regression_diff = regression[positive_indices, :]-targets2
                regression_diff_abs= torch.abs(regression_diff)

                regression_loss = torch.where(
                    torch.le(regression_diff_abs, 1.0 / 1.0),
                    0.5 * 1.0 * torch.pow(regression_diff_abs, 2),
                    regression_diff_abs - 0.5 / 1.0
                )
                regression_losses.append(regression_loss.sum())


                regression_grad=torch.where(
                    torch.le(regression_diff_abs,1.0/1.0),
                    1.0*regression_diff,
                    torch.sign(regression_diff))
                regression_grads[j,positive_indices,:]=regression_grad

            else:
                regression_losses.append(torch.tensor(0).float().cuda())

        p_num=torch.clamp(p_num,min=1)
        regression_grads/=(4*p_num)

        ########################AP-LOSS##########################
        labels_b=torch.stack(labels_b)
        classification_grads,classification_losses=AP_loss(classifications,labels_b)
        #########################################################

        ctx.save_for_backward(classification_grads,regression_grads)
        return classification_losses, torch.stack(regression_losses).sum(dim=0, keepdim=True)/p_num
Example 23
def get_mask_from_lengths(lengths):
    max_len = torch.max(lengths).item()
    ids = torch.arange(0, max_len, device=lengths.device, dtype=lengths.dtype)
    mask = (ids < lengths.unsqueeze(1)).byte()
    mask = torch.le(mask, 0)  # True at padding positions (index >= length)
    return mask
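
A quick usage sketch, assuming the get_mask_from_lengths above is in scope; the returned mask is True at padding positions:

import torch

lengths = torch.tensor([3, 1, 4])
mask = get_mask_from_lengths(lengths)
# mask:
# tensor([[False, False, False,  True],
#         [False,  True,  True,  True],
#         [False, False, False, False]])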
Example 24
def preprocess(sample_dict, pre_x2d, out_dim, rescale_dist=0.0):
    rand_angle = np.random.random_sample() * 2.0 * np.pi
    rand_R = quaternion_matrix(quaternion_about_axis(rand_angle, (0.0, 1.0, 0.0)))[:3, :3]
    rand_R = torch.FloatTensor(rand_R).unsqueeze(0)

    scene_rgb = sample_dict['frames_img'][:, :5, ...].cuda()
    scene_depth = sample_dict['frames_depth'][:, :5, ...].cuda()
    scene_K = sample_dict['frames_K'][:, :5, ...].cuda()
    scene_Tcw = sample_dict['frames_Tcw'][:, :5, ...]
    scene_ori_rgb = sample_dict['frames_ori_img'][:, :5, ...].cuda()
    scene_neg_tags = sample_dict['frames_neg_tags'][:, :5, ...].cuda()

    N, L, C, H, W = scene_rgb.shape
    # scene_rgb = scene_rgb.view(N, L, C, H, W)
    scene_depth = scene_depth.view(N * L, 1, H, W)
    scene_K = scene_K.view(N * L, 3, 3)
    scene_Tcw = scene_Tcw.view(N * L, 3, 4)

    # generate 3D world position of scene
    d = scene_depth.view(N * L, H * W, 1)  # dim (N*L, H*W, 1)
    X_3d = batched_pi_inv(scene_K, pre_x2d, d)  # dim (N*L, H*W, 3)
    Rwc, twc = batched_inv_pose(R=scene_Tcw[:, :3, :3],
                                t=scene_Tcw[:, :3, 3].squeeze(-1))  # dim (N*L, 3, 3), (N, 3)
    X_world = batched_transpose(Rwc.cuda(), twc.cuda(), X_3d)  # dim (N*L, H*W, 3)
    X_world = X_world.contiguous().view(N, L * H * W, 3)        # dim (N, L*H*W, 3)
    scene_center = torch.mean(X_world, dim=1)  # dim (N, 3)
    X_world -= scene_center.view(N, 1, 3)
    X_world = batched_transpose(rand_R.cuda().expand(N, 3, 3),
                                torch.zeros(1, 3, 1).cuda().expand(N, 3, 1),
                                X_world)  # dim (N, L*H*W, 3), data augmentation
    X_world = X_world.view(N, L, H, W, 3).permute(0, 1, 4, 2, 3).contiguous()  # dim (N, L, 3, H, W)

    # query image:
    query_img = sample_dict['img']
    query_ori_img = sample_dict['ori_img']

    # compute multiscale ground truth query_X_worlds & valid_masks
    query_X_worlds = []
    valid_masks = []
    out_H, out_W = out_dim
    query_depth = sample_dict['depth'].cuda()
    ori_query_depth = query_depth.clone()
    N, C, H, W = query_depth.shape
    for i in range(4):
        query_depth_patch = F.unfold(
            query_depth,
            kernel_size=(H // out_H, W // out_W),
            stride=(H // out_H, W // out_W)
        ).view(N, -1, out_H, out_W)
        mask = torch.gt(query_depth_patch, 1e-5)
        count = torch.sum(mask.float(), dim=1)
        query_depth_down = torch.sum(query_depth_patch * mask.float(), dim=1) / \
                           torch.where(torch.le(count, 1e-5),
                                       torch.full(count.shape, 1e6).to(count.device),
                                       count)  # (N, 1, out_H, out_W)
        query_Tcw = sample_dict['Tcw']
        query_K = sample_dict['K'].clone().cuda()
        query_K[:, 0, 0] *= out_W / W
        query_K[:, 0, 2] *= out_W / W
        query_K[:, 1, 1] *= out_H / H
        query_K[:, 1, 2] *= out_H / H
        query_d = query_depth_down.view(N, out_H * out_W, 1)  # dim (N, H*W, 1)
        out_x_2d = x_2d_coords_torch(N, out_H, out_W).cuda().view(N, -1, 2)
        query_X_3d = batched_pi_inv(query_K, out_x_2d, query_d)  # dim (N, H*W, 3)
        query_Rwc, query_twc = batched_inv_pose(R=query_Tcw[:, :3, :3],
                                                t=query_Tcw[:, :3, 3].squeeze(-1))  # dim (N, 3, 3), (N, 3)
        query_X_world = batched_transpose(query_Rwc.cuda(), query_twc.cuda(), query_X_3d)  # dim (N, H*W, 3)
        query_X_world -= scene_center.view(N, 1, 3)
        query_X_world = batched_transpose(rand_R.cuda().expand(N, 3, 3),
                                          torch.zeros(1, 3, 1).cuda().expand(N, 3, 1),
                                          query_X_world)  # dim (N, H*W, 3), data augmentation
        query_X_world = query_X_world.permute(0, 2, 1).view(N, 3, out_H, out_W).contiguous()  # dim (N, 3, H, W)
        query_X_worlds.append(query_X_world.cuda())

        valid_masks.append(torch.gt(query_depth_down, 1e-5).cuda().view(N, out_H, out_W))

        if i == 3:
            query_X_worlds.append(query_X_world.cuda())
            valid_masks.append(torch.gt(query_depth_down, 1e-5).cuda().view(N, out_H, out_W))

        out_H //= 2
        out_W //= 2

    # compute norm_query_Tcw for normalized scene coordinate
    query_twc = query_twc.cuda() - scene_center.view(N, 3, 1)
    norm_query_Twc = torch.cat([query_Rwc.cuda(), query_twc], dim=-1)  # dim (N, 3, 4)
    norm_query_Twc = torch.bmm(rand_R.cuda().expand(N, 3, 3), norm_query_Twc)  # dim (N, 3, 4)
    query_Rcw, query_tcw = batched_inv_pose(R=norm_query_Twc[:, :3, :3],
                                            t=norm_query_Twc[:, :3, 3].squeeze(-1))  # dim (N, 3, 3), (N, 3)
    norm_query_Tcw = torch.cat([query_Rcw, query_tcw.view(N, 3, 1)], dim=-1)  # dim (N, 3, 4)

    # compute down sampled query K
    out_H, out_W = out_dim
    query_K = sample_dict['K'].clone().cuda()
    query_K[:, 0, 0] *= out_W / W
    query_K[:, 0, 2] *= out_W / W
    query_K[:, 1, 1] *= out_H / H
    query_K[:, 1, 2] *= out_H / H

    if rescale_dist > 0:
        query_X_worlds, X_world, rescale_factor = rescale_scene_coords(query_X_worlds, X_world, scene_neg_tags, rescale_dist)
    else:
        rescale_factor = torch.ones(N)
    scene_input = torch.cat((scene_rgb, X_world), dim=2)

    return scene_input.cuda(), query_img.cuda(), query_X_worlds[::-1], valid_masks[::-1], \
           scene_ori_rgb.cuda(), query_ori_img.cuda(), X_world.cuda(), \
           torch.gt(scene_depth, 1e-5).cuda().view(N, L, H, W), norm_query_Tcw, query_K, scene_neg_tags, rescale_factor.cuda()
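
The query_depth_down computation above is a masked average pool: F.unfold gathers each patch, zero-depth pixels are excluded via the mask, and torch.le guards the division for patches with no valid pixels. A small standalone sketch on a made-up 4x4 depth map:

import torch
import torch.nn.functional as F

depth = torch.tensor([[[[1.0, 0.0, 2.0, 2.0],
                        [1.0, 1.0, 0.0, 2.0],
                        [0.0, 0.0, 3.0, 3.0],
                        [0.0, 0.0, 3.0, 3.0]]]])  # zeros mark invalid pixels
out_H, out_W = 2, 2
N, C, H, W = depth.shape

patches = F.unfold(depth, kernel_size=(H // out_H, W // out_W),
                   stride=(H // out_H, W // out_W)).view(N, -1, out_H, out_W)
mask = torch.gt(patches, 1e-5)
count = torch.sum(mask.float(), dim=1)
# torch.le guards against dividing by zero when a patch has no valid pixels
depth_down = torch.sum(patches * mask.float(), dim=1) / \
             torch.where(torch.le(count, 1e-5),
                         torch.full_like(count, 1e6), count)
# depth_down -> tensor([[[1., 2.], [0., 3.]]])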
Example 25
    def forward(self, classifications, regressions, anchors, annotations):
        alpha = 0.75  # 0.25
        gamma = 2.0
        ignores = annotations[:, :, [-1]]
        annotations = annotations[:, :, 0: -1]
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[0, :, :]

        anchor_widths = anchor[:, 2] - anchor[:, 0]
        anchor_heights = anchor[:, 3] - anchor[:, 1]
        anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]

            ignore = ignores[j, :, :]
            ignore = ignore[ignore[:, -1] != -1]

            if bbox_annotation.shape[0] == 0:
                regression_losses.append(torch.tensor(0).float().cuda())
                classification_losses.append(torch.tensor(0).float().cuda())

                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, 4: 8])  # num_anchors x num_annotations

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

            # import pdb
            # pdb.set_trace()

            # compute the loss for classification
            targets = torch.ones(classification.shape) * -1
            targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            assigned_ignores = ignore[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices, -1].long()] = 1

            alpha_factor = torch.ones(targets.shape).cuda() * alpha

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

            # cls_loss = focal_weight * torch.pow(bce, gamma)
            cls_loss = focal_weight * bce

            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())

            classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

            # compute the loss for regression

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[positive_indices, :]

                assigned_ignores = assigned_ignores[positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths_h = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights_h = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x_h = assigned_annotations[:, 0] + 0.5 * gt_widths_h
                gt_ctr_y_h = assigned_annotations[:, 1] + 0.5 * gt_heights_h

                gt_widths_f = assigned_annotations[:, 6] - assigned_annotations[:, 4]
                gt_heights_f = assigned_annotations[:, 7] - assigned_annotations[:, 5]
                gt_ctr_x_f = assigned_annotations[:, 4] + 0.5 * gt_widths_f
                gt_ctr_y_f = assigned_annotations[:, 5] + 0.5 * gt_heights_f

                # clip widths to 1
                gt_widths_h = torch.clamp(gt_widths_h, min=1)
                gt_heights_h = torch.clamp(gt_heights_h, min=1)

                gt_widths_f = torch.clamp(gt_widths_f, min=1)
                gt_heights_f = torch.clamp(gt_heights_f, min=1)

                targets_dx_f = (gt_ctr_x_f - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy_f = (gt_ctr_y_f - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw_f = torch.log(gt_widths_f / anchor_widths_pi)
                targets_dh_f = torch.log(gt_heights_f / anchor_heights_pi)

                targets_dx_h = (gt_ctr_x_h - anchor_ctr_x_pi) / anchor_widths_pi * 4
                targets_dy_h = (gt_ctr_y_h - anchor_ctr_y_pi) / anchor_heights_pi * 4
                targets_dw_h = torch.log(gt_widths_h / anchor_widths_pi * 4)
                targets_dh_h = torch.log(gt_heights_h / anchor_heights_pi * 4)

                targets = torch.stack((targets_dx_f, targets_dy_f, targets_dw_f, targets_dh_f,
                                       targets_dx_h, targets_dy_h, targets_dw_h, targets_dh_h))
                targets = targets.t()

                targets = targets / torch.Tensor([[0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2]]).cuda()

                negative_indices = ~positive_indices  # unused below; 1 - positive_indices fails on bool tensors

                regression_diff = torch.abs(targets - regression[positive_indices, :])

                weights = torch.ones(regression_diff.shape).cuda()
                if only_full:
                    weights[:, 4:] = 0
                else:
                    weights[:, 4:] = 1 - assigned_ignores
                regression_diff = regression_diff * weights

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0
                )
                if only_full:
                    regression_losses.append(regression_loss[:, 0:4].mean())
                else:
                    regression_losses.append(regression_loss.mean())
            else:
                regression_losses.append(torch.tensor(0).float().cuda())

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0,
                                                                                                                 keepdim=True)
Example 26
def _kl_continuous_bernoulli_uniform(p, q):
    result = -p.entropy() + (q.high - q.low).log()
    return torch.where(
        torch.max(torch.ge(q.low, p.support.lower_bound),
                  torch.le(q.high, p.support.upper_bound)),
        torch.ones_like(result) * inf, result)
Example 27
    def safe_log(self, tensor, eps=1e-16):
        is_zero = torch.le(tensor, eps)
        # replace near-zero entries with 1 before the log, then zero them out afterwards
        tensor = torch.where(is_zero, torch.ones_like(tensor), tensor)
        tensor = torch.where(is_zero, torch.zeros_like(tensor), torch.log(tensor))
        return tensor
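
The same two-step pattern as safe_log above, written out on a small tensor so the behaviour at (near-)zero entries is visible; the input values are illustrative:

import torch

x = torch.tensor([0.0, 1e-20, 0.5, 2.0])
eps = 1e-16
is_zero = torch.le(x, eps)
# near-zero entries are replaced by 1 before the log, then zeroed out afterwards
safe = torch.where(is_zero, torch.zeros_like(x),
                   torch.log(torch.where(is_zero, torch.ones_like(x), x)))
# safe -> tensor([ 0.0000,  0.0000, -0.6931,  0.6931])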
Example 28
def train(args):
    #for creating the visdom object
    DEFAULT_PORT = 8097
    DEFAULT_HOSTNAME = "http://localhost"
    viz = Visdom(DEFAULT_HOSTNAME, DEFAULT_PORT, ipv6=False)

    hyparam_list = [
        ("model", args.model_name),
        ("cube", args.cube_len),
        ("bs", args.batch_size),
        ("g_lr", args.g_lr),
        ("d_lr", args.d_lr),
        ("z", args.z_dis),
        ("bias", args.bias),
        ("sl", args.soft_label),
    ]

    hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list))
    log_param = make_hyparam_string(hyparam_dict)
    print(log_param)

    # for using tensorboard
    if args.use_tensorboard:
        import tensorflow as tf

        summary_writer = tf.summary.FileWriter(args.output_dir + args.log_dir +
                                               log_param)

        def inject_summary(summary_writer, tag, value, step):
            summary = tf.Summary(
                value=[tf.Summary.Value(tag=tag, simple_value=value)])
            summary_writer.add_summary(summary, global_step=step)

        inject_summary = inject_summary

    # dataset definition
    dsets_path = args.input_dir + args.data_dir + "train/"
    print(dsets_path)

    x_train = np.load("voxels_3DMNIST_16.npy")
    dataset = x_train.reshape(-1,
                              args.cube_len * args.cube_len * args.cube_len)
    print(dataset.shape)
    dset_loaders = torch.utils.data.DataLoader(dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=1)

    # model define
    D = _D(args)
    G = _G(args)

    D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta)
    G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta)

    if torch.cuda.is_available():
        print("using cuda")
        D.cuda()
        G.cuda()

    criterion = nn.BCELoss()

    pickle_path = "." + args.pickle_dir + log_param
    read_pickle(pickle_path, G, G_solver, D, D_solver)

    for epoch in range(args.n_epochs):
        epoch_start_time = time.time()
        print("epoch %d started" % (epoch))
        for i, X in enumerate(dset_loaders):

            X = var_or_cuda(X)
            X = X.type(torch.cuda.FloatTensor)
            if X.size()[0] != int(args.batch_size):
                #print("batch_size != {} drop last incompatible batch".format(int(args.batch_size)))
                continue

            Z = generateZ(args)
            real_labels = var_or_cuda(torch.ones(args.batch_size)).view(
                -1, 1, 1, 1, 1)
            fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view(
                -1, 1, 1, 1, 1)

            if args.soft_label:
                real_labels = var_or_cuda(
                    torch.Tensor(args.batch_size).uniform_(0.9, 1.1)).view(
                        -1, 1, 1, 1, 1)  ####
                #fake_labels = var_or_cuda(torch.Tensor(args.batch_size).uniform_(0, 0.3)).view(-1,1,1,1,1)
                fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view(
                    -1, 1, 1, 1, 1)  #####
            # ============= Train the discriminator =============#
            d_real = D(X)
            d_real_loss = criterion(d_real, real_labels)

            fake = G(Z)
            d_fake = D(fake)
            d_fake_loss = criterion(d_fake, fake_labels)

            d_loss = d_real_loss + d_fake_loss

            d_real_acu = torch.ge(d_real.squeeze(), 0.5).float()
            d_fake_acu = torch.le(d_fake.squeeze(), 0.5).float()
            d_total_acu = torch.mean(torch.cat((d_real_acu, d_fake_acu), 0))

            #if 1:
            if d_total_acu <= args.d_thresh:
                D.zero_grad()
                d_loss.backward()
                D_solver.step()

            # =============== Train the generator ===============#

            Z = generateZ(args)

            fake = G(Z)
            d_fake = D(fake)
            g_loss = criterion(d_fake, real_labels)

            D.zero_grad()
            G.zero_grad()
            g_loss.backward()
            G_solver.step()
            #######
            #print(fake.shape)
            #print(fake.cpu().data[:8].squeeze().numpy().shape)

            # =============== logging each iteration ===============#
            iteration = str(G_solver.state_dict()['state'][
                G_solver.state_dict()['param_groups'][0]['params'][0]]['step'])
            #print(type(iteration))
            #iteration = str(i)
            #saving the model and a image each 100 iteration
            if int(iteration) % 300 == 0:
                #pickle_save_path = args.output_dir + args.pickle_dir + log_param
                #save_new_pickle(pickle_save_path, iteration, G, G_solver, D, D_solver)
                samples = fake.cpu().data[:8].squeeze().numpy()

                #print(samples.shape)
                for s in range(8):
                    plotVoxelVisdom(samples[s, ...], viz,
                                    "Iteration:{:.4}".format(iteration))

#                 image_path = args.output_dir + args.image_dir + log_param
#                 if not os.path.exists(image_path):
#                     os.makedirs(image_path)

#                 SavePloat_Voxels(samples, image_path, iteration)
# =============== each epoch save model or save image ===============#
            print(
                'Iter-{}; D_loss: {:.4}, G_loss: {:.4}, D_acu: {:.4}, D_lr: {:.4}'
                .format(iteration, d_loss.item(), g_loss.item(),
                        d_total_acu.item(),
                        D_solver.state_dict()['param_groups'][0]["lr"]))

        epoch_end_time = time.time()

        if (epoch + 1) % args.image_save_step == 0:

            samples = fake.cpu().data[:8].squeeze().numpy()

            image_path = args.output_dir + args.image_dir + log_param
            if not os.path.exists(image_path):
                os.makedirs(image_path)

            SavePloat_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            pickle_save_path = args.output_dir + args.pickle_dir + log_param
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        print("epoch time", (epoch_end_time - epoch_start_time) / 60)
        print("epoch %d ended" % (epoch))
        print("################################################")
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which it is
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 0] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda()
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, 1:])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           0].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_heights = assigned_annotations[:, 4] - assigned_annotations[:, 2]
                gt_ctr_x = assigned_annotations[:, 1] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 2] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))


        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
Example 30
    def forward(self, pred, targ):
        reg_diff = torch.abs(targ - pred)
        # smooth L1 with beta = 1/9: quadratic below the threshold, linear above it
        reg_loss = torch.where(torch.le(reg_diff, 1 / 9),
                               4.5 * torch.pow(reg_diff, 2), reg_diff - 1 / 18)
        return reg_loss.mean()
Example 31
def _compute_fake_acc(predictions):
  predictions = torch.le(predictions.data, 0.5)
  if len(predictions.size()) == 3:
    predictions = predictions.view(predictions.size(0) * predictions.size(1) * predictions.size(2))
  acc = (predictions == 1).sum() / (1.0 * predictions.size(0))
  return acc
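
A tiny numeric check of the accuracy computed above, assuming discriminator outputs at or below 0.5 count as fake; the prediction values are made up:

import torch

preds = torch.tensor([0.2, 0.7, 0.4, 0.9])
fake_hits = torch.le(preds, 0.5)                           # True where the output is classified as fake
acc = (fake_hits == 1).sum() / (1.0 * fake_hits.size(0))   # -> 0.5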
Example 32
    def forward(self, classifications, regressions, anchors, annotations,
                **kwargs):
        alpha = 0.25
        gamma = 2.0
        batch_size = classifications.shape[0]
        classification_losses = []
        regression_losses = []

        anchor = anchors[
            0, :, :]  # assuming all image sizes are the same, which it is
        dtype = anchors.dtype

        anchor_widths = anchor[:, 3] - anchor[:, 1]
        anchor_heights = anchor[:, 2] - anchor[:, 0]
        anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
        anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

        for j in range(batch_size):

            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            if bbox_annotation.shape[0] == 0:
                if torch.cuda.is_available():

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = alpha_factor.cuda()
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                    classification_losses.append(cls_loss.sum())
                else:

                    alpha_factor = torch.ones_like(classification) * alpha
                    alpha_factor = 1. - alpha_factor
                    focal_weight = classification
                    focal_weight = alpha_factor * torch.pow(
                        focal_weight, gamma)

                    bce = -(torch.log(1.0 - classification))

                    cls_loss = focal_weight * bce

                    regression_losses.append(torch.tensor(0).to(dtype))
                    classification_losses.append(cls_loss.sum())

                continue

            IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])

            IoU_max, IoU_argmax = torch.max(IoU, dim=1)

            # compute the loss for classification
            targets = torch.ones_like(classification) * -1
            if torch.cuda.is_available():
                targets = targets.cuda()

            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)

            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices, assigned_annotations[positive_indices,
                                                           4].long()] = 1

            alpha_factor = torch.ones_like(targets) * alpha
            if torch.cuda.is_available():
                alpha_factor = alpha_factor.cuda()

            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce

            zeros = torch.zeros_like(cls_loss)
            if torch.cuda.is_available():
                zeros = zeros.cuda()
            cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.to(dtype), min=1.0))

            if positive_indices.sum() > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
                gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # efficientdet style
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dy, targets_dx, targets_dh, targets_dw))
                targets = targets.t()

                regression_diff = torch.abs(targets -
                                            regression[positive_indices, :])

                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
            else:
                if torch.cuda.is_available():
                    regression_losses.append(torch.tensor(0).to(dtype).cuda())
                else:
                    regression_losses.append(torch.tensor(0).to(dtype))

        # debug
        imgs = kwargs.get('imgs', None)
        if imgs is not None:
            regressBoxes = BBoxTransform()
            clipBoxes = ClipBoxes()
            obj_list = kwargs.get('obj_list', None)
            out = postprocess(
                imgs.detach(),
                torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
                regressions.detach(), classifications.detach(), regressBoxes,
                clipBoxes, 0.5, 0.3)
            imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
            imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406]) *
                    255).astype(np.uint8)
            imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
            display(out, imgs, obj_list, imshow=False, imwrite=True)

        return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
               torch.stack(regression_losses).mean(dim=0, keepdim=True)
Example 33
def test_inference(encoder, decoder_iter, postnet):

    encoder.eval()
    decoder_iter.eval()
    postnet.eval()

    from trt.inference_trt import init_decoder_inputs

    texts = ["Hello World, good day."]
    sequences, sequence_lengths = prepare_input_sequence(texts)

    measurements = {}

    print("Running Tacotron2 Encoder")
    with torch.no_grad():
        memory, processed_memory, lens = encoder(sequences, sequence_lengths)

    print("Running Tacotron2 Decoder")
    device = memory.device
    mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device=device)
    not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device=device)
    mel_outputs, gate_outputs, alignments = (torch.zeros(1), torch.zeros(1), torch.zeros(1))
    gate_threshold = 0.6
    max_decoder_steps = 1000
    first_iter = True

    (decoder_input, attention_hidden, attention_cell, decoder_hidden,
     decoder_cell, attention_weights, attention_weights_cum,
     attention_context, memory, processed_memory,
     mask) = init_decoder_inputs(memory, processed_memory, sequence_lengths)

    while True:
        with torch.no_grad():
            (mel_output, gate_output,
             attention_hidden, attention_cell,
             decoder_hidden, decoder_cell,
             attention_weights, attention_weights_cum,
             attention_context) = decoder_iter(decoder_input, attention_hidden, attention_cell, decoder_hidden,
                                               decoder_cell, attention_weights, attention_weights_cum,
                                               attention_context, memory, processed_memory, mask)

        if first_iter:
            mel_outputs = torch.unsqueeze(mel_output, 2)
            gate_outputs = torch.unsqueeze(gate_output, 2)
            alignments = torch.unsqueeze(attention_weights, 2)
            first_iter = False
        else:
            mel_outputs = torch.cat((mel_outputs, torch.unsqueeze(mel_output, 2)), 2)
            gate_outputs = torch.cat((gate_outputs, torch.unsqueeze(gate_output, 2)), 2)
            alignments = torch.cat((alignments, torch.unsqueeze(attention_weights, 2)), 2)

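        # A sequence is still "not finished" while sigmoid(gate_output) <= gate_threshold;
        # mel_lengths accumulates one decoder step per still-active sequence.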
        dec = torch.le(torch.sigmoid(gate_output), gate_threshold).to(torch.int32).squeeze(1)
        not_finished = not_finished*dec
        mel_lengths += not_finished

        if torch.sum(not_finished) == 0:
            print("Stopping after", mel_outputs.size(2), "decoder steps")
            break
        if mel_outputs.size(2) == max_decoder_steps:
            print("Warning! Reached max decoder steps")
            break

        decoder_input = mel_output

    print("Running Tacotron2 PostNet")
    with torch.no_grad():
        mel_outputs_postnet = postnet(mel_outputs)

    return mel_outputs_postnet
Esempio n. 34
0
    def forward(self, x, y=None):
        relu_latent = []
        pool_latent = []
        bias_latent_cnn = []
        relu_latentpn = []
        mean_latent_cnn = []
        var_latent_cnn = []
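        # xbias: an all-zeros input pushed through the same layers as x, so that the bias-only
        # activation pattern of the network can be recorded alongside the data path.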
        xbias = th.zeros([1, x.shape[1], x.shape[2], x.shape[3]], device=x.device)

        ############################  conv1 #####################################
        x = self.features[0](x)
        xbias = self.features[0](xbias)
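        # Record per-channel mean and variance of the conv output; these latents are later
        # passed to the top-down reconstruction pass.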
        mean_latent_cnn.append(th.mean(x, dim=(0, 2, 3), keepdim=True))
        var_latent_cnn.append(
            th.mean((x - th.mean(x, dim=(0, 2, 3), keepdim=True))**2,
                    dim=(0, 2, 3),
                    keepdim=True))

        ############################   batchnorm1 ##################################
        x = self.features[1](x)
        xbias = self.insnorms_cnn[0](xbias)
        bias_latent_cnn.append(self.features[1].bias)
        ############################   relu1 ##################################
        x = self.features[2](x)
        xbias = self.features[2](xbias)
        relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)

        # relu_latent and relu_latentpn keep track of which pixels are activated in the leaky ReLU
        # (slope 1 where the input is positive, 0.1 otherwise).
        relu_latentpn.append(
            th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)

        ############################   pool1 ##################################

        pool_latent.append(
            th.ge(
                x - F.interpolate(
                    self.features[3](x), scale_factor=2, mode='nearest'), 0))
        # pool_latent records where the original activations are greater than or equal to the
        # nearest-neighbour upsampled max-pooled output, i.e. the positions that attain the max in each pooling window.
        x = self.features[3](x)  #perform maxpooling on input image/activation
        xbias = self.features[3](
            xbias)  #perform maxpooling on input bias/bias activation

        ############################  conv2 #####################################
        x = self.features[4](x)
        xbias = self.features[4](xbias)
        mean_latent_cnn.append(th.mean(x, dim=(0, 2, 3), keepdim=True))
        var_latent_cnn.append(
            th.mean((x - th.mean(x, dim=(0, 2, 3), keepdim=True))**2,
                    dim=(0, 2, 3),
                    keepdim=True))

        ############################   batchnorm2 ##################################
        x = self.features[5](x)
        xbias = self.insnorms_cnn[1](xbias)
        bias_latent_cnn.append(self.features[5].bias)

        ############################   relu2 ##################################
        x = self.features[6](x)
        xbias = self.features[6](xbias)
        relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)

        # relu_latent and relu_latentpn keep track of which pixels are activated in the leaky ReLU
        # (slope 1 where the input is positive, 0.1 otherwise).
        relu_latentpn.append(
            th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)

        ############################   pool2 ##################################

        pool_latent.append(
            th.ge(
                x - F.interpolate(
                    self.features[7](x), scale_factor=2, mode='nearest'), 0))
        # pool_latent records where the original activations are greater than or equal to the
        # nearest-neighbour upsampled max-pooled output, i.e. the positions that attain the max in each pooling window.
        x = self.features[7](x)  #perform maxpooling on input image/activation
        xbias = self.features[7](
            xbias)  #perform maxpooling on input bias/bias activation

        ############################  conv3 #####################################
        x = self.features[8](x)
        xbias = self.features[8](xbias)
        mean_latent_cnn.append(th.mean(x, dim=(0, 2, 3), keepdim=True))
        var_latent_cnn.append(
            th.mean((x - th.mean(x, dim=(0, 2, 3), keepdim=True))**2,
                    dim=(0, 2, 3),
                    keepdim=True))

        ############################   batchnorm3 ##################################
        x = self.features[9](x)
        xbias = self.insnorms_cnn[2](xbias)
        bias_latent_cnn.append(self.features[9].bias)

        ############################   relu3 ##################################
        x = self.features[10](x)
        xbias = self.features[10](xbias)
        relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)

        # relu_latent and relu_latentpn keep track of which pixels are activated in the leaky ReLU
        # (slope 1 where the input is positive, 0.1 otherwise).
        relu_latentpn.append(
            th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)

        ############################   pool3 ##################################

        pool_latent.append(
            th.ge(
                x - F.interpolate(
                    self.features[11](x), scale_factor=2, mode='nearest'), 0))
        # pool_latent records where the original activations are greater than or equal to the
        # nearest-neighbour upsampled max-pooled output, i.e. the positions that attain the max in each pooling window.
        x = self.features[11](x)  #perform maxpooling on input image/activation
        xbias = self.features[11](
            xbias)  #perform maxpooling on input bias/bias activation

        relu_latent = relu_latent[::-1]
        pool_latent = pool_latent[::-1]
        bias_latent_cnn = bias_latent_cnn[::-1]
        self.bias_latent_cnn = bias_latent_cnn
        relu_latentpn = relu_latentpn[::-1]
        mean_latent_cnn = mean_latent_cnn[::-1]
        var_latent_cnn = var_latent_cnn[::-1]
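        # Reverse all latched latents so they line up with the top-down pass, which traverses the
        # layers in the opposite (deepest-first) order.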

        # send the features into the classifier
        trl_w, z = self.trl(x)
        w_t = trl_w.permute(dims=(3, 0, 1, 2))

        # do reconstruction via nrm
        # xhat: the reconstruction image
        # loss_pn: path normalization loss
        # use z to reconstruct instead of argmax z

        xhat, _, loss_pn, loss_neg = self.topdown(
            self.nrm, make_one_hot(y, self.num_class), relu_latent,
            pool_latent, bias_latent_cnn, tl.ones(
                [1, z.size()[1]], device=None), relu_latentpn, mean_latent_cnn,
            var_latent_cnn, w_t) if y is not None else self.topdown(
                self.nrm,
                make_one_hot(th.argmax(z.detach(), dim=1), self.num_class),
                relu_latent, pool_latent, bias_latent_cnn,
                tl.ones([1, z.size()[1]], device=None), relu_latentpn,
                mean_latent_cnn, var_latent_cnn, w_t)

        return [z, xhat, loss_pn, loss_neg]
Esempio n. 35
0
    def _bound_logvar_lookup(self):
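        # Clamp the learned log-variance table from below: any entry <= logvar_bound is reset to
        # logvar_bound, preventing the predicted variances from collapsing towards zero.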
        self.logvar_lookup.weight.data[torch.le(
            self.logvar_lookup.weight, self.logvar_bound)] = self.logvar_bound
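    # A minimal usage sketch (assumption, not from the original source): logvar_lookup is taken to be
    # an nn.Embedding and logvar_bound a float; the method would be called after each optimizer step
    # to keep the stored log-variances above the bound, e.g.
    #
    #   self.logvar_lookup = nn.Embedding(num_components, latent_dim)
    #   self.logvar_bound = math.log(1e-3)
    #   ...
    #   optimizer.step()
    #   self._bound_logvar_lookup()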