def updateGradInput(self, input, y): v1 = input[0] v2 = input[1] gw1 = self.gradInput[0] gw2 = self.gradInput[1] gw1.resize_as_(v1).copy_(v2) gw2.resize_as_(v1).copy_(v1) torch.mul(self.w1, self.w22, out=self.buffer) gw1.addcmul_(-1, self.buffer.expand_as(v1), v1) gw1.mul_(self.w.expand_as(v1)) torch.mul(self.w1, self.w32, out=self.buffer) gw2.addcmul_(-1, self.buffer.expand_as(v1), v2) gw2.mul_(self.w.expand_as(v1)) # self._idx = self._outputs <= 0 torch.le(self._outputs, 0, out=self._idx) self._idx = self._idx.view(-1, 1).expand(gw1.size()) gw1[self._idx] = 0 gw2[self._idx] = 0 torch.eq(y, 1, out=self._idx) self._idx = self._idx.view(-1, 1).expand(gw2.size()) gw1[self._idx] = gw1[self._idx].mul_(-1) gw2[self._idx] = gw2[self._idx].mul_(-1) if self.sizeAverage: gw1.div_(y.size(0)) gw2.div_(y.size(0)) return self.gradInput
def smooth_L1(pred, targets, alpha_in, alpha_out, beta=1.0):
    """Weighted smooth-L1 loss, summed and divided by ``pred.size(0)``.

    The difference ``pred - targets`` is scaled by ``alpha_in``.  Elements
    whose absolute value is at most ``beta`` use the quadratic term
    ``0.5 * x**2 / beta``; the rest use the linear term ``|x| - 0.5 * beta``.
    Each element is then weighted by ``alpha_out`` before the sum.
    """
    diff = (pred - targets) * alpha_in
    magnitude = torch.abs(diff)

    # Float masks (rather than torch.where) select the branch per element.
    quadratic_mask = (magnitude <= beta).float()
    linear_mask = 1 - quadratic_mask

    quadratic_term = 0.5 * diff ** 2 / beta
    linear_term = magnitude - 0.5 * beta

    per_element = quadratic_term * quadratic_mask + linear_term * linear_mask
    return torch.sum(per_element * alpha_out) / pred.size(0)
def backward(self, grad_output):
    """Return the gradients for the two saved inputs (and ``None`` for ``y``).

    NOTE(review): the arithmetic looks like the backward pass of a cosine
    embedding loss -- confirm against the matching ``forward``.

    Reads ``self.saved_tensors`` (``v1``, ``v2``, ``y``) together with
    ``self.w1``, ``self.w22``, ``self.w32``, ``self.w``, ``self._outputs`` and
    ``self.size_average`` (presumably cached by ``forward`` -- TODO confirm).

    Args:
        grad_output: upstream gradient; only ``grad_output[0]`` is used, so it
            must be indexable along dimension 0.

    Returns:
        ``(grad_v1, grad_v2, None)``.
    """
    v1, v2, y = self.saved_tensors

    gw1 = grad_output.new()
    gw2 = grad_output.new()
    gw1.resize_as_(v1).copy_(v2)
    gw2.resize_as_(v1).copy_(v1)

    # gw1 = (v2 - (w1 * w22) * v1) * w
    # The keyword form of addcmul_ replaces the deprecated positional-scalar
    # overload ``addcmul_(-1, t1, t2)``.
    buffer = torch.mul(self.w1, self.w22)
    gw1.addcmul_(buffer.expand_as(v1), v1, value=-1)
    gw1.mul_(self.w.expand_as(v1))

    # gw2 = (v1 - (w1 * w32) * v2) * w
    buffer = torch.mul(self.w1, self.w32)
    gw2.addcmul_(buffer.expand_as(v1), v2, value=-1)
    gw2.mul_(self.w.expand_as(v1))

    # Use the comparison result directly as the mask instead of writing it
    # into a uint8 tensor: indexing with uint8 masks is deprecated.
    zero_mask = torch.le(self._outputs, 0).view(-1, 1).expand(gw1.size())
    gw1[zero_mask] = 0
    gw2[zero_mask] = 0

    # Rows labelled y == 1 get their gradient sign flipped.  Mask indexing
    # returns a copy, so the negated values are written back explicitly.
    flip_mask = torch.eq(y, 1).view(-1, 1).expand(gw2.size())
    gw1[flip_mask] = gw1[flip_mask].mul_(-1)
    gw2[flip_mask] = gw2[flip_mask].mul_(-1)

    if self.size_average:
        gw1.div_(y.size(0))
        gw2.div_(y.size(0))

    # Skip the scaling when the upstream gradient is exactly 1.
    grad_output_val = grad_output[0]
    if grad_output_val != 1:
        gw1.mul_(grad_output_val)
        gw2.mul_(grad_output_val)

    return gw1, gw2, None
def pck(source_points, warped_points, L_pck, alpha=0.1):
    """Percentage of correct keypoints (PCK) for each item of a batch.

    ``source_points`` and ``warped_points`` are indexed as
    ``[item, coordinate, point]``.  Points whose first two coordinates are both
    different from ``-1`` are counted, and that count ``N`` selects the first
    ``N`` points of each item.  A point is correct when its Euclidean distance
    to the warped point is at most ``L_pck[item] * alpha``.

    Returns:
        A 1-D tensor with one score per batch item.
    """
    num_items = source_points.size(0)
    scores = torch.zeros((num_items))
    for item in range(num_items):
        src = source_points[item, :]
        wrp = warped_points[item, :]

        # Count the points that are not padded with -1 in either coordinate.
        is_valid = torch.ne(src[0, :], -1) * torch.ne(src[1, :], -1)
        num_valid = torch.sum(is_valid)

        offsets = src[:, :num_valid] - wrp[:, :num_valid]
        distances = offsets.pow(2).sum(0).pow(0.5)

        threshold = L_pck[item].expand_as(distances) * alpha
        is_correct = torch.le(distances, threshold)
        scores[item] = is_correct.float().mean()
    return scores
def get_reward_fn(env, states_tensor, actions_tensor):
    """Compute rewards from state/action tensors for a handful of environments.

    Despite the name, this returns reward values, not a function.  The branch
    is selected from ``env`` (the literal string ``'lin_dyn'``) or from
    ``env.spec.id``.

    Branch summary, as visible in the code:
      * ``lin_dyn`` / ``lin-dyn-v0``: negative sum of squared states plus
        squared actions, reduced over the last of three axes.
      * ``Pendulum-v0``: negative cost built from the angle (recovered with
        ``atan2`` from columns 1 and 0), column 2 and the action.
      * ``HalfCheetah-v2``: unfinished -- see the NOTE(review) in the branch.
      * ``dm-Pendulum-v0``: 0/1 double tensor.
      * ``dm-Cartpole-swingup-v0``: one reward per row, computed in a loop.
      * ``CartPole-v0``: returns the transposed ``done`` mask, not a reward.
      * anything else (and HalfCheetah, if it did not crash): the integer 0.

    NOTE(review): passing any string other than ``'lin_dyn'`` as ``env``
    would fail on ``env.spec`` -- confirm callers only pass that literal or an
    environment object.
    """
    if (env == 'lin_dyn') or (env.spec.id == 'lin-dyn-v0'):
        #set actions multiplier to 0 to try with reinforce
        rewards = -(
            torch.einsum('ijk,ijk->ij', [states_tensor, states_tensor]) +
            torch.einsum('ijk,ijk->ij', [actions_tensor, actions_tensor]))
        #rewards = torch.clamp(states_tensor[:,0]**2, min=0., max=1.0)
        return rewards
    if env.spec.id == 'Pendulum-v0':
        # Columns are read as cos(theta), sin(theta), theta_dot.
        thcos = states_tensor[:, :, 0]
        thsin = states_tensor[:, :, 1]
        thdot = states_tensor[:, :, 2]
        #pdb.set_trace()
        #tanth = thsin/thcos
        #tanth[torch.isnan(tanth)] = 0
        th = torch.atan2(thsin, thcos)
        # NOTE(review): drops into the interactive debugger on NaN; this will
        # block a non-interactive run.
        if torch.isnan(th).any():
            pdb.set_trace()
        #u = torch.clamp(actions_tensor, min=-MAX_TORQUE, max=MAX_TORQUE).squeeze()
        u = actions_tensor.squeeze().unsqueeze(1)
        costs = angle_normalize(th)**2 + .1 * thdot**2 + .001 * (u**2)
        return -costs  #.unsqueeze(2)
    elif env.spec.id == 'HalfCheetah-v2':
        # NOTE(review): this branch is unfinished.  ``self`` is not a parameter
        # of this function, so ``self._get_obs()`` raises NameError unless a
        # module-level ``self`` exists; the nested ``_get_obs`` below is
        # defined after that call and never invoked; ``reward`` is computed but
        # never returned, so control would fall through to ``return 0``.
        dt = 0.05  #from stepping through env
        xposbefore = states_tensor[:, 0]  # xposbefore = self.sim.data.qpos[0]
        # self.do_simulation(action, self.frame_skip) #can't do this step because this is also for stepping through environment, but I actually HAVE the next states, and can compare them directly here
        xposafter = states_tensor  #self.sim.data.qpos[0]
        ob = self._get_obs()
        reward_ctrl = -0.1 * torch.square(actions_tensor).sum()
        reward_run = (xposafter - xposbefore) / dt
        reward = reward_ctrl + reward_run

        def _get_obs(self):
            return np.concatenate([
                self.sim.data.qpos.flat[1:],
                self.sim.data.qvel.flat,
            ])
    elif env.spec.id == 'dm-Pendulum-v0':
        # 1.0 where column 0 lies in [cos(8 deg), 1], 0.0 where it is below
        # cos(8 deg).  The ``==`` of the two masks is also True when both are
        # False, i.e. for values above 1 together with a bound above 1, which
        # cannot happen here because COS_BND < 1.
        COS_BND = np.cos(np.deg2rad(8))
        rewards = (torch.le(states_tensor[:, :, 0],
                            1) == torch.ge(states_tensor[:, :, 0], COS_BND))
        return rewards.double()
    elif env.spec.id == 'dm-Cartpole-swingup-v0':
        #TAKES NEXT STATE FOR REWARD NOT CURRENT STATE
        # Unlike the branches above, states are indexed with two indices here
        # ([row, feature]) -- presumably a 2-D tensor; TODO confirm.
        rewards_to_return = torch.zeros(states_tensor.shape[0])
        for d in range(states_tensor.shape[0]):
            pole_angle_cosine = states_tensor[d, 1]
            upright = (pole_angle_cosine + 1) / 2
            centered = tolerance(states_tensor[d, 0], margin=2)
            centered = (1 + centered) / 2
            small_control = tolerance(actions_tensor[d, :],
                                      margin=1,
                                      value_at_margin=0,
                                      sigmoid='quadratic')[0]
            small_control = (4 + small_control) / 5
            small_velocity = tolerance(states_tensor[d, 4], margin=5).min()
            small_velocity = (1 + small_velocity) / 2
            rewards_to_return[d] = upright.mean(
            ) * small_control * small_velocity * centered  #torch.from_numpy(centered)
            # OrderedDict([('position', array([ 0.01871485, -0.99999419, -0.00340747])), ('velocity', array([0.04293839, 0.06518433]))])
        return rewards_to_return
    elif env.spec.id == 'CartPole-v0':
        theta_threshold_radians = 12 * 2 * np.pi / 360
        x_threshold = 2.4
        x = states_tensor[:, :, 0]
        #x_dot = states_tensor[:,:,1]
        theta = states_tensor[:, :, 2]
        #theta_dot = states_tensor[:,:,3]
        #this is a problem because ITS A BIG MATRIX WITH BATCHES AND DIFFERENT TIME STEPS!!!!!!
        done = (x < -x_threshold) \
            | (x > x_threshold) \
            | (theta < -theta_threshold_radians) \
            | (theta > theta_threshold_radians)
        #done = bool(done)
        # NOTE(review): this returns the termination mask itself (transposed),
        # not a reward; the commented-out code below sketches the intended
        # reward logic.
        return done.transpose(1, 0)
        # if not done:
        #     reward = 1.0
        # elif self.steps_beyond_done is None:
        #     # Pole just fell!
        #     self.steps_beyond_done = 0
        #     reward = 1.0
        # else:
        #     if self.steps_beyond_done == 0:
        #         logger.warn("You are calling 'step()' even though this environment has already returned done = True. You should always call 'reset()' once you receive 'done = True' -- any further steps are undefined behavior.")
        #     self.steps_beyond_done += 1
        #     reward = 0.0
    # else:
    #     raise NotImplementedError
    # elif env.spec.id == 'dm_cartpole_balance':
    #     states = states_tensor.cpu().detach().numpy()
    #     print(states)
    #     states = np.swapaxes(np.atleast_3d(states), 1,2)
    #     pole_angle_cosine = states[:,:,1]
    #     cart_position = states[:,:,0]
    #     angular_vel = states[:,:,]
    #     control = actions_tensor.cpu().detach().numpy().squeeze()
    #     upright = (pole_angle_cosine + 1) / 2
    #     centered = tolerance(cart_position, margin=2)
    #     centered = (1 + centered) / 2
    #     small_control = tolerance(actions_tensor, margin=1,
    #                               value_at_margin=0.000000001,
    #                               sigmoid='quadratic')[0]
    #     small_control = (4 + small_control) / 5
    #     small_velocity = tolerance(angular_vel, margin=5).min()
    #     small_velocity = (1 + small_velocity) / 2
    #     return torch.FloatTensor(np.expand_dims(upright.mean(axis=0),axis=1) * small_control * small_velocity * centered.T)
    # Fallback for environments without a reward model.
    return 0
def forward(self, classifications, regressions, anchors, annotations):
    """Compute the focal classification loss and smooth-L1 box loss.

    Besides computing the losses, every call writes a debug trace to a new
    ``record<N>.txt`` file in the current directory, where ``N`` is the number
    of directory entries at call time.  The file is closed even if the
    computation raises.

    All helper tensors are created on ``classifications.device``, so CPU
    inputs work on a machine that also has CUDA.

    Args:
        classifications: indexed as ``[image, anchor, class]``; values are
            clamped to ``[1e-4, 1 - 1e-4]`` before taking logs, so they are
            presumably probabilities (TODO confirm against the model head).
        regressions: indexed as ``[image, anchor, 4]``.
        anchors: only ``anchors[0]`` is used; columns 0..3 are read as
            ``x1, y1, x2, y2``.
        annotations: indexed as ``[image, box, 5]``; column 4 is used as the
            class index and rows whose column 4 equals ``-1`` are dropped.

    Returns:
        ``(classification_loss, regression_loss)``: the batch means, each kept
        as a one-element tensor.
    """
    alpha = 0.25
    gamma = 2.0
    device = classifications.device
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]

    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    num = len(os.listdir("./"))
    with open("record" + str(num) + ".txt", "w") as f:
        for j in range(batch_size):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]

            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

            # The slice strips a fixed-width prefix/suffix from the tensor
            # repr; kept exactly as in the original trace format.
            for i in range(bbox_annotation.shape[0]):
                f.write(str(bbox_annotation[i])[7:-18] + "\n")
            f.write("=" * 50 + "\n")

            if bbox_annotation.shape[0] == 0:
                # No ground truth for this image: both losses are zero.
                regression_losses.append(torch.tensor(0).float().to(device))
                classification_losses.append(
                    torch.tensor(0).float().to(device))
                f.write("0 0\n")
                continue

            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

            # num_anchors x num_annotations
            IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
            IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # per anchor

            # Classification targets: -1 = ignore, 0 = background, 1 = class.
            targets = torch.ones(classification.shape, device=device) * -1
            targets[torch.lt(IoU_max, 0.4), :] = 0

            positive_indices = torch.ge(IoU_max, 0.5)
            num_positive_anchors = positive_indices.sum()

            assigned_annotations = bbox_annotation[IoU_argmax, :]

            targets[positive_indices, :] = 0
            targets[positive_indices,
                    assigned_annotations[positive_indices, 4].long()] = 1

            alpha_factor = torch.ones(targets.shape, device=device) * alpha
            alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                       1. - alpha_factor)
            focal_weight = torch.where(torch.eq(targets, 1.),
                                       1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

            bce = -(targets * torch.log(classification) +
                    (1.0 - targets) * torch.log(1.0 - classification))

            cls_loss = focal_weight * bce
            # Anchors marked "ignore" contribute nothing.
            cls_loss = torch.where(
                torch.ne(targets, -1.0), cls_loss,
                torch.zeros(cls_loss.shape, device=device))

            positive_bce = bce[positive_indices]
            positive_scores = classification[positive_indices, :]
            positive_cls_loss = cls_loss[positive_indices]

            classification_losses.append(
                cls_loss.sum() /
                torch.clamp(num_positive_anchors.float(), min=1.0))
            f.write(str(classification_losses) + "\n")

            positive_argmax = IoU_argmax[positive_indices]
            for i in range(positive_scores.shape[0]):
                f.write(
                    str(positive_scores[i]) + " " +
                    str(positive_argmax[i].item()) + " " +
                    str(positive_bce[i]) + " " +
                    str(positive_cls_loss[i].sum().item()) + "\n")
            f.write("-" * 50 + "\n")

            # Regression loss, over positive anchors only.
            if num_positive_anchors > 0:
                assigned_annotations = assigned_annotations[
                    positive_indices, :]

                anchor_widths_pi = anchor_widths[positive_indices]
                anchor_heights_pi = anchor_heights[positive_indices]
                anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
                anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

                gt_widths = (assigned_annotations[:, 2] -
                             assigned_annotations[:, 0])
                gt_heights = (assigned_annotations[:, 3] -
                              assigned_annotations[:, 1])
                gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
                gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

                # Clip sizes to at least 1 so the log below stays finite.
                gt_widths = torch.clamp(gt_widths, min=1)
                gt_heights = torch.clamp(gt_heights, min=1)

                targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
                targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
                targets_dw = torch.log(gt_widths / anchor_widths_pi)
                targets_dh = torch.log(gt_heights / anchor_heights_pi)

                targets = torch.stack(
                    (targets_dx, targets_dy, targets_dw, targets_dh))
                targets = targets.t()
                targets = targets / torch.Tensor(
                    [[0.1, 0.1, 0.2, 0.2]]).to(device)

                regression_diff = torch.abs(
                    targets - regression[positive_indices, :])

                # Smooth L1 with the switch point at 1/9.
                regression_loss = torch.where(
                    torch.le(regression_diff, 1.0 / 9.0),
                    0.5 * 9.0 * torch.pow(regression_diff, 2),
                    regression_diff - 0.5 / 9.0)
                regression_losses.append(regression_loss.mean())
                f.write(str(regression_losses[-1].item()) + "\n")

                # Hoisted out of the loop: these do not depend on ``i``.
                positive_iou_max = IoU_max[positive_indices]
                positive_anchor = anchor[positive_indices]
                for i in range(regression_loss.shape[0]):
                    f.write(
                        str(positive_argmax[i].item()) + " " +
                        str(regression_loss[i]) + " " +
                        str(positive_iou_max[i].item()) + " " +
                        str(positive_anchor[i]) + "\n")
            else:
                regression_losses.append(torch.tensor(0).float().to(device))
                f.write("0")

        result = (torch.stack(classification_losses).mean(dim=0,
                                                          keepdim=True),
                  torch.stack(regression_losses).mean(dim=0, keepdim=True))
        f.write(
            str((classification_losses[-1] +
                 regression_losses[-1]).item()) + "\n")
    return result
def get_mel_banks(num_bins, window_length_padded, sample_freq, low_freq, high_freq, vtln_low, vtln_high, vtln_warp_factor):
    # type: (int, int, float, float, float, float, float, float) -> Tuple[Tensor, Tensor]
    """Build triangular mel filter banks.

    Args:
        num_bins: number of mel bins; must be at least 3.
        window_length_padded: padded window length; must be even.
        sample_freq: sampling frequency.
        low_freq: lower cutoff; must satisfy ``0 <= low_freq < nyquist``.
        high_freq: upper cutoff; a value ``<= 0`` is taken as an offset from
            the Nyquist frequency.
        vtln_low: lower VTLN frequency (only checked when warping is active).
        vtln_high: upper VTLN frequency; a negative value is taken as an
            offset from the Nyquist frequency.
        vtln_warp_factor: VTLN warp factor; ``1.0`` disables warping.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]: The tuple consists of ``bins`` (which is
        melbank of size (``num_bins``, ``num_fft_bins``)) and ``center_freqs`` (which is
        center frequencies of bins of size (``num_bins``)).

    Raises:
        AssertionError: if any of the option checks above fails.
    """
    # The message always said "at least 3"; the check used to reject exactly 3.
    assert num_bins >= 3, 'Must have at least 3 mel bins'
    assert window_length_padded % 2 == 0
    num_fft_bins = window_length_padded / 2
    nyquist = 0.5 * sample_freq

    if high_freq <= 0.0:
        high_freq += nyquist

    assert (0.0 <= low_freq < nyquist) and (0.0 < high_freq <= nyquist) and (low_freq < high_freq), \
        ('Bad values in options: low-freq %f and high-freq %f vs. nyquist %f' % (low_freq, high_freq, nyquist))

    # fft-bin width [think of it as Nyquist-freq / half-window-length]
    fft_bin_width = sample_freq / window_length_padded
    mel_low_freq = mel_scale_scalar(low_freq)
    mel_high_freq = mel_scale_scalar(high_freq)

    # divide by num_bins+1 in next line because of end-effects where the bins
    # spread out to the sides.
    mel_freq_delta = (mel_high_freq - mel_low_freq) / (num_bins + 1)

    if vtln_high < 0.0:
        vtln_high += nyquist

    assert vtln_warp_factor == 1.0 or ((low_freq < vtln_low < high_freq) and
                                       (0.0 < vtln_high < high_freq) and (vtln_low < vtln_high)), \
        ('Bad values in options: vtln-low %f and vtln-high %f, versus low-freq %f and high-freq %f' %
         (vtln_low, vtln_high, low_freq, high_freq))

    # Renamed from ``bin`` to avoid shadowing the builtin.
    bin_idx = torch.arange(num_bins).unsqueeze(1)
    left_mel = mel_low_freq + bin_idx * mel_freq_delta  # size(num_bins, 1)
    center_mel = mel_low_freq + (bin_idx + 1.0) * mel_freq_delta  # size(num_bins, 1)
    right_mel = mel_low_freq + (bin_idx + 2.0) * mel_freq_delta  # size(num_bins, 1)

    if vtln_warp_factor != 1.0:
        left_mel = vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, vtln_warp_factor, left_mel)
        center_mel = vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, vtln_warp_factor, center_mel)
        right_mel = vtln_warp_mel_freq(vtln_low, vtln_high, low_freq, high_freq, vtln_warp_factor, right_mel)

    center_freqs = inverse_mel_scale(center_mel)  # size (num_bins)
    # size(1, num_fft_bins)
    mel = mel_scale(fft_bin_width * torch.arange(num_fft_bins)).unsqueeze(0)

    # size (num_bins, num_fft_bins)
    up_slope = (mel - left_mel) / (center_mel - left_mel)
    down_slope = (right_mel - mel) / (right_mel - center_mel)

    if vtln_warp_factor == 1.0:
        # left_mel < center_mel < right_mel so we can min the two slopes and clamp negative values
        bins = torch.max(torch.zeros(1), torch.min(up_slope, down_slope))
    else:
        # warping can move the order of left_mel, center_mel, right_mel anywhere
        bins = torch.zeros_like(up_slope)
        up_idx = torch.gt(mel, left_mel) & torch.le(mel, center_mel)  # left_mel < mel <= center_mel
        down_idx = torch.gt(mel, center_mel) & torch.lt(mel, right_mel)  # center_mel < mel < right_mel
        bins[up_idx] = up_slope[up_idx]
        bins[down_idx] = down_slope[down_idx]

    return bins, center_freqs
def calculate(self, sample_list, model_output, k, *args, **kwargs):
    """Return the fraction of ranks that are at most ``k`` (recall@k).

    Ranks come from ``self.get_ranks(sample_list, model_output)``; the count
    of entries ``<= k`` is divided by the size of the first dimension.
    """
    ranks = self.get_ranks(sample_list, model_output)
    num_hits = (ranks <= k).sum()
    return float(num_hits) / ranks.size(0)
def less_equal(x, y, **kwargs):
    """Element-wise ``x <= y`` that also accepts non-tensor operands.

    Anything that is not already a tensor is wrapped with ``torch.tensor``;
    extra keyword arguments are forwarded to ``torch.le``.
    """
    lhs = x if torch.is_tensor(x) else torch.tensor(x)
    rhs = y if torch.is_tensor(y) else torch.tensor(y)
    return torch.le(lhs, rhs, **kwargs)
def forward(self, classifications, regressions, anchors, annotations):
    """Compute the focal classification loss and smooth-L1 box loss.

    All helper tensors are created on ``classifications.device``, so CPU
    inputs work on a machine that also has CUDA.

    Args:
        classifications: indexed as ``[image, anchor, class]``; values are
            clamped to ``[1e-4, 1 - 1e-4]`` before taking logs, so they are
            presumably probabilities (TODO confirm against the model head).
        regressions: indexed as ``[image, anchor, 4]``.
        anchors: only ``anchors[0]`` is used; columns 0..3 are read as
            ``x1, y1, x2, y2``.
        annotations: indexed as ``[image, box, 5]``; column 4 is used as the
            class index and rows whose column 4 equals ``-1`` are dropped.

    Returns:
        ``(classification_loss, regression_loss)``: the batch means, each kept
        as a one-element tensor.
    """
    alpha = 0.25
    gamma = 2.0
    device = classifications.device
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]

    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        if bbox_annotation.shape[0] == 0:
            # No ground truth for this image: both losses are zero.
            regression_losses.append(torch.tensor(0).float().to(device))
            classification_losses.append(torch.tensor(0).float().to(device))
            continue

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        # num_anchors x num_annotations
        IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # per anchor

        # Classification targets: -1 = ignore, 0 = background, 1 = class.
        targets = torch.ones(classification.shape, device=device) * -1
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices,
                assigned_annotations[positive_indices, 4].long()] = 1

        alpha_factor = torch.ones(targets.shape, device=device) * alpha
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                   1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.),
                                   1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) +
                (1.0 - targets) * torch.log(1.0 - classification))

        cls_loss = focal_weight * bce
        # Anchors marked "ignore" contribute nothing.
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                               torch.zeros(cls_loss.shape, device=device))

        classification_losses.append(
            cls_loss.sum() /
            torch.clamp(num_positive_anchors.float(), min=1.0))

        # Regression loss, over positive anchors only.
        if num_positive_anchors > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = (assigned_annotations[:, 2] -
                         assigned_annotations[:, 0])
            gt_heights = (assigned_annotations[:, 3] -
                          assigned_annotations[:, 1])
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # Clip sizes to at least 1 so the log below stays finite.
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack(
                (targets_dx, targets_dy, targets_dw, targets_dh))
            targets = targets.t()
            targets = targets / torch.Tensor(
                [[0.1, 0.1, 0.2, 0.2]]).to(device)

            regression_diff = torch.abs(targets -
                                        regression[positive_indices, :])

            # Smooth L1 with the switch point at 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            regression_losses.append(torch.tensor(0).float().to(device))

    return (torch.stack(classification_losses).mean(dim=0, keepdim=True),
            torch.stack(regression_losses).mean(dim=0, keepdim=True))
y.scatter_(-1, input_y.unsqueeze(1), 1)
# scatter_ arguments:
#   1st: the dimension along which to index.
#   2nd: the column tensor holding the indices of the elements to scatter.
#        - it must have the same number of dimensions as the y_onehot tensor,
#          which is 2, so unsqueeze adds an extra dimension
#          (from (4898) to (4898, 1)).
#   3rd: the value(s) to scatter.
print(y[:10])

# Normalize the input data (z-norm / standardization):
x_mean = torch.mean(input_x, dim=0)
x_variance = torch.var(input_x, dim=0)
x = (input_x - x_mean) / torch.sqrt(x_variance)
print(x[:10])

# Split the wines by score: bad is <= 3, mid is 4..6, good is >= 7.
bad_index = torch.le(input_y, 3)
print(bad_index.shape, bad_index[:10], bad_index.sum())

bad_data = data[bad_index]
mid_data = data[torch.lt(input_y, 7) & torch.gt(input_y, 3)]
good_data = data[torch.ge(input_y, 7)]

bad_mean = torch.mean(bad_data, dim=0)
mid_mean = torch.mean(mid_data, dim=0)
good_mean = torch.mean(good_data, dim=0)

# Read only the header row, and close the file once it has been read.
with open(file_path) as header_file:
    column_names = next(csv.reader(header_file, delimiter=';'))

for i, args in enumerate(zip(column_names, bad_mean, mid_mean, good_mean)):
    print('{:2} {:20} {:6.2f} {:6.2f} {:6.2f}'.format(i, *args))
def ppo_update(self, states, actions, log_probs, returns, advantages, sg_returns, sg_advantage, c_q_returns, c_costs, clip_param=0.2):
    """Run ``self.ppo_epochs`` passes of PPO updates over the given rollout.

    For every mini-batch produced by ``self.ppo_iter`` this performs, in order:
    an actor-critic step (``self.ac_optimizer``), a cost-critic step
    (``self.critic_optimizer``) and, when ``self.args.cost_sg_coeff`` is
    truthy, a safeguard value step (``self.sg_optimizer``).

    NOTE(review): the ``clip_param`` argument is never read; the clipping
    range comes from ``self.clip_param``.
    """
    for _ in range(self.ppo_epochs):
        minibatches = self.ppo_iter(
            states, actions, log_probs, returns, advantages, sg_returns,
            sg_advantage, c_q_returns, c_costs)
        for minibatch in minibatches:
            (state, action, old_log_probs, return_, advantage, sg_adv,
             sg_return_, c_q_return_, c_cost_) = minibatch

            val, mu_safe, dist = self.safe_ac(state, current_cost=c_cost_)
            # The action is detached so the cost critic does not
            # back-propagate into the actor.
            cost_q_val = self.cost_critic(state, mu_safe.detach())

            # ---- actor: clipped surrogate objective ----
            entropy = dist.entropy().mean()
            new_log_probs = dist.log_prob(action)
            ratio = (new_log_probs - old_log_probs).exp()

            unclipped = ratio * advantage
            clipped = torch.clamp(ratio, 1.0 - self.clip_param,
                                  1.0 + self.clip_param) * advantage
            actor_loss = -torch.min(unclipped, clipped)

            if self.args.cost_sg_coeff:
                # Safeguard policy, without baseline.
                _, sg_mu, sg_std = self.ac_model(state)
                sg_val = self.sg_model(state)

                unconst_dist = torch.distributions.Normal(sg_mu, sg_std)
                sg_new_log_probs = unconst_dist.log_prob(action)
                sg_ratio = (sg_new_log_probs - old_log_probs).exp()

                sg_unclipped = sg_ratio * sg_adv
                sg_clipped = torch.clamp(sg_ratio, 1.0 - self.clip_param,
                                         1.0 + self.clip_param) * sg_adv
                sg_loss = -torch.min(sg_unclipped, sg_clipped)

                # NOTE(review): ``c_q_return_`` is added to itself here; one
                # of the operands was possibly meant to be ``c_cost_`` --
                # confirm before changing.
                violate_mask = torch.le(c_q_return_ + c_q_return_,
                                        self.args.d0).float().detach()

                # Mask 1 keeps the PPO loss; mask 0 uses the scaled safeguard
                # loss instead.
                actor_loss = violate_mask * actor_loss + (
                    1. - violate_mask) * self.args.cost_sg_coeff * sg_loss

            actor_loss = actor_loss.mean()

            # ---- combined actor-critic step ----
            critic_loss = (return_ - val).pow(2).mean()
            ac_loss = (self.args.value_loss_coef * critic_loss) + \
                (actor_loss) - (self.args.beta * entropy)

            self.ac_optimizer.zero_grad()
            ac_loss.backward()
            self.ac_optimizer.step()

            # ---- cost critic step ----
            self.cost_critic.zero_grad()
            cost_critic_loss = (c_q_return_ - cost_q_val).pow(2).mean()

            self.critic_optimizer.zero_grad()
            cost_critic_loss.backward()
            self.critic_optimizer.step()

            # Clean everything just in case.
            self.clear_models_grad()

            # ---- safeguard value step ----
            if self.args.cost_sg_coeff:
                sg_val_loss = self.args.value_loss_coef * (
                    sg_return_ - sg_val).pow(2).mean()
                sg_val_loss.backward()
                self.sg_optimizer.step()

                # Clean everything just in case.
                self.clear_models_grad()
def negLogLikelihoodLoss(self, batchInput):
    """Return ``totalScore - goldScore`` from the CRF for one batch.

    ``batchInput`` is unpacked into 14 tensors/lists (see the first statement).
    Word embeddings are projected to node states, node states of the gazetteer
    entries are scattered into the node tensor, the graph layers are applied,
    and the per-word features gathered back from the graph are scored by
    ``self.crf``.

    Shapes below are read off the indexing in this method; anything about the
    tensors beyond that is an assumption to be confirmed against the data
    loader.
    """
    wordSeqTensor, tagSeqTensor, wordSeqLengths, charSeqTensor, charSeqLengths, seq2NodeTensor, node2SeqTensor, adjMatrixTensor, gazNode2Idxs, gazNodeLengths, nodeNums, gazBlankState, fwbigramTensor, bwbigramTensor = batchInput
    batchSize = wordSeqTensor.shape[0]
    sentLength = wordSeqTensor.shape[1]
    # Positions 1..sentLength repeated for every batch row; the mask is True
    # where the (1-based) position does not exceed the sequence length.
    maskTemp = torch.arange(1, sentLength + 1, dtype=torch.int64).view(
        1, sentLength).expand(batchSize, sentLength)
    if self.useGpu:
        maskTemp = move2cuda(maskTemp)
    mask = torch.le(
        maskTemp,
        wordSeqLengths.view(batchSize, 1).expand(batchSize, sentLength))
    if self.useGpu:
        mask = move2cuda(mask)
    # Word embedding: char-aware, bigram-augmented, or plain.  Bigrams are
    # only considered when char features are disabled.
    if self.useChar:
        wordSeqEmbedding = self.dropout(
            self.wordEmbedding(wordSeqTensor, charSeqTensor, charSeqLengths))
    else:
        if self.useBigram:
            wordSeqEmbedding = self.dropout(
                torch.cat([
                    self.wordEmbedding(wordSeqTensor),
                    self.fwbigramEmbedding(fwbigramTensor),
                    self.bwbigramEmbedding(bwbigramTensor)
                ], 2))
        else:
            wordSeqEmbedding = self.dropout(
                self.wordEmbedding(wordSeqTensor))
    wordStateEmbedding = self.embStateLinear(wordSeqEmbedding)
    maxMainNodeLength = node2SeqTensor.shape[1]
    # Pick, for every main node, the state of the word it maps to (dim 1).
    mainNodeState = torch.gather(
        wordStateEmbedding, 1,
        node2SeqTensor.expand(batchSize, maxMainNodeLength,
                              wordStateEmbedding.shape[2]))
    # With gazetteers enabled, append placeholder rows (from gazBlankState)
    # after the main nodes; they are filled by the scatter_ below.
    if self.gaNum > 0:
        initNodeStateEmbedding = torch.cat([
            mainNodeState,
            gazBlankState.view(batchSize, -1, 1).expand(
                batchSize, -1, self.stateDim)
        ],
                                           dim=1)
    else:
        initNodeStateEmbedding = mainNodeState
    # Running offset (per batch row) of the next free node slot.
    startNodeIdx = nodeNums.clone()
    for gazIdx in range(self.gaNum):
        gazState = self.gaLinear[gazIdx](self.gaEmb[gazIdx](
            gazNode2Idxs[gazIdx]))
        gazMaskRaw = torch.arange(0, gazState.shape[1],
                                  dtype=torch.int64).view(
                                      1, gazState.shape[1],
                                      1).expand(batchSize, gazState.shape[1],
                                                self.stateDim)
        if self.useGpu:
            gazMaskRaw = move2cuda(gazMaskRaw)
        # Clamp the local index to the row's gazetteer length: every entry at
        # or past the length is redirected to the single slot ``length``.
        gazMask = torch.where(
            gazMaskRaw < gazNodeLengths[gazIdx].view(batchSize, 1, 1),
            gazMaskRaw, gazNodeLengths[gazIdx].view(batchSize, 1, 1))
        if self.useGpu:
            gazMask = move2cuda(gazMask)
        # Shift local indices to absolute node positions and write the
        # gazetteer states into the node tensor in place.
        gazMask = gazMask + startNodeIdx.view(batchSize, 1, 1).expand(
            batchSize, gazState.shape[1], self.stateDim)
        initNodeStateEmbedding.scatter_(1, gazMask, gazState)
        startNodeIdx = startNodeIdx + gazNodeLengths[gazIdx]
    # Apply the graph layers; entry i + 1 is the output of layer i.
    nodeGraphEmbeddings = [initNodeStateEmbedding]
    for i in range(self.nLayer):
        nodeGraphEmbeddings.append(
            self.graphEmb[i](nodeGraphEmbeddings[i], adjMatrixTensor,
                             adjMatrixTensor.shape[1]))
    nodeGraphEmbedding = nodeGraphEmbeddings[self.nLayer]
    # Map node states back to word positions (dim 1).
    wordGraphEmbedding = torch.gather(
        nodeGraphEmbedding, 1,
        seq2NodeTensor.expand(
            [batchSize, sentLength, nodeGraphEmbedding.shape[2]]))
    if self.useRnn:
        rnnEmbedding = self.encoder(wordGraphEmbedding, wordSeqLengths)
        wordFeatures = self.logsoftmax(self.embFeatureLinear(rnnEmbedding))
    else:
        wordFeatures = self.logsoftmax(
            self.embFeatureLinear(wordGraphEmbedding))
    totalScore, scores = self.crf(wordFeatures, wordSeqLengths, mask)
    goldScore = self.crf.scoreSentence(tagSeqTensor, wordSeqLengths, scores,
                                       mask)
    return totalScore - goldScore
def test_comparison_ops_with_type_promotion(self, device):
    """Check lt/le/gt/ge/eq/ne across every pair of math dtypes.

    For each operator and dtype pair, the ``out=`` variant and the returning
    variant are checked on one-element tensors and on zero-dim tensors: the
    result must match the plain Python comparison, have dtype ``torch.bool``
    and leave the operand dtypes unchanged.
    """
    value_for_type = {
        torch.uint8: (1 << 5),
        torch.int8: (1 << 5),
        torch.int16: (1 << 10),
        torch.int32: (1 << 20),
        torch.int64: (1 << 35),
        torch.float16: (1 << 10),
        torch.float32: (1 << 20),
        torch.float64: (1 << 35)
    }

    def make_op(name, torch_fn, python_cmp):
        # Bundle the three flavours of one comparison operator.
        return dict(
            name=name,
            out_op=lambda x, y, d: torch_fn(
                x, y, out=torch.empty(1, dtype=torch.bool, device=d)),
            ret_op=lambda x, y: torch_fn(x, y),
            compare_op=python_cmp,
        )

    comparison_ops = [
        make_op("lt", torch.lt, lambda x, y: x < y),
        make_op("le", torch.le, lambda x, y: x <= y),
        make_op("gt", torch.gt, lambda x, y: x > y),
        make_op("ge", torch.ge, lambda x, y: x >= y),
        make_op("eq", torch.eq, lambda x, y: x == y),
        make_op("ne", torch.ne, lambda x, y: x != y),
    ]

    def check_result(out_res, expected, t1, dt1, t2, dt2):
        self.assertEqual(out_res, expected)
        self.assertTrue(out_res.dtype == torch.bool)
        self.assertTrue(t1.dtype == dt1)
        self.assertTrue(t2.dtype == dt2)

    def check_pair(op, t1, dt1, t2, dt2, expected):
        # ``out=`` variant first, then the returning variant.
        check_result(op["out_op"](t1, t2, device), expected, t1, dt1, t2, dt2)
        check_result(op["ret_op"](t1, t2), expected, t1, dt1, t2, dt2)

    for op in comparison_ops:
        for dt1 in torch.testing.get_all_math_dtypes(device):
            for dt2 in torch.testing.get_all_math_dtypes(device):
                val1 = value_for_type[dt1]
                val2 = value_for_type[dt2]
                truth = op["compare_op"](val1, val2)

                # One-element tensors.
                check_pair(
                    op,
                    torch.tensor([val1], dtype=dt1, device=device), dt1,
                    torch.tensor([val2], dtype=dt2, device=device), dt2,
                    torch.tensor([truth], dtype=torch.bool))

                # test that comparing a zero dim tensor with another zero dim
                # tensor has type promotion behavior
                check_pair(
                    op,
                    torch.tensor(val1, dtype=dt1, device=device), dt1,
                    torch.tensor(val2, dtype=dt2, device=device), dt2,
                    torch.tensor(truth, dtype=torch.bool))
def infer(self, memory, memory_lengths):
    """Decoder inference.

    Runs ``self.decode`` step by step, feeding each step's mel output back
    in as the next input, until every batch item has produced a gate value
    whose sigmoid exceeds ``self.gate_threshold`` (only when
    ``self.early_stopping`` is set) or ``self.max_decoder_steps`` steps have
    been generated.

    PARAMS
    ------
    memory: Encoder outputs
    memory_lengths: lengths passed to ``get_mask_from_lengths`` to build the
        mask handed to ``self.decode``

    RETURNS
    -------
    mel_outputs: mel outputs from the decoder
    gate_outputs: gate outputs from the decoder
    alignments: sequence of attention weights from the decoder
    mel_lengths: int32 tensor with one entry per batch item, counting the
        steps during which that item was still marked as not finished
    """
    decoder_input = self.get_go_frame(memory)
    mask = get_mask_from_lengths(memory_lengths)

    (attention_hidden,
     attention_cell,
     decoder_hidden,
     decoder_cell,
     attention_weights,
     attention_weights_cum,
     attention_context,
     processed_memory) = self.initialize_decoder_states(memory)

    batch_size = memory.size(0)
    mel_lengths = torch.zeros([batch_size], dtype=torch.int32,
                              device=memory.device)
    not_finished = torch.ones([batch_size], dtype=torch.int32,
                              device=memory.device)

    # Placeholders; replaced by real tensors on the first step.
    mel_outputs = torch.zeros(1)
    gate_outputs = torch.zeros(1)
    alignments = torch.zeros(1)

    first_iter = True
    while True:
        decoder_input = self.prenet(decoder_input)
        (mel_output,
         gate_output,
         attention_hidden,
         attention_cell,
         decoder_hidden,
         decoder_cell,
         attention_weights,
         attention_weights_cum,
         attention_context) = self.decode(
            decoder_input,
            attention_hidden,
            attention_cell,
            decoder_hidden,
            decoder_cell,
            attention_weights,
            attention_weights_cum,
            attention_context,
            memory,
            processed_memory,
            mask)

        mel_frame = mel_output.unsqueeze(0)
        if not first_iter:
            mel_outputs = torch.cat((mel_outputs, mel_frame), dim=0)
            gate_outputs = torch.cat((gate_outputs, gate_output), dim=0)
            alignments = torch.cat((alignments, attention_weights), dim=0)
        else:
            mel_outputs = mel_frame
            gate_outputs = gate_output
            alignments = attention_weights
            first_iter = False

        # 1 where the stop gate is still at or below the threshold.
        gate_prob = torch.sigmoid(gate_output)
        dec = torch.le(gate_prob, self.gate_threshold)
        dec = dec.to(torch.int32).squeeze(1)

        # An item that crossed the threshold once stays finished.
        not_finished = not_finished * dec
        mel_lengths += not_finished

        if self.early_stopping and torch.sum(not_finished) == 0:
            break
        if mel_outputs.size(0) == self.max_decoder_steps:
            print("Warning! Reached max decoder steps")
            break

        decoder_input = mel_output

    mel_outputs, gate_outputs, alignments = self.parse_decoder_outputs(
        mel_outputs, gate_outputs, alignments)

    return mel_outputs, gate_outputs, alignments, mel_lengths
def percentage_correct_keypoints(keypoints: np.ndarray, predictions: np.ndarray, thresh: float = 0.5,
                                 pck_type: str = "object", image_size: float = None):
    """Compute the percentage of correct keypoints (PCK) and L2 errors.

    A ground-truth keypoint is treated as present when both of its
    coordinates are non-zero. Instances with fewer than two present
    keypoints contribute nothing to any of the statistics. The inputs are
    not modified.

    Args:
        keypoints: Keypoints with shape [B, N, 2]
        predictions: Predicted keypoints with shape [B, N, 2]
        thresh: threshold for pck; a prediction counts as correct when its
            distance to the ground truth is <= reference length * thresh
        pck_type (optional): default object, selects the reference length
            "object": ``row_pairwise_distances`` of the present keypoints
            "image": ``image_size``
        image_size (optional): indicates the size of the image, necessary
            when pck_type == "image"

    Returns:
        Tuple of numpy values
        ``(pck_average, pck_per_joint, l2_average, l2_average_per_joint)``.
        The two averages are NaN when no instance has a usable keypoint.

    Raises:
        ValueError: if pck_type == "image" and image_size is missing.
        AssertionError: on an unknown pck_type, a non-batched input or
            mismatching batch sizes.
    """
    if pck_type == "image" and image_size is None:
        raise ValueError("When using pck_type='image', then you need to pass the image_size!")
    assert pck_type in ["image", "object"], f"Got wrong pck_type, got {pck_type}"
    assert len(keypoints.shape) == 3, f"Only implemented for a batch got shape of keypoints: {keypoints.shape}"

    keypoints = sure_to_torch(keypoints).cpu()
    predictions = sure_to_torch(predictions).cpu()
    assert len(keypoints) == len(predictions), "Keypoints and predictions tensor need to have the same size."

    batch_size = keypoints.size(0)
    num_keypoints = keypoints.size(1)
    # Per instance / per joint bookkeeping. Rows of skipped instances stay 0.
    num_joints = torch.zeros((batch_size, num_keypoints))
    correct_index = torch.zeros((batch_size, num_keypoints))
    l2distance = torch.zeros((batch_size, num_keypoints))

    for idx in range(batch_size):
        # computes pck for all keypoint pairs of one instance
        p_src = keypoints[idx, :]
        p_pred = predictions[idx, :]

        # True values in mask indicate the keypoint was present in the dataset
        mask = torch.ne(p_src[:, 0], 0) * torch.ne(p_src[:, 1], 0)

        # With fewer than two present keypoints the instance is skipped.
        # Its rows were initialised to zero for any number of joints.
        if len(p_src[mask]) < 2:
            continue

        num_joints[idx] = mask

        if pck_type == 'object':
            l_pck = row_pairwise_distances(p_src[mask])
        elif pck_type == 'image':
            l_pck = torch.Tensor([image_size])

        # Euclidean distance per keypoint (power 0.5 is the square root).
        # Missing keypoints are forced to distance 0 instead of zeroing the
        # coordinates in place, which would write through to the inputs.
        point_distance = torch.pow(torch.sum(torch.pow(p_src - p_pred, 2), 1), 0.5)
        point_distance[~mask] = 0
        l2distance[idx, :] = point_distance.view(-1)

        L_pck_mat = l_pck.expand_as(point_distance)
        correct_points = torch.le(point_distance, L_pck_mat * thresh).type(torch.uint8)
        correct_points[~mask] = 0
        correct_index[idx, :] = correct_points.view(-1)

    # Reduce to joint granularity
    correct_per_joint = torch.sum(correct_index, dim=0)
    sum_available_joint = torch.sum(num_joints, dim=0)

    l2_average = torch.sum(l2distance) / torch.sum(num_joints)
    # clamp the tensor, sometimes we have zero available joints and then we have NaN values
    l2_average_joint = torch.sum(l2distance, dim=0) / torch.clamp(sum_available_joint, min=1e-6)
    pck_joints = correct_per_joint / torch.clamp(sum_available_joint, min=1e-6)
    pck_average = torch.sum(correct_index) / torch.sum(num_joints)

    return pck_average.numpy(), pck_joints.numpy(), l2_average.detach().numpy(), l2_average_joint.detach().numpy()
def forward(self, classifications, regressions, anchors, annotations):
    """Compute the focal classification loss and smooth-L1 box loss.

    All helper tensors are created on ``classifications.device``, so the
    loss runs on whichever device the inputs live on.

    Args:
        classifications: per-anchor class scores indexed as
            [batch, anchor, class]; clamped to [1e-4, 1 - 1e-4] before the
            logarithm is taken.
        regressions: per-anchor box deltas indexed as [batch, anchor, 4].
        anchors: anchor boxes; only ``anchors[0]`` is used, read as
            (x1, y1, x2, y2) rows.
        annotations: ground-truth rows per image, read as
            (x1, y1, x2, y2, class). Rows whose class column is -1 are
            dropped.

    Returns:
        ``(classification_loss, regression_loss)``, each the mean over the
        batch with the reduced dimension kept.
    """
    alpha = 0.25
    gamma = 2.0
    device = classifications.device
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # (num_anchors, 4)

    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]  # (num_anchors, num_classes)
        regression = regressions[j, :, :]  # (num_anchors, 4)

        bbox_annotation = annotations[j, :, :]
        # Keep only the valid (non-padding) annotations.
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        if bbox_annotation.shape[0] == 0:
            # No ground truth in this image: both losses are zero.
            regression_losses.append(
                torch.tensor(0, dtype=torch.float32, device=device))
            classification_losses.append(
                torch.tensor(0, dtype=torch.float32, device=device))
            continue

        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        # num_anchors x num_valid_annotations
        IoU = calc_iou(anchor, bbox_annotation[:, :4])

        # For every anchor: the best IoU with any annotation, and the index
        # of that annotation.
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # compute the loss for classification
        # -1 marks "ignore"; anchors between the two thresholds keep it.
        targets = torch.full(classification.shape, -1.0, device=device)

        targets[torch.lt(IoU_max, 0.4), :] = 0  # IoU < 0.4 -> background

        positive_indices = torch.ge(IoU_max, 0.5)  # IoU >= 0.5 -> positive
        num_positive_anchors = positive_indices.sum()

        # Annotation assigned to each anchor, (num_anchors, 5).
        assigned_annotations = bbox_annotation[IoU_argmax, :]

        # One-hot class target for the positive anchors.
        targets[positive_indices, :] = 0
        targets[positive_indices,
                assigned_annotations[positive_indices, 4].long()] = 1

        alpha_factor = torch.full(targets.shape, alpha, device=device)
        alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor,
                                   1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.),
                                   1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification) +
                (1.0 - targets) * torch.log(1.0 - classification))

        cls_loss = focal_weight * bce
        # Ignored anchors (target -1) contribute nothing.
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                               torch.zeros_like(cls_loss))

        classification_losses.append(
            cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

        # compute the loss for regression
        if num_positive_anchors > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            # Geometry of the positive anchors.
            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # clip widths to 1
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack(
                (targets_dx, targets_dy, targets_dw, targets_dh))
            targets = targets.t()  # (num_positives, 4)

            targets = targets / torch.tensor([[0.1, 0.1, 0.2, 0.2]],
                                             device=device)

            regression_diff = torch.abs(
                targets - regression[positive_indices, :])

            # Smooth L1 with the switch point at 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            regression_losses.append(
                torch.tensor(0, dtype=torch.float32, device=device))

    return torch.stack(classification_losses).mean(
        dim=0, keepdim=True), torch.stack(regression_losses).mean(
            dim=0, keepdim=True)
def infer_tacotron2_trt(encoder, decoder_iter, postnet,
                        encoder_context, decoder_context, postnet_context,
                        sequences, sequence_lengths, measurements, fp16):
    """Run Tacotron2 encoder, step-wise decoder and postnet engines.

    The decoder engine is executed once per output frame. After every step
    the decoder input and output buffers are swapped through
    ``swap_inputs_outputs``. Decoding stops when every batch item has
    produced a gate value whose sigmoid exceeds 0.5, or after 1664 frames.

    Args:
        encoder, decoder_iter, postnet: engines passed to ``run_trt_engine``.
        encoder_context, decoder_context, postnet_context: matching
            execution contexts.
        sequences: encoder input bound as ``'sequences'``.
        sequence_lengths: bound as ``'sequence_lengths'``; its length is
            the batch size and ``sequence_lengths[0]`` sizes the encoder
            output buffers.
        measurements: dict that receives ``tacotron2_encoder_time``,
            ``tacotron2_decoder_time`` (sum over all steps) and
            ``tacotron2_postnet_time``.
        fp16: when true the buffers are allocated in half precision.

    Returns:
        ``(mel_outputs_postnet, mel_lengths)``.
    """
    batch_size = len(sequence_lengths)
    memory = torch.zeros((batch_size, sequence_lengths[0], 512)).cuda()
    if fp16:
        memory = memory.half()
    device = memory.device
    dtype = memory.dtype

    processed_memory = torch.zeros((batch_size, sequence_lengths[0], 128),
                                   device=device, dtype=dtype)
    lens = torch.zeros_like(sequence_lengths)

    encoder_tensors = {
        # inputs
        'sequences': sequences,
        'sequence_lengths': sequence_lengths,
        # outputs
        'memory': memory,
        'lens': lens,
        'processed_memory': processed_memory
    }

    print("Running Tacotron2 Encoder")
    with MeasureTime(measurements, "tacotron2_encoder_time"):
        run_trt_engine(encoder_context, encoder, encoder_tensors)

    mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32,
                              device=device)
    not_finished = torch.ones([memory.size(0)], dtype=torch.int32,
                              device=device)
    gate_threshold = 0.5
    max_decoder_steps = 1664
    first_iter = True

    decoder_inputs = init_decoder_inputs(memory, processed_memory,
                                         sequence_lengths)
    decoder_outputs = init_decoder_outputs(memory, sequence_lengths)

    print("Running Tacotron2 Decoder")
    measurements_decoder = {}
    while True:
        decoder_tensors = init_decoder_tensors(decoder_inputs,
                                               decoder_outputs)
        with MeasureTime(measurements_decoder, "step"):
            run_trt_engine(decoder_context, decoder_iter, decoder_tensors)

        # Only the mel frames (decoder_outputs[7]) are collected. The gate
        # and alignment outputs are not part of the return value, so they
        # are not concatenated per step.
        mel_frame = torch.unsqueeze(decoder_outputs[7], 2)
        if first_iter:
            mel_outputs = mel_frame
            measurements['tacotron2_decoder_time'] = \
                measurements_decoder['step']
            first_iter = False
        else:
            # torch.cat copies, which matters because the engine buffers
            # are reused after the swap below.
            mel_outputs = torch.cat((mel_outputs, mel_frame), 2)
            measurements['tacotron2_decoder_time'] += \
                measurements_decoder['step']

        # decoder_outputs[8] is used as the stop-gate value: 1 while the
        # sigmoid is still at or below the threshold.
        dec = torch.le(torch.sigmoid(decoder_outputs[8]),
                       gate_threshold).to(torch.int32).squeeze(1)
        not_finished = not_finished * dec
        mel_lengths += not_finished

        if torch.sum(not_finished) == 0:
            print("Stopping after", mel_outputs.size(2), "decoder steps")
            break
        if mel_outputs.size(2) == max_decoder_steps:
            print("Warning! Reached max decoder steps")
            break

        decoder_inputs, decoder_outputs = swap_inputs_outputs(
            decoder_inputs, decoder_outputs)

    mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device,
                                           dtype=dtype)

    postnet_tensors = {
        # inputs
        'mel_outputs': mel_outputs,
        # outputs
        'mel_outputs_postnet': mel_outputs_postnet
    }
    print("Running Tacotron2 Postnet")
    with MeasureTime(measurements, "tacotron2_postnet_time"):
        run_trt_engine(postnet_context, postnet, postnet_tensors)

    print("Tacotron2 Postnet done")

    return mel_outputs_postnet, mel_lengths
def infer_tacotron2_trt(encoder, decoder_iter, postnet,
                        encoder_context, decoder_context, postnet_context,
                        sequences, sequence_lengths, measurements, fp16, loop):
    """Run Tacotron2 encoder, decoder and postnet engines.

    The decoder runs in one of three ways:

    * ``loop`` true and ``decoder_context is None``: ``decoder_iter`` is
      handed to ``onnxruntime.InferenceSession`` and the whole decoding
      loop runs in a single ONNX Runtime call.
    * ``loop`` true otherwise: a single ``run_trt_engine`` call produces
      all frames; the output buffer is trimmed to the longest mel length.
    * ``loop`` false: the decoder engine is executed once per frame until
      every item's gate sigmoid exceeds 0.5 or 1024 frames were produced.

    Args:
        encoder, decoder_iter, postnet: engines passed to
            ``run_trt_engine`` (``decoder_iter`` is passed to ONNX Runtime
            in the first mode).
        encoder_context, decoder_context, postnet_context: execution
            contexts.
        sequences: encoder input bound as ``'sequences'``.
        sequence_lengths: bound as ``'sequence_lengths'``; its length is
            the batch size and ``sequence_lengths[0]`` sizes the encoder
            output buffers.
        measurements: dict that receives ``tacotron2_encoder_time``,
            ``tacotron2_decoder_time`` and ``tacotron2_postnet_time``.
        fp16: when true the buffers are allocated in half precision.
        loop: selects the decoding mode, see above.

    Returns:
        ``(mel_outputs_postnet, mel_lengths)``.
    """
    batch_size = len(sequence_lengths)
    max_sequence_len = sequence_lengths[0]
    memory = torch.zeros((batch_size, max_sequence_len, 512)).cuda()
    if fp16:
        memory = memory.half()
    device = memory.device
    dtype = memory.dtype

    processed_memory = torch.zeros((batch_size, max_sequence_len, 128),
                                   device=device, dtype=dtype)
    lens = torch.zeros_like(sequence_lengths)
    print(f"batch_size: {batch_size}, max sequence length: {max_sequence_len}")

    encoder_tensors = {
        "inputs": {
            'sequences': sequences,
            'sequence_lengths': sequence_lengths
        },
        "outputs": {
            'memory': memory,
            'lens': lens,
            'processed_memory': processed_memory
        }
    }

    print("Running Tacotron2 Encoder")
    with MeasureTime(measurements, "tacotron2_encoder_time"):
        run_trt_engine(encoder_context, encoder, encoder_tensors)

    max_decoder_steps = 1024
    mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32,
                              device=device)
    not_finished = torch.ones([memory.size(0)], dtype=torch.int32,
                              device=device)
    gate_threshold = 0.5

    decoder_inputs = init_decoder_inputs(memory, processed_memory,
                                         sequence_lengths)
    decoder_outputs = init_decoder_outputs(memory, sequence_lengths)

    if loop:
        # Binding names of the looped decoder, in decoder_inputs order.
        loop_input_names = (
            'decoder_input_0',
            'attention_hidden_0',
            'attention_cell_0',
            'decoder_hidden_0',
            'decoder_cell_0',
            'attention_weights_0',
            'attention_weights_cum_0',
            'attention_context_0',
            'memory',
            'processed_memory',
            'mask',
        )
        if decoder_context is None:
            print("Running Tacotron2 Decoder with loop with ONNX-RT")
            decoder_inputs_onnxrt = [
                x.cpu().numpy().copy() for x in decoder_inputs
            ]
            # NOTE(review): onnx is not referenced below; the import is
            # kept in case its load order matters. Confirm before removing.
            import onnx
            import onnxruntime
            sess = onnxruntime.InferenceSession(decoder_iter)

            onnx_feed = {
                name: decoder_inputs_onnxrt[i]
                for i, name in enumerate(loop_input_names)
            }
            with MeasureTime(measurements, "tacotron2_decoder_time"):
                result = sess.run(["mel_outputs", "mel_lengths_t"],
                                  onnx_feed)

            mel_outputs = torch.tensor(result[0], device=device)
            mel_lengths = torch.tensor(result[1], device=device)
        else:
            print("Running Tacotron2 Decoder with loop")
            # Output buffer with room for max_decoder_steps frames; it is
            # only needed in this branch.
            mel_outputs = torch.ones((batch_size, 80, max_decoder_steps),
                                     device=device, dtype=dtype)
            decoder_tensors = {
                "inputs": {
                    name: decoder_inputs[i]
                    for i, name in enumerate(loop_input_names)
                },
                "outputs": {
                    'mel_outputs': mel_outputs,
                    'mel_lengths_t': mel_lengths
                }
            }

            with MeasureTime(measurements, "tacotron2_decoder_time"):
                run_trt_engine(decoder_context, decoder_iter,
                               decoder_tensors)
            # Drop the unused tail of the preallocated buffer.
            mel_outputs = mel_outputs[:, :, :torch.max(mel_lengths)]
    else:
        print("Running Tacotron2 Decoder")
        measurements_decoder = {}
        first_iter = True
        while True:
            decoder_tensors = init_decoder_tensors(decoder_inputs,
                                                   decoder_outputs)
            with MeasureTime(measurements_decoder, "step"):
                run_trt_engine(decoder_context, decoder_iter,
                               decoder_tensors)

            # Only the mel frames (decoder_outputs[7]) are collected. The
            # gate and alignment outputs are not returned, so they are not
            # concatenated per step.
            mel_frame = torch.unsqueeze(decoder_outputs[7], 2)
            if first_iter:
                mel_outputs = mel_frame
                measurements['tacotron2_decoder_time'] = \
                    measurements_decoder['step']
                first_iter = False
            else:
                mel_outputs = torch.cat((mel_outputs, mel_frame), 2)
                measurements['tacotron2_decoder_time'] += \
                    measurements_decoder['step']

            # decoder_outputs[8] is used as the stop-gate value: 1 while
            # the sigmoid is still at or below the threshold.
            dec = torch.le(torch.sigmoid(decoder_outputs[8]),
                           gate_threshold).to(torch.int32).squeeze(1)
            not_finished = not_finished * dec
            mel_lengths += not_finished

            if torch.sum(not_finished) == 0:
                print("Stopping after", mel_outputs.size(2), "decoder steps")
                break
            if mel_outputs.size(2) == max_decoder_steps:
                print("Warning! Reached max decoder steps")
                break

            decoder_inputs, decoder_outputs = swap_inputs_outputs(
                decoder_inputs, decoder_outputs)

    # Take a private copy so the postnet input no longer aliases any
    # decoder buffer.
    mel_outputs = mel_outputs.clone().detach()

    mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device,
                                           dtype=dtype)

    postnet_tensors = {
        "inputs": {
            'mel_outputs': mel_outputs
        },
        "outputs": {
            'mel_outputs_postnet': mel_outputs_postnet
        }
    }
    print("Running Tacotron2 Postnet")
    with MeasureTime(measurements, "tacotron2_postnet_time"):
        run_trt_engine(postnet_context, postnet, postnet_tensors)

    print("Tacotron2 Postnet done")

    return mel_outputs_postnet, mel_lengths
def forward(self, img_batch_shape, attention_mask, bboxs):
    """Binary cross-entropy between attention maps and box masks.

    For every image and every attention level, a target mask is drawn from
    the boxes whose area falls into that level's range, and compared with
    the predicted attention map. Helper tensors are created on the device
    of the attention maps.

    Args:
        img_batch_shape: shape of the image batch; index 2 is read as the
            height and index 3 as the width.
        attention_mask: sequence of attention tensors, one per level,
            indexed as [batch, 0, row, column].
        bboxs: annotation rows per image, read as (x1, y1, x2, y2, label).
            Rows whose label is -1, or with a coordinate beyond the image
            size, are dropped.

    Returns:
        Tensor of shape (1,): the mean over images of the per-image mean
        level loss. Images without usable boxes contribute 0.
    """
    h, w = img_batch_shape[2], img_batch_shape[3]
    device = attention_mask[0].device if len(attention_mask) > 0 else bboxs.device

    mask_losses = []

    batch_size = bboxs.shape[0]
    for j in range(batch_size):
        bbox_annotation = bboxs[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        # Keep only boxes whose coordinates do not exceed the image size.
        inside = (torch.le(bbox_annotation[:, 0], w)
                  & torch.le(bbox_annotation[:, 1], h)
                  & torch.le(bbox_annotation[:, 2], w)
                  & torch.le(bbox_annotation[:, 3], h))
        bbox_annotation = bbox_annotation[inside, :]

        if bbox_annotation.shape[0] == 0:
            mask_losses.append(
                torch.tensor(0, dtype=torch.float32, device=device))
            continue

        bbox_area = (bbox_annotation[:, 2] - bbox_annotation[:, 0]) * (
            bbox_annotation[:, 3] - bbox_annotation[:, 1])

        mask_loss = []
        for level in range(len(attention_mask)):
            attention_map = attention_mask[level][j, 0, :, :]

            # Area range handled by this level (base side 2 ** (level + 5)).
            min_area = (2**(level + 5))**2 * 0.5
            max_area = (2**(level + 5) * 1.58)**2 * 2

            in_level = (torch.ge(bbox_area, min_area)
                        & torch.le(bbox_area, max_area))

            # Clone because the coordinates are rescaled in place below.
            level_bbox_annotation = bbox_annotation[in_level, :].clone()

            attention_h, attention_w = attention_map.shape

            if level_bbox_annotation.shape[0]:
                # Image coordinates -> attention map coordinates.
                level_bbox_annotation[:, 0] *= attention_w / w
                level_bbox_annotation[:, 1] *= attention_h / h
                level_bbox_annotation[:, 2] *= attention_w / w
                level_bbox_annotation[:, 3] *= attention_h / h

            mask_gt = torch.zeros(attention_map.shape,
                                  device=attention_map.device)

            for i in range(level_bbox_annotation.shape[0]):
                x1 = max(int(level_bbox_annotation[i, 0]), 0)
                y1 = max(int(level_bbox_annotation[i, 1]), 0)
                x2 = min(
                    math.ceil(level_bbox_annotation[i, 2]) + 1, attention_w)
                y2 = min(
                    math.ceil(level_bbox_annotation[i, 3]) + 1, attention_h)

                mask_gt[y1:y2, x1:x2] = 1

            # Boolean indexing flattens both tensors to 1-D.
            mask_gt = mask_gt[mask_gt >= 0]
            mask_predict = attention_map[attention_map >= 0]

            mask_loss.append(F.binary_cross_entropy(mask_predict, mask_gt))
        mask_losses.append(torch.stack(mask_loss).mean())

    return torch.stack(mask_losses).mean(dim=0, keepdim=True)
def forward(self, center_maps, scale_maps, annotations, stride=4):
    """Compute the center focal loss and the smooth-L1 scale loss.

    Helper tensors are created on ``center_maps.device``.

    Args:
        center_maps: predicted center maps, one per image, indexed as
            [channel, row, column] after selecting the image.
        scale_maps: predicted scale maps, same indexing.
        annotations: per-image boxes read as (x1, y1, x2, y2) columns in
            input-image coordinates.
        stride: factor by which box coordinates are floor-divided to reach
            map coordinates.

    Returns:
        ``(center_loss, scale_loss)``, each the mean over the batch with
        the reduced dimension kept.
    """
    batch_size = center_maps.size()[0]
    device = center_maps.device
    scale_losses = []
    center_losses = []

    for i in range(batch_size):
        boxes = annotations[i]
        center_map = center_maps[i]
        scale_map = scale_maps[i]

        boxes = (boxes // stride).long()
        center_map = torch.clamp(center_map, 1e-4, 1.0 - 1e-4)

        x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        # Floor division keeps the centers integral so they can be used as
        # indices; "/" on integer tensors returns floats in current PyTorch.
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2
        width = x2 - x1
        height = y2 - y1

        center_gt = torch.zeros(center_map.shape, device=device)
        scale_gt = torch.zeros(scale_map.shape, device=device)

        center_gt[:, center_y, center_x] = 1.0

        # Five positions per box, paired element-wise: offsets -2..2 are
        # applied to x and y together, each receiving log(height).
        region_x = torch.cat([
            center_x - 2, center_x - 1, center_x, center_x + 1, center_x + 2
        ])
        region_y = torch.cat([
            center_y - 2, center_y - 1, center_y, center_y + 1, center_y + 2
        ])
        scale_gt[:, region_y.to(device), region_x.to(device)] = \
            torch.log(height.float()).repeat(5).to(device)

        Gauss_map = torch.zeros(center_map.shape, device=device)
        pos_map = torch.zeros(center_map.shape, device=device)

        K = boxes.size()[0]
        # A separate loop variable avoids rebinding the image index.
        for k in range(K):
            c_x, c_y, w, h = center_x[k], center_y[k], width[k], height[k]
            k_Gauss = get_mask(w, h, c_x, c_y)
            # Where boxes overlap, keep the larger mask value.
            Gauss_map[:, y1[k]:y2[k], x1[k]:x2[k]] = torch.max(
                k_Gauss.unsqueeze(0),
                Gauss_map[:, y1[k]:y2[k], x1[k]:x2[k]])
            pos_map[:, y1[k]:y2[k], x1[k]:x2[k]] = 1

        # Down-weighting of non-center pixels inside the boxes; pixels
        # outside every box get weight 0.
        Gauss_map = torch.pow(1.0 - Gauss_map, self.beta)
        Gauss_map = Gauss_map * pos_map

        alpha_factor = torch.ones(center_map.shape, device=device) * self.alpha
        alpha_factor = torch.where(torch.eq(center_gt, 1.), alpha_factor,
                                   Gauss_map)
        focal_weight = torch.where(torch.eq(center_gt, 1.),
                                   1.0 - center_map, center_map)
        focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)

        bce = -(center_gt * torch.log(center_map) +
                (1.0 - center_gt) * torch.log(1.0 - center_map))
        center_loss = focal_weight * bce
        center_loss = center_loss.sum() / max(1.0, K)
        center_losses.append(center_loss)

        # Smooth L1 on the scale map, only where a target was written.
        scale_diff = torch.abs(scale_gt - scale_map)
        scale_loss = torch.where(torch.le(scale_diff, 1.0),
                                 0.5 * torch.pow(scale_diff, 2),
                                 scale_diff - 0.5)
        scale_loss = torch.where(torch.ne(scale_gt, 0.), scale_loss,
                                 torch.zeros_like(scale_loss))
        scale_losses.append(scale_loss.sum() / max(1.0, K))

    return torch.stack(center_losses).mean(
        dim=0, keepdim=True), torch.stack(scale_losses).mean(dim=0,
                                                             keepdim=True)
def forward(ctx, classifications, regressions, anchors, annotations):
    """Forward pass producing the AP classification loss and box loss.

    Builds per-anchor labels and smooth-L1 regression gradients image by
    image, then delegates the classification part to ``AP_loss``. Both
    gradients are stored on ``ctx`` via ``save_for_backward``.

    Args:
        ctx: autograd context used to save the gradients.
        classifications: class scores indexed as [batch, anchor, class].
        regressions: box deltas indexed as [batch, anchor, 4].
        anchors: anchor boxes; only ``anchors[0]`` is used, read as
            (x1, y1, x2, y2) rows.
        annotations: per-image rows read as (x1, y1, x2, y2, class); rows
            with class -1 are dropped.

    Returns:
        ``(classification_losses, regression_loss)`` where the regression
        loss is the batch sum divided by the clamped positive-anchor count.
    """
    num_images = classifications.shape[0]
    regression_losses = []
    labels_b = []

    regression_grads = torch.zeros(regressions.shape).cuda()
    p_num = torch.zeros(1).cuda()  # running count of positive anchors

    anchor = anchors[0, :, :].type(torch.cuda.FloatTensor)

    # Anchor sizes and centers under the inclusive "+1" convention.
    anchor_widths = anchor[:, 2] - anchor[:, 0] + 1.0
    anchor_heights = anchor[:, 3] - anchor[:, 1] + 1.0
    anchor_ctr_x = anchor[:, 0] + 0.5 * (anchor_widths - 1.0)
    anchor_ctr_y = anchor[:, 1] + 0.5 * (anchor_heights - 1.0)

    for j in range(num_images):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j, :, :]
        valid_rows = bbox_annotation[:, 4] != -1
        bbox_annotation = bbox_annotation[valid_rows]

        if bbox_annotation.shape[0] == 0:
            # Image without annotations: zero loss, all-zero labels.
            regression_losses.append(torch.tensor(0).float().cuda())
            labels_b.append(torch.zeros(classification.shape).cuda())
            continue

        # num_anchors x num_annotations
        IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # per anchor

        # compute the loss for classification
        targets = (torch.ones(classification.shape) * -1).cuda()

        # Anchors that reach the best IoU of some annotation are positive
        # regardless of the 0.5 threshold.
        gt_IoU_max = torch.max(IoU, dim=0)[0]
        best_anchor_rows = torch.where(IoU == gt_IoU_max)[0]
        positive_indices = torch.ge(torch.zeros(IoU_max.shape).cuda(), 1)
        positive_indices[best_anchor_rows.long()] = True

        positive_indices = positive_indices | torch.ge(IoU_max, 0.5)
        negative_indices = torch.lt(IoU_max, 0.4)

        p_num += positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[negative_indices, :] = 0
        targets[positive_indices, :] = 0
        positive_classes = assigned_annotations[positive_indices, 4].long()
        targets[positive_indices, positive_classes] = 1
        labels_b.append(targets)

        # compute the loss for regression
        if positive_indices.sum() > 0:
            matched = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = matched[:, 2] - matched[:, 0] + 1.0
            gt_heights = matched[:, 3] - matched[:, 1] + 1.0
            gt_ctr_x = matched[:, 0] + 0.5 * (gt_widths - 1.0)
            gt_ctr_y = matched[:, 1] + 0.5 * (gt_heights - 1.0)

            # clip widths to 1
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets2 = torch.stack(
                (targets_dx, targets_dy, targets_dw, targets_dh)).t()
            targets2 = targets2 / torch.Tensor([[0.1, 0.1, 0.2, 0.2]]).cuda()

            regression_diff = regression[positive_indices, :] - targets2
            regression_diff_abs = torch.abs(regression_diff)
            in_quadratic_zone = torch.le(regression_diff_abs, 1.0)

            # Smooth L1 with the switch point at 1.
            regression_loss = torch.where(
                in_quadratic_zone,
                0.5 * torch.pow(regression_diff_abs, 2),
                regression_diff_abs - 0.5)
            regression_losses.append(regression_loss.sum())

            # Its derivative with respect to the prediction.
            regression_grad = torch.where(
                in_quadratic_zone,
                regression_diff,
                torch.sign(regression_diff))
            regression_grads[j, positive_indices, :] = regression_grad
        else:
            regression_losses.append(torch.tensor(0).float().cuda())

    p_num = torch.clamp(p_num, min=1)
    regression_grads /= (4 * p_num)

    ########################AP-LOSS##########################
    labels_b = torch.stack(labels_b)
    classification_grads, classification_losses = AP_loss(
        classifications, labels_b)
    #########################################################

    ctx.save_for_backward(classification_grads, regression_grads)
    total_regression = torch.stack(regression_losses).sum(dim=0, keepdim=True)
    return classification_losses, total_regression / p_num
def get_mask_from_lengths(lengths):
    """Build a padding mask from a 1-D tensor of sequence lengths.

    Returns a tensor with one row per entry of ``lengths`` and
    ``max(lengths)`` columns. An element is true where the column index is
    at or beyond that row's length, and false inside the sequence.
    """
    longest = torch.max(lengths).item()
    positions = torch.arange(0, longest, device=lengths.device,
                             dtype=lengths.dtype)
    # Direct form of "not (position < length)".
    return positions >= lengths.unsqueeze(1)
def preprocess(sample_dict, pre_x2d, out_dim, rescale_dist=0.0):
    """Turn one sample dict into network inputs and multi-scale targets.

    The first five scene frames are back-projected to world coordinates,
    centered on their mean and rotated by a random angle about the y axis
    (data augmentation). The query depth is down-sampled to four
    resolutions, starting at ``out_dim`` and halving each time, and the
    same normalisation is applied to the resulting query world coordinates.

    Args:
        sample_dict: dict providing ``frames_img``, ``frames_depth``,
            ``frames_K``, ``frames_Tcw``, ``frames_ori_img``,
            ``frames_neg_tags``, ``img``, ``ori_img``, ``depth``, ``Tcw``
            and ``K``.
        pre_x2d: 2-D pixel coordinates passed to ``batched_pi_inv`` for the
            scene frames.
        out_dim: ``(out_H, out_W)`` of the largest query target.
        rescale_dist: when > 0, ``rescale_scene_coords`` is applied with
            this value.

    Returns:
        Tuple of ``scene_input`` (scene RGB concatenated with world
        coordinates along dim 2), ``query_img``, the query world
        coordinates and valid masks (both lists in reversed scale order,
        the last scale appearing twice), ``scene_ori_rgb``,
        ``query_ori_img``, ``X_world``, the scene depth validity mask,
        ``norm_query_Tcw``, the query intrinsics scaled to ``out_dim``,
        ``scene_neg_tags`` and ``rescale_factor``.
    """
    num_scene_frames = 5

    # Random rotation about the y axis, shared by scene and query.
    rand_angle = np.random.random_sample() * 2.0 * np.pi
    rand_R = quaternion_matrix(quaternion_about_axis(rand_angle, (0.0, 1.0, 0.0)))[:3, :3]
    rand_R = torch.FloatTensor(rand_R).unsqueeze(0).cuda()
    zero_t = torch.zeros(1, 3, 1).cuda()

    scene_rgb = sample_dict['frames_img'][:, :num_scene_frames, ...].cuda()
    scene_depth = sample_dict['frames_depth'][:, :num_scene_frames, ...].cuda()
    scene_K = sample_dict['frames_K'][:, :num_scene_frames, ...].cuda()
    scene_Tcw = sample_dict['frames_Tcw'][:, :num_scene_frames, ...]
    scene_ori_rgb = sample_dict['frames_ori_img'][:, :num_scene_frames, ...].cuda()
    scene_neg_tags = sample_dict['frames_neg_tags'][:, :num_scene_frames, ...].cuda()

    # Scene dimensions get their own names: the query section below has
    # its own N/H/W and must not overwrite the ones needed for the final
    # scene depth mask.
    scene_N, L, _, scene_H, scene_W = scene_rgb.shape
    scene_depth = scene_depth.view(scene_N * L, 1, scene_H, scene_W)
    scene_K = scene_K.view(scene_N * L, 3, 3)
    scene_Tcw = scene_Tcw.view(scene_N * L, 3, 4)

    # generate 3D world position of scene
    d = scene_depth.view(scene_N * L, scene_H * scene_W, 1)           # dim (N*L, H*W, 1)
    X_3d = batched_pi_inv(scene_K, pre_x2d, d)                        # dim (N*L, H*W, 3)
    Rwc, twc = batched_inv_pose(R=scene_Tcw[:, :3, :3],
                                t=scene_Tcw[:, :3, 3].squeeze(-1))    # dim (N*L, 3, 3), (N, 3)
    X_world = batched_transpose(Rwc.cuda(), twc.cuda(), X_3d)         # dim (N*L, H*W, 3)
    X_world = X_world.contiguous().view(scene_N, L * scene_H * scene_W, 3)  # dim (N, L*H*W, 3)
    scene_center = torch.mean(X_world, dim=1)                         # dim (N, 3)
    X_world -= scene_center.view(scene_N, 1, 3)
    X_world = batched_transpose(rand_R.expand(scene_N, 3, 3),
                                zero_t.expand(scene_N, 3, 1),
                                X_world)                              # dim (N, L*H*W, 3), data augmentation
    X_world = X_world.view(scene_N, L, scene_H, scene_W, 3).permute(0, 1, 4, 2, 3).contiguous()  # dim (N, L, 3, H, W)

    # query image:
    query_img = sample_dict['img']
    query_ori_img = sample_dict['ori_img']

    # compute multiscale ground truth query_X_worlds & valid_masks
    query_X_worlds = []
    valid_masks = []
    out_H, out_W = out_dim
    query_depth = sample_dict['depth'].cuda()
    N, _, H, W = query_depth.shape

    # The query pose does not depend on the scale, so invert it once.
    query_Tcw = sample_dict['Tcw']
    query_Rwc, query_twc = batched_inv_pose(R=query_Tcw[:, :3, :3],
                                            t=query_Tcw[:, :3, 3].squeeze(-1))  # dim (N, 3, 3), (N, 3)

    for i in range(4):
        # Average the valid (> 1e-5) depths inside each output cell.
        query_depth_patch = F.unfold(
            query_depth,
            kernel_size=(H // out_H, W // out_W),
            stride=(H // out_H, W // out_W)
        ).view(N, -1, out_H, out_W)
        mask = torch.gt(query_depth_patch, 1e-5)
        count = torch.sum(mask.float(), dim=1)
        # Cells without any valid depth divide by 1e6 instead of zero.
        query_depth_down = torch.sum(query_depth_patch * mask.float(), dim=1) / \
                           torch.where(torch.le(count, 1e-5),
                                       torch.full(count.shape, 1e6).to(count.device),
                                       count)                         # (N, 1, out_H, out_W)

        # Intrinsics scaled to the current output resolution.
        query_K = sample_dict['K'].clone().cuda()
        query_K[:, 0, 0] *= out_W / W
        query_K[:, 0, 2] *= out_W / W
        query_K[:, 1, 1] *= out_H / H
        query_K[:, 1, 2] *= out_H / H

        query_d = query_depth_down.view(N, out_H * out_W, 1)          # dim (N, H*W, 1)
        out_x_2d = x_2d_coords_torch(N, out_H, out_W).cuda().view(N, -1, 2)
        query_X_3d = batched_pi_inv(query_K, out_x_2d, query_d)       # dim (N, H*W, 3)
        query_X_world = batched_transpose(query_Rwc.cuda(), query_twc.cuda(), query_X_3d)  # dim (N, H*W, 3)
        query_X_world -= scene_center.view(N, 1, 3)
        query_X_world = batched_transpose(rand_R.expand(N, 3, 3),
                                          zero_t.expand(N, 3, 1),
                                          query_X_world)              # dim (N, H*W, 3), data augmentation
        query_X_world = query_X_world.permute(0, 2, 1).view(N, 3, out_H, out_W).contiguous()  # dim (N, 3, H, W)
        query_X_worlds.append(query_X_world.cuda())
        valid_masks.append(torch.gt(query_depth_down, 1e-5).cuda().view(N, out_H, out_W))

        # The last scale is appended a second time.
        if i == 3:
            query_X_worlds.append(query_X_world.cuda())
            valid_masks.append(torch.gt(query_depth_down, 1e-5).cuda().view(N, out_H, out_W))

        out_H //= 2
        out_W //= 2

    # compute norm_query_Tcw for normalized scene coordinate
    query_twc = query_twc.cuda() - scene_center.view(N, 3, 1)
    norm_query_Twc = torch.cat([query_Rwc.cuda(), query_twc], dim=-1)  # dim (N, 3, 4)
    norm_query_Twc = torch.bmm(rand_R.expand(N, 3, 3), norm_query_Twc)  # dim (N, 3, 4)
    query_Rcw, query_tcw = batched_inv_pose(R=norm_query_Twc[:, :3, :3],
                                            t=norm_query_Twc[:, :3, 3].squeeze(-1))  # dim (N, 3, 3), (N, 3)
    norm_query_Tcw = torch.cat([query_Rcw, query_tcw.view(N, 3, 1)], dim=-1)  # dim (N, 3, 4)

    # compute down sampled query K
    out_H, out_W = out_dim
    query_K = sample_dict['K'].clone().cuda()
    query_K[:, 0, 0] *= out_W / W
    query_K[:, 0, 2] *= out_W / W
    query_K[:, 1, 1] *= out_H / H
    query_K[:, 1, 2] *= out_H / H

    if rescale_dist > 0:
        query_X_worlds, X_world, rescale_factor = rescale_scene_coords(query_X_worlds, X_world,
                                                                       scene_neg_tags, rescale_dist)
    else:
        rescale_factor = torch.ones(N)

    scene_input = torch.cat((scene_rgb, X_world), dim=2)

    # The scene depth mask is reshaped with the scene's own dimensions.
    scene_valid_mask = torch.gt(scene_depth, 1e-5).cuda().view(scene_N, L, scene_H, scene_W)

    return scene_input.cuda(), query_img.cuda(), query_X_worlds[::-1], valid_masks[::-1], \
           scene_ori_rgb.cuda(), query_ori_img.cuda(), X_world.cuda(), \
           scene_valid_mask, norm_query_Tcw, query_K, scene_neg_tags, rescale_factor.cuda()
def forward(self, classifications, regressions, anchors, annotations):
    """Compute the focal classification loss and smooth-L1 box regression loss.

    Each annotation row is read as ``[box_h (cols 0:4), box_f (cols 4:8),
    class index, ignore flag]``; rows whose class index is ``-1`` are padding.
    Anchors are matched against the ``box_f`` columns via ``calc_iou``:
    IoU < 0.4 is background, IoU >= 0.5 is positive, anything in between is
    excluded from the classification loss.

    Args:
        classifications: per-anchor class probabilities, indexed
            ``[batch, anchor, class]``.
        regressions: per-anchor regression outputs with 8 values per anchor
            (4 for ``box_f`` followed by 4 for ``box_h``).
        anchors: anchor boxes; only ``anchors[0]`` is used, read as
            ``(x1, y1, x2, y2)``.
        annotations: padded ground-truth tensor laid out as described above.

    Returns:
        ``(classification_loss, regression_loss)``, each averaged over the
        batch and shaped ``(1,)``.

    Note:
        ``only_full`` is not defined inside this function; it has to be
        provided by the enclosing scope (NOTE(review): confirm where it is
        set).
    """
    alpha = 0.75  # 0.25
    gamma = 2.0
    # Follow the inputs instead of forcing CUDA so the loss also runs on CPU.
    device = classifications.device

    # The last column carries the ignore flag; strip it from the annotations.
    ignores = annotations[:, :, [-1]]
    annotations = annotations[:, :, 0:-1]

    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]

    anchor_widths = anchor[:, 2] - anchor[:, 0]
    anchor_heights = anchor[:, 3] - anchor[:, 1]
    anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        # Drop padding rows (marked with -1).
        bbox_annotation = annotations[j, :, :]
        bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
        ignore = ignores[j, :, :]
        ignore = ignore[ignore[:, -1] != -1]

        if bbox_annotation.shape[0] == 0:
            regression_losses.append(torch.tensor(0).float().to(device))
            classification_losses.append(torch.tensor(0).float().to(device))
            continue

        # Keep log() finite.
        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        # num_anchors x num_annotations
        IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, 4:8])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # ---- classification loss ----
        # -1 = ignored, 0 = background, 1 = assigned class.
        targets = torch.ones(classification.shape, device=device) * -1
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]
        assigned_ignores = ignore[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices,
                assigned_annotations[positive_indices, -1].long()] = 1

        alpha_factor = torch.ones(targets.shape, device=device) * alpha
        alpha_factor = torch.where(torch.eq(targets, 1.),
                                   alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.),
                                   1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification)
                + (1.0 - targets) * torch.log(1.0 - classification))

        cls_loss = focal_weight * bce
        # Anchors still marked -1 contribute nothing.
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                               torch.zeros_like(cls_loss))
        classification_losses.append(
            cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))

        # ---- regression loss ----
        if num_positive_anchors > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]
            assigned_ignores = assigned_ignores[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths_h = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights_h = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x_h = assigned_annotations[:, 0] + 0.5 * gt_widths_h
            gt_ctr_y_h = assigned_annotations[:, 1] + 0.5 * gt_heights_h

            gt_widths_f = assigned_annotations[:, 6] - assigned_annotations[:, 4]
            gt_heights_f = assigned_annotations[:, 7] - assigned_annotations[:, 5]
            gt_ctr_x_f = assigned_annotations[:, 4] + 0.5 * gt_widths_f
            gt_ctr_y_f = assigned_annotations[:, 5] + 0.5 * gt_heights_f

            # clip widths to 1
            gt_widths_h = torch.clamp(gt_widths_h, min=1)
            gt_heights_h = torch.clamp(gt_heights_h, min=1)
            gt_widths_f = torch.clamp(gt_widths_f, min=1)
            gt_heights_f = torch.clamp(gt_heights_f, min=1)

            targets_dx_f = (gt_ctr_x_f - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy_f = (gt_ctr_y_f - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw_f = torch.log(gt_widths_f / anchor_widths_pi)
            targets_dh_f = torch.log(gt_heights_f / anchor_heights_pi)

            # The box_h targets are scaled by 4 relative to the anchor size.
            targets_dx_h = (gt_ctr_x_h - anchor_ctr_x_pi) / anchor_widths_pi * 4
            targets_dy_h = (gt_ctr_y_h - anchor_ctr_y_pi) / anchor_heights_pi * 4
            targets_dw_h = torch.log(gt_widths_h / anchor_widths_pi * 4)
            targets_dh_h = torch.log(gt_heights_h / anchor_heights_pi * 4)

            targets = torch.stack((targets_dx_f, targets_dy_f,
                                   targets_dw_f, targets_dh_f,
                                   targets_dx_h, targets_dy_h,
                                   targets_dw_h, targets_dh_h))
            targets = targets.t()
            targets = targets / torch.tensor(
                [[0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.2, 0.2]],
                device=targets.device)

            # The previous dead `1 - positive_indices` was dropped: it raises
            # on bool masks and its result was never used.
            regression_diff = torch.abs(targets - regression[positive_indices, :])

            weights = torch.ones(regression_diff.shape,
                                 device=regression_diff.device)
            if only_full:
                weights[:, 4:] = 0
            else:
                # Flagged boxes do not contribute to the box_h terms.
                weights[:, 4:] = 1 - assigned_ignores
            regression_diff = regression_diff * weights

            # Smooth L1 with the quadratic/linear switch at 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0
            )
            if only_full:
                regression_losses.append(regression_loss[:, 0:4].mean())
            else:
                regression_losses.append(regression_loss.mean())
        else:
            regression_losses.append(torch.tensor(0).float().to(device))

    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)
def _kl_continuous_bernoulli_uniform(p, q):
    """Return KL(p || q) for a continuous Bernoulli ``p`` and a uniform ``q``.

    Where ``q`` covers the whole support of ``p`` the divergence is
    ``-H(p) + log(q.high - q.low)``. It is infinite only where ``q`` leaves
    part of ``p``'s support uncovered, i.e. ``q.low`` lies strictly above the
    lower bound or ``q.high`` strictly below the upper bound.

    Args:
        p: distribution exposing ``entropy()`` and ``support.lower_bound`` /
            ``support.upper_bound``.
        q: distribution exposing ``low`` and ``high`` tensors.

    Returns:
        Tensor of KL values, ``inf`` where ``q`` does not cover ``p``.
    """
    result = -p.entropy() + (q.high - q.low).log()
    support = p.support
    # Strict comparisons: a uniform that exactly touches (or extends past) the
    # bounds still covers the support, so its KL must stay finite.
    not_covered = (torch.gt(q.low, support.lower_bound)
                   | torch.lt(q.high, support.upper_bound))
    return torch.where(not_covered,
                       torch.full_like(result, float("inf")),
                       result)
def safe_log(self, tensor, eps=1e-16):
    """Element-wise natural log that yields 0 wherever ``tensor <= eps``.

    Entries at or below ``eps`` are swapped for 1 before the logarithm so no
    ``-inf``/``nan`` is produced, then forced to 0 in the result.
    """
    too_small = tensor <= eps
    guarded = torch.where(too_small, torch.ones_like(tensor), tensor)
    logged = torch.log(guarded)
    return torch.where(too_small, torch.zeros_like(guarded), logged)
def train(args):
    """Train the voxel GAN (discriminator ``_D``, generator ``_G``).

    Reads its configuration from ``args`` (model/cube/batch sizes, learning
    rates, output directories, step intervals), loads the voxel array from
    ``voxels_3DMNIST_16.npy`` in the working directory, restores any saved
    state through ``read_pickle`` and then alternates discriminator and
    generator updates with a binary cross-entropy loss.

    Side effects: connects to a Visdom server at ``http://localhost:8097``,
    prints progress, and periodically writes images and pickles under
    ``args.output_dir``.

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost; confirm in particular whether the per-iteration
    logging and the ``Iter-...`` print belong to the batch loop or the epoch
    loop.
    """
    # Visdom client used for the periodic voxel plots.
    DEFAULT_PORT = 8097
    DEFAULT_HOSTNAME = "http://localhost"
    viz = Visdom(DEFAULT_HOSTNAME, DEFAULT_PORT, ipv6=False)

    # Hyper-parameters that make up the run identifier string.
    hyparam_list = [
        ("model", args.model_name),
        ("cube", args.cube_len),
        ("bs", args.batch_size),
        ("g_lr", args.g_lr),
        ("d_lr", args.d_lr),
        ("z", args.z_dis),
        ("bias", args.bias),
        ("sl", args.soft_label),
    ]

    hyparam_dict = OrderedDict(((arg, value) for arg, value in hyparam_list))
    log_param = make_hyparam_string(hyparam_dict)
    print(log_param)

    # Optional TensorBoard writer; tensorflow is imported only when requested.
    if args.use_tensorboard:
        import tensorflow as tf

        summary_writer = tf.summary.FileWriter(args.output_dir + args.log_dir + log_param)

        def inject_summary(summary_writer, tag, value, step):
            # Write a single scalar summary value at the given step.
            summary = tf.Summary(
                value=[tf.Summary.Value(tag=tag, simple_value=value)])
            summary_writer.add_summary(summary, global_step=step)

        # Self-assignment: has no effect.
        inject_summary = inject_summary

    # Dataset definition. dsets_path is only printed; the data itself comes
    # from the hard-coded .npy file below.
    dsets_path = args.input_dir + args.data_dir + "train/"
    print(dsets_path)

    x_train = np.load("voxels_3DMNIST_16.npy")
    # Flatten every sample to cube_len ** 3 values.
    dataset = x_train.reshape(-1, args.cube_len * args.cube_len * args.cube_len)
    print(dataset.shape)
    dset_loaders = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=True, num_workers=1)

    # Model and optimizer definition.
    D = _D(args)
    G = _G(args)

    D_solver = optim.Adam(D.parameters(), lr=args.d_lr, betas=args.beta)
    G_solver = optim.Adam(G.parameters(), lr=args.g_lr, betas=args.beta)

    if torch.cuda.is_available():
        print("using cuda")
        D.cuda()
        G.cuda()

    criterion = nn.BCELoss()

    # Restore previously saved state, if any (behaviour of read_pickle is
    # defined elsewhere).
    pickle_path = "." + args.pickle_dir + log_param
    read_pickle(pickle_path, G, G_solver, D, D_solver)

    for epoch in range(args.n_epochs):
        epoch_start_time = time.time()
        print("epoch %d started" % (epoch))
        for i, X in enumerate(dset_loaders):

            X = var_or_cuda(X)
            # NOTE(review): this cast requires CUDA even though the models are
            # only moved to the GPU when it is available.
            X = X.type(torch.cuda.FloatTensor)
            # Skip the trailing batch when it is smaller than batch_size.
            if X.size()[0] != int(args.batch_size):
                continue

            Z = generateZ(args)
            real_labels = var_or_cuda(torch.ones(args.batch_size)).view(
                -1, 1, 1, 1, 1)
            fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view(
                -1, 1, 1, 1, 1)

            if args.soft_label:
                # Soft labels: real targets are drawn from U(0.9, 1.1); fake
                # targets stay at exactly 0.
                real_labels = var_or_cuda(
                    torch.Tensor(args.batch_size).uniform_(0.9, 1.1)).view(
                        -1, 1, 1, 1, 1)
                fake_labels = var_or_cuda(torch.zeros(args.batch_size)).view(
                    -1, 1, 1, 1, 1)

            # ============= Train the discriminator =============#
            d_real = D(X)
            d_real_loss = criterion(d_real, real_labels)

            fake = G(Z)
            d_fake = D(fake)
            d_fake_loss = criterion(d_fake, fake_labels)

            d_loss = d_real_loss + d_fake_loss

            # Discriminator accuracy on this batch (real >= 0.5, fake <= 0.5).
            d_real_acu = torch.ge(d_real.squeeze(), 0.5).float()
            d_fake_acu = torch.le(d_fake.squeeze(), 0.5).float()
            d_total_acu = torch.mean(torch.cat((d_real_acu, d_fake_acu), 0))

            # Only update D while its accuracy is at or below the threshold.
            if d_total_acu <= args.d_thresh:
                D.zero_grad()
                d_loss.backward()
                D_solver.step()

            # =============== Train the generator ===============#
            Z = generateZ(args)

            fake = G(Z)
            d_fake = D(fake)
            # The generator is trained to make D output the "real" labels.
            g_loss = criterion(d_fake, real_labels)

            D.zero_grad()
            G.zero_grad()
            g_loss.backward()
            G_solver.step()

            # =============== logging each iteration ===============#
            # Step counter of the first generator parameter, read from the
            # optimizer state and kept as a string.
            iteration = str(G_solver.state_dict()['state'][
                G_solver.state_dict()['param_groups'][0]['params'][0]]['step'])

            # Plot the first 8 generated samples every 300 generator steps.
            if int(iteration) % 300 == 0:
                samples = fake.cpu().data[:8].squeeze().numpy()
                for s in range(8):
                    plotVoxelVisdom(samples[s, ...], viz,
                                    "Iteration:{:.4}".format(iteration))

        # =============== each epoch save model or save image ===============#
        print(
            'Iter-{}; , D_loss : {:.4}, G_loss : {:.4}, D_acu : {:.4}, D_lr : {:.4}'
            .format(iteration, d_loss.item(), g_loss.item(),
                    d_total_acu.item(),
                    D_solver.state_dict()['param_groups'][0]["lr"]))
        epoch_end_time = time.time()

        if (epoch + 1) % args.image_save_step == 0:
            samples = fake.cpu().data[:8].squeeze().numpy()

            image_path = args.output_dir + args.image_dir + log_param
            if not os.path.exists(image_path):
                os.makedirs(image_path)

            SavePloat_Voxels(samples, image_path, iteration)

        if (epoch + 1) % args.pickle_step == 0:
            pickle_save_path = args.output_dir + args.pickle_dir + log_param
            save_new_pickle(pickle_save_path, iteration, G, G_solver, D,
                            D_solver)

        # Epoch duration in minutes.
        print("epoch time", (epoch_end_time - epoch_start_time) / 60)
        print("epoch %d ended" % (epoch))
        print("################################################")
def forward(self, classifications, regressions, anchors, annotations, **kwargs):
    """Compute the focal classification loss and smooth-L1 regression loss.

    Annotation rows are read as ``[class, x1, y1, x2, y2]``; rows whose class
    is ``-1`` are padding. Anchors are read as ``(y1, x1, y2, x2)``. Anchors
    with IoU < 0.4 are background, IoU >= 0.5 are positives, and the band in
    between is excluded from the classification loss.

    All helper tensors are created on the same device as the inputs, so the
    loss works for CPU tensors even when CUDA happens to be available.

    Args:
        classifications: per-anchor class probabilities, indexed
            ``[batch, anchor, class]``.
        regressions: per-anchor ``(dy, dx, dh, dw)`` predictions.
        anchors: anchor boxes; only ``anchors[0]`` is used.
        annotations: padded ground truth, one entry per image.
        **kwargs: accepted for interface compatibility; unused here.

    Returns:
        ``(classification_loss, regression_loss)``, each averaged over the
        batch and shaped ``(1,)``.
    """
    alpha = 0.25
    gamma = 2.0
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 0] != -1]

        # Keep log() finite.
        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if bbox_annotation.shape[0] == 0:
            # No ground truth: every anchor is a negative.
            alpha_factor = 1. - torch.ones_like(classification) * alpha
            focal_weight = alpha_factor * torch.pow(classification, gamma)
            bce = -(torch.log(1.0 - classification))
            cls_loss = focal_weight * bce

            regression_losses.append(
                torch.tensor(0, dtype=dtype, device=regression.device))
            classification_losses.append(cls_loss.sum())
            continue

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, 1:])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # ---- classification loss ----
        # -1 = ignored, 0 = background, 1 = assigned class.
        targets = torch.ones_like(classification) * -1
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices,
                assigned_annotations[positive_indices, 0].long()] = 1

        alpha_factor = torch.ones_like(targets) * alpha
        alpha_factor = torch.where(torch.eq(targets, 1.),
                                   alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.),
                                   1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification)
                + (1.0 - targets) * torch.log(1.0 - classification))
        cls_loss = focal_weight * bce

        # Anchors still marked -1 contribute nothing.
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                               torch.zeros_like(cls_loss))
        classification_losses.append(
            cls_loss.sum()
            / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        # ---- regression loss ----
        if num_positive_anchors > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_heights = assigned_annotations[:, 4] - assigned_annotations[:, 2]
            gt_ctr_x = assigned_annotations[:, 1] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 2] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack(
                (targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(
                targets - regression[positive_indices, :])

            # Smooth L1 with the quadratic/linear switch at 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            regression_losses.append(
                torch.tensor(0, dtype=dtype, device=regression.device))

    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)
def forward(self, pred, targ):
    """Mean smooth-L1 loss between ``pred`` and ``targ``.

    Absolute errors up to 1/9 are penalised quadratically (``4.5 * d**2``);
    larger errors linearly (``d - 1/18``).
    """
    abs_err = (targ - pred).abs()
    quadratic = 4.5 * abs_err.pow(2)
    linear = abs_err - 1 / 18
    per_element = torch.where(abs_err <= 1 / 9, quadratic, linear)
    return per_element.mean()
def _compute_fake_acc(predictions):
    """Return the fraction of ``predictions`` that are at most 0.5.

    A score ``<= 0.5`` counts as a correct "fake" call. The fraction is taken
    over every element, whatever the rank of the input, so the result always
    lies in ``[0, 1]``.

    Args:
        predictions: tensor of scores; only its ``.data`` is read.

    Returns:
        0-dim float tensor with the accuracy.
    """
    is_fake = torch.le(predictions.data, 0.5)
    # Averaging the mask divides by the total element count rather than by
    # size(0) alone, which over-counted for inputs that were not 1-D or 3-D.
    return is_fake.float().mean()
def forward(self, classifications, regressions, anchors, annotations, **kwargs):
    """Compute the focal classification loss and smooth-L1 regression loss.

    Annotation rows are read as ``[x1, y1, x2, y2, class]``; rows whose class
    is ``-1`` are padding. Anchors are read as ``(y1, x1, y2, x2)``. Anchors
    with IoU < 0.4 are background, IoU >= 0.5 are positives, and the band in
    between is excluded from the classification loss.

    All helper tensors are created on the same device as the inputs, so the
    loss works for CPU tensors even when CUDA happens to be available.

    Args:
        classifications: per-anchor class probabilities, indexed
            ``[batch, anchor, class]``.
        regressions: per-anchor ``(dy, dx, dh, dw)`` predictions.
        anchors: anchor boxes; only ``anchors[0]`` is used.
        annotations: padded ground truth, one entry per image.
        **kwargs: optional debug inputs. When ``imgs`` is given, the current
            predictions are post-processed and handed to ``display`` together
            with ``obj_list``.

    Returns:
        ``(classification_loss, regression_loss)``, each averaged over the
        batch and shaped ``(1,)``.
    """
    alpha = 0.25
    gamma = 2.0
    batch_size = classifications.shape[0]
    classification_losses = []
    regression_losses = []

    anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
    dtype = anchors.dtype

    anchor_widths = anchor[:, 3] - anchor[:, 1]
    anchor_heights = anchor[:, 2] - anchor[:, 0]
    anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths
    anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights

    for j in range(batch_size):
        classification = classifications[j, :, :]
        regression = regressions[j, :, :]

        bbox_annotation = annotations[j]
        bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]

        # Keep log() finite.
        classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)

        if bbox_annotation.shape[0] == 0:
            # No ground truth: every anchor is a negative.
            alpha_factor = 1. - torch.ones_like(classification) * alpha
            focal_weight = alpha_factor * torch.pow(classification, gamma)
            bce = -(torch.log(1.0 - classification))
            cls_loss = focal_weight * bce

            regression_losses.append(
                torch.tensor(0, dtype=dtype, device=regression.device))
            classification_losses.append(cls_loss.sum())
            continue

        IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])
        IoU_max, IoU_argmax = torch.max(IoU, dim=1)

        # ---- classification loss ----
        # -1 = ignored, 0 = background, 1 = assigned class.
        targets = torch.ones_like(classification) * -1
        targets[torch.lt(IoU_max, 0.4), :] = 0

        positive_indices = torch.ge(IoU_max, 0.5)
        num_positive_anchors = positive_indices.sum()

        assigned_annotations = bbox_annotation[IoU_argmax, :]

        targets[positive_indices, :] = 0
        targets[positive_indices,
                assigned_annotations[positive_indices, 4].long()] = 1

        alpha_factor = torch.ones_like(targets) * alpha
        alpha_factor = torch.where(torch.eq(targets, 1.),
                                   alpha_factor, 1. - alpha_factor)
        focal_weight = torch.where(torch.eq(targets, 1.),
                                   1. - classification, classification)
        focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

        bce = -(targets * torch.log(classification)
                + (1.0 - targets) * torch.log(1.0 - classification))
        cls_loss = focal_weight * bce

        # Anchors still marked -1 contribute nothing.
        cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss,
                               torch.zeros_like(cls_loss))
        classification_losses.append(
            cls_loss.sum()
            / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

        # ---- regression loss ----
        if num_positive_anchors > 0:
            assigned_annotations = assigned_annotations[positive_indices, :]

            anchor_widths_pi = anchor_widths[positive_indices]
            anchor_heights_pi = anchor_heights[positive_indices]
            anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
            anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

            gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
            gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
            gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
            gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

            # efficientdet style
            gt_widths = torch.clamp(gt_widths, min=1)
            gt_heights = torch.clamp(gt_heights, min=1)

            targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
            targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
            targets_dw = torch.log(gt_widths / anchor_widths_pi)
            targets_dh = torch.log(gt_heights / anchor_heights_pi)

            targets = torch.stack(
                (targets_dy, targets_dx, targets_dh, targets_dw))
            targets = targets.t()

            regression_diff = torch.abs(
                targets - regression[positive_indices, :])

            # Smooth L1 with the quadratic/linear switch at 1/9.
            regression_loss = torch.where(
                torch.le(regression_diff, 1.0 / 9.0),
                0.5 * 9.0 * torch.pow(regression_diff, 2),
                regression_diff - 0.5 / 9.0)
            regression_losses.append(regression_loss.mean())
        else:
            regression_losses.append(
                torch.tensor(0, dtype=dtype, device=regression.device))

    # debug
    imgs = kwargs.get('imgs', None)
    if imgs is not None:
        regressBoxes = BBoxTransform()
        clipBoxes = ClipBoxes()
        obj_list = kwargs.get('obj_list', None)
        out = postprocess(
            imgs.detach(),
            torch.stack([anchors[0]] * imgs.shape[0], 0).detach(),
            regressions.detach(), classifications.detach(),
            regressBoxes, clipBoxes,
            0.5, 0.3)
        # Undo the per-channel normalisation and convert to uint8 BGR images.
        imgs = imgs.permute(0, 2, 3, 1).cpu().numpy()
        imgs = ((imgs * [0.229, 0.224, 0.225] + [0.485, 0.456, 0.406])
                * 255).astype(np.uint8)
        imgs = [cv2.cvtColor(img, cv2.COLOR_RGB2BGR) for img in imgs]
        display(out, imgs, obj_list, imshow=False, imwrite=True)

    return torch.stack(classification_losses).mean(dim=0, keepdim=True), \
        torch.stack(regression_losses).mean(dim=0, keepdim=True)
def test_inference(encoder, decoder_iter, postnet):
    """Run one end-to-end synthesis pass through encoder, decoder and postnet.

    The fixed sentence ``"Hello World, good day."`` is encoded, the decoder is
    stepped one frame at a time until every batch item's gate probability has
    exceeded the threshold (or ``max_decoder_steps`` is reached), and the
    collected mel frames are passed through the postnet.

    Args:
        encoder: module called as ``encoder(sequences, sequence_lengths)``.
        decoder_iter: module performing a single decoder step.
        postnet: module applied to the stacked mel frames.

    Returns:
        The postnet output for the generated mel frames.
    """
    encoder.eval()
    decoder_iter.eval()
    postnet.eval()

    from trt.inference_trt import init_decoder_inputs

    texts = ["Hello World, good day."]
    sequences, sequence_lengths = prepare_input_sequence(texts)

    print("Running Tacotron2 Encoder")
    with torch.no_grad():
        memory, processed_memory, _ = encoder(sequences, sequence_lengths)

    print("Running Tacotron2 Decoder")
    device = memory.device
    # 1 while a batch item is still generating, 0 once its gate has fired.
    not_finished = torch.ones([memory.size(0)], dtype=torch.int32,
                              device=device)
    gate_threshold = 0.6
    max_decoder_steps = 1000
    # Frames are collected in a list and stacked once after the loop instead
    # of re-concatenating the growing tensor on every step.
    mel_frames = []

    (decoder_input, attention_hidden, attention_cell, decoder_hidden,
     decoder_cell, attention_weights, attention_weights_cum,
     attention_context, memory, processed_memory,
     mask) = init_decoder_inputs(memory, processed_memory, sequence_lengths)

    while True:
        with torch.no_grad():
            (mel_output, gate_output,
             attention_hidden, attention_cell,
             decoder_hidden, decoder_cell,
             attention_weights, attention_weights_cum,
             attention_context) = decoder_iter(decoder_input,
                                               attention_hidden,
                                               attention_cell,
                                               decoder_hidden,
                                               decoder_cell,
                                               attention_weights,
                                               attention_weights_cum,
                                               attention_context,
                                               memory,
                                               processed_memory,
                                               mask)

        mel_frames.append(mel_output)

        # An item stays active only while its gate probability is at or
        # below the threshold.
        dec = torch.le(torch.sigmoid(gate_output),
                       gate_threshold).to(torch.int32).squeeze(1)
        not_finished = not_finished * dec

        if torch.sum(not_finished) == 0:
            print("Stopping after ", len(mel_frames), " decoder steps")
            break
        if len(mel_frames) == max_decoder_steps:
            print("Warning! Reached max decoder steps")
            break

        # Feed the frame just produced back in as the next decoder input.
        decoder_input = mel_output

    # New axis 2 indexes the decoder step, as the per-step concatenation did.
    mel_outputs = torch.stack(mel_frames, dim=2)

    print("Running Tacotron2 PostNet")
    with torch.no_grad():
        mel_outputs_postnet = postnet(mel_outputs)

    return mel_outputs_postnet
def forward(self, x, y=None):
    """Run the bottom-up CNN pass and the top-down reconstruction.

    The feature extractor is walked in three stages of four layers each
    (conv, batch norm, relu, pool at ``self.features[4*k : 4*k + 4]``). In
    parallel a zero "bias" input is pushed through the same conv/relu/pool
    layers, using ``self.insnorms_cnn[k]`` in place of batch norm. Per stage
    the method records the per-channel mean/variance of the conv output, the
    batch-norm bias, the activation masks of both paths and the pooling mask;
    these are handed to ``self.topdown`` in reverse (top-down) order.

    Args:
        x: input batch, indexed ``[batch, channel, height, width]``.
        y: optional labels. When ``None`` the arg-max of the predicted
            scores ``z`` is used to drive the reconstruction.

    Returns:
        ``[z, xhat, loss_pn, loss_neg]``: classifier output, reconstruction
        and the two losses returned by ``self.topdown``.

    Side effects:
        Stores the reversed list of batch-norm biases in
        ``self.bias_latent_cnn``.
    """
    relu_latent = []
    pool_latent = []
    bias_latent_cnn = []
    relu_latentpn = []
    mean_latent_cnn = []
    var_latent_cnn = []

    # Zero input for the bias path, created on the same device as x so the
    # shared layers accept it.
    xbias = th.zeros([1, x.shape[1], x.shape[2], x.shape[3]], device=x.device)

    for stage in range(3):
        base = 4 * stage
        conv = self.features[base]
        bnorm = self.features[base + 1]
        act = self.features[base + 2]
        pool = self.features[base + 3]

        # conv
        x = conv(x)
        xbias = conv(xbias)
        channel_mean = th.mean(x, dim=(0, 2, 3), keepdim=True)
        mean_latent_cnn.append(channel_mean)
        var_latent_cnn.append(
            th.mean((x - channel_mean) ** 2, dim=(0, 2, 3), keepdim=True))

        # batch norm (instance norm on the bias path)
        x = bnorm(x)
        xbias = self.insnorms_cnn[stage](xbias)
        bias_latent_cnn.append(bnorm.bias)

        # relu: masks hold 1 where the activation is positive, 0.1 elsewhere
        x = act(x)
        xbias = act(xbias)
        relu_latent.append(th.gt(x, 0).float() + th.le(x, 0).float() * 0.1)
        relu_latentpn.append(
            th.gt(xbias, 0).float() + th.le(xbias, 0).float() * 0.1)

        # pool: mark the positions whose value is at least the (upsampled)
        # pooled value; the pooled tensor is computed once and reused.
        pooled = pool(x)
        pool_latent.append(
            th.ge(x - F.interpolate(pooled, scale_factor=2, mode='nearest'),
                  0))
        x = pooled
        xbias = pool(xbias)

    # The top-down pass consumes the latents from the last stage to the first.
    relu_latent = relu_latent[::-1]
    pool_latent = pool_latent[::-1]
    bias_latent_cnn = bias_latent_cnn[::-1]
    self.bias_latent_cnn = bias_latent_cnn
    relu_latentpn = relu_latentpn[::-1]
    mean_latent_cnn = mean_latent_cnn[::-1]
    var_latent_cnn = var_latent_cnn[::-1]

    # send the features into the classifier
    trl_w, z = self.trl(x)
    w_t = trl_w.permute(dims=(3, 0, 1, 2))

    # Reconstruct from the given labels, or from the predicted class when no
    # labels were supplied.
    labels = y if y is not None else th.argmax(z.detach(), dim=1)
    xhat, _, loss_pn, loss_neg = self.topdown(
        self.nrm, make_one_hot(labels, self.num_class), relu_latent,
        pool_latent, bias_latent_cnn,
        tl.ones([1, z.size()[1]], device=z.device),
        relu_latentpn, mean_latent_cnn, var_latent_cnn, w_t)

    return [z, xhat, loss_pn, loss_neg]
def _bound_logvar_lookup(self):
    """Raise every ``logvar_lookup`` weight at or below the bound to the bound.

    Works in place on ``weight.data``, so the change is not tracked by
    autograd.
    """
    floor = self.logvar_bound
    weights = self.logvar_lookup.weight.data
    at_or_below = weights <= floor
    weights.masked_fill_(at_or_below, floor)