def script_viterbi(unary, trans, start_idx, end_idx):
    # type: (Tensor, Tensor, int, int) -> Tuple[Tensor, Tensor]
    backpointers = []
    alphas = torch.full((1, unary.size(1)), -1e4, dtype=unary.dtype, device=unary.device)
    alphas[0, start_idx] = 0

    for i in range(unary.size(0)):
        unary_t = unary[i, :]
        next_tag_var = alphas + trans
        viterbi, best_tag_ids = torch.max(next_tag_var, 1)
        backpointers.append(best_tag_ids)
        alphas = viterbi + unary_t
        alphas = alphas.unsqueeze(0)

    terminal_vars = alphas.squeeze(0) + trans[end_idx, :]
    path_score, best_tag_id = torch.max(terminal_vars, 0)
    best_path = [best_tag_id]

    # Walk the backpointers in reverse, written index-wise so that it stays
    # TorchScript-compatible (no reversed()).
    for i in range(len(backpointers)):
        i = len(backpointers) - i - 1
        best_tag_id = backpointers[i][best_tag_id]
        best_path.append(best_tag_id)

    # Reverse the collected path, again without reversed().
    new_path = []
    for i in range(len(best_path)):
        i = len(best_path) - i - 1
        new_path.append(best_path[i])
    return torch.stack(new_path[1:]), path_score

def forward(self, features, rois):
    batch_size, num_channels, data_height, data_width = features.size()
    num_rois = rois.size()[0]
    outputs = Variable(torch.zeros(num_rois, num_channels,
                                   self.pooled_height, self.pooled_width)).cuda()

    for roi_ind, roi in enumerate(rois):
        batch_ind = int(roi[0].data[0])
        roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round(
            roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int)
        roi_width = max(roi_end_w - roi_start_w + 1, 1)
        roi_height = max(roi_end_h - roi_start_h + 1, 1)
        bin_size_w = float(roi_width) / float(self.pooled_width)
        bin_size_h = float(roi_height) / float(self.pooled_height)

        for ph in range(self.pooled_height):
            hstart = int(np.floor(ph * bin_size_h))
            hend = int(np.ceil((ph + 1) * bin_size_h))
            hstart = min(data_height, max(0, hstart + roi_start_h))
            hend = min(data_height, max(0, hend + roi_start_h))
            for pw in range(self.pooled_width):
                wstart = int(np.floor(pw * bin_size_w))
                wend = int(np.ceil((pw + 1) * bin_size_w))
                wstart = min(data_width, max(0, wstart + roi_start_w))
                wend = min(data_width, max(0, wend + roi_start_w))

                is_empty = (hend <= hstart) or (wend <= wstart)
                if is_empty:
                    outputs[roi_ind, :, ph, pw] = 0
                else:
                    data = features[batch_ind]
                    outputs[roi_ind, :, ph, pw] = torch.max(
                        torch.max(data[:, hstart:hend, wstart:wend], 1)[0],
                        2)[0].view(-1)

    return outputs

def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Returns the IoU of two bounding boxes
    """
    if not x1y1x2y2:
        # Transform from center and width to exact coordinates
        b1_x1, b1_x2 = box1[:, 0] - box1[:, 2] / 2, box1[:, 0] + box1[:, 2] / 2
        b1_y1, b1_y2 = box1[:, 1] - box1[:, 3] / 2, box1[:, 1] + box1[:, 3] / 2
        b2_x1, b2_x2 = box2[:, 0] - box2[:, 2] / 2, box2[:, 0] + box2[:, 2] / 2
        b2_y1, b2_y2 = box2[:, 1] - box2[:, 3] / 2, box2[:, 1] + box2[:, 3] / 2
    else:
        # Get the coordinates of bounding boxes
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)
    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * torch.clamp(
        inter_rect_y2 - inter_rect_y1 + 1, min=0
    )
    # Union area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area + 1e-16)

    return iou

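# --- Usage sketch: a minimal, hedged example of calling bbox_iou above on two
# hypothetical corner-format (N, 4) boxes. With the +1 integer-pixel convention
# used in the function, identical boxes give an IoU of ~1.
import torch

b1 = torch.tensor([[0.0, 0.0, 9.0, 9.0]])     # x1, y1, x2, y2
b2 = torch.tensor([[5.0, 5.0, 14.0, 14.0]])
print(bbox_iou(b1, b2))   # partial overlap: 25 / 175 ~= 0.143
print(bbox_iou(b1, b1))   # ~1.0 (exact up to the 1e-16 stabilizer)
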
def bbox_ious(boxes1, boxes2, x1y1x2y2=True):
    if x1y1x2y2:
        mx = torch.min(boxes1[0], boxes2[0])
        Mx = torch.max(boxes1[2], boxes2[2])
        my = torch.min(boxes1[1], boxes2[1])
        My = torch.max(boxes1[3], boxes2[3])
        w1 = boxes1[2] - boxes1[0]
        h1 = boxes1[3] - boxes1[1]
        w2 = boxes2[2] - boxes2[0]
        h2 = boxes2[3] - boxes2[1]
    else:
        mx = torch.min(boxes1[0] - boxes1[2] / 2.0, boxes2[0] - boxes2[2] / 2.0)
        Mx = torch.max(boxes1[0] + boxes1[2] / 2.0, boxes2[0] + boxes2[2] / 2.0)
        my = torch.min(boxes1[1] - boxes1[3] / 2.0, boxes2[1] - boxes2[3] / 2.0)
        My = torch.max(boxes1[1] + boxes1[3] / 2.0, boxes2[1] + boxes2[3] / 2.0)
        w1 = boxes1[2]
        h1 = boxes1[3]
        w2 = boxes2[2]
        h2 = boxes2[3]
    uw = Mx - mx
    uh = My - my
    cw = w1 + w2 - uw
    ch = h1 + h2 - uh
    mask = ((cw <= 0) + (ch <= 0) > 0)
    area1 = w1 * h1
    area2 = w2 * h2
    carea = cw * ch
    carea[mask] = 0
    uarea = area1 + area2 - carea
    return carea / uarea

def rmac(features, rmac_levels, pca=None):
    nim, nc, xd, yd = features.size()

    rmac_regions = image_helper.get_rmac_region_coordinates(xd, yd, rmac_levels)
    rmac_regions = rmac_regions.astype(int)
    nr = len(rmac_regions)

    rmac_descriptors = []
    for x0, y0, w, h in rmac_regions:
        desc = features[:, :, y0:y0 + h, x0:x0 + w]
        desc = torch.max(desc, 2, keepdim=True)[0]
        desc = torch.max(desc, 3, keepdim=True)[0]
        # insert an additional dimension for the cat to work
        rmac_descriptors.append(desc.view(-1, 1, nc))
    rmac_descriptors = torch.cat(rmac_descriptors, 1)

    rmac_descriptors = normalize_L2(rmac_descriptors, 2)

    if pca is None:
        return rmac_descriptors

    # PCA + whitening
    npca = pca.n_components
    rmac_descriptors = pca.apply(rmac_descriptors.view(nr * nim, nc))
    rmac_descriptors = normalize_L2(rmac_descriptors, 1)

    rmac_descriptors = rmac_descriptors.view(nim, nr, npca)

    # Sum aggregation and L2-normalization
    rmac_descriptors = torch.sum(rmac_descriptors, 1)
    rmac_descriptors = normalize_L2(rmac_descriptors, 1)
    return rmac_descriptors

def forward(self, y_pred, y_true, eps=1e-6):
    torch.nn.modules.loss._assert_no_grad(y_true)
    assert y_pred.shape[1] == 2

    same_left = torch.stack([y_true[:, 0], y_pred[:, 0]], dim=1)
    same_left, _ = torch.max(same_left, dim=1)

    same_right = torch.stack([y_true[:, 1], y_pred[:, 1]], dim=1)
    same_right, _ = torch.min(same_right, dim=1)

    same_len = same_right - same_left + 1  # (batch_size,)
    same_len = torch.stack([same_len, torch.zeros_like(same_len)], dim=1)
    same_len, _ = torch.max(same_len, dim=1)

    same_len = same_len.type(torch.float)
    pred_len = (y_pred[:, 1] - y_pred[:, 0] + 1).type(torch.float)
    true_len = (y_true[:, 1] - y_true[:, 0] + 1).type(torch.float)

    pre = same_len / (pred_len + eps)
    rec = same_len / (true_len + eps)
    f1 = 2 * pre * rec / (pre + rec + eps)

    return -torch.mean(f1)

def bbox_iou(box1, box2):
    """
    Returns the IoU of two bounding boxes
    """
    # Get the coordinates of bounding boxes
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]

    # Get the coordinates of the intersection rectangle
    inter_rect_x1 = torch.max(b1_x1, b2_x1)
    inter_rect_y1 = torch.max(b1_y1, b2_y1)
    inter_rect_x2 = torch.min(b1_x2, b2_x2)
    inter_rect_y2 = torch.min(b1_y2, b2_y2)

    # Intersection area
    inter_area = torch.clamp(inter_rect_x2 - inter_rect_x1 + 1, min=0) * \
        torch.clamp(inter_rect_y2 - inter_rect_y1 + 1, min=0)

    # Union area
    b1_area = (b1_x2 - b1_x1 + 1) * (b1_y2 - b1_y1 + 1)
    b2_area = (b2_x2 - b2_x1 + 1) * (b2_y2 - b2_y1 + 1)

    iou = inter_area / (b1_area + b2_area - inter_area)

    return iou

def forward(self, features, rois):
    self.batch_size, self.num_channels, self.data_width, self.data_height, self.data_length = features.size()
    self.num_rois = rois.size()[0]
    self.remember_for_backward = torch.zeros(self.num_rois, self.num_channels, self.pooled_width,
                                             self.pooled_height, self.pooled_length, 3) - 1
    outputs = torch.zeros(self.num_rois, self.num_channels, self.pooled_width,
                          self.pooled_height, self.pooled_length)

    for roi_ind, roi in enumerate(rois):
        roi_start_w, roi_start_h, roi_start_l, roi_end_w, roi_end_h, roi_end_l = \
            roi.cpu().numpy() * self.spatial_scale
        roi_start_w = int(math.floor(roi_start_w))
        roi_start_h = int(math.floor(roi_start_h))
        roi_start_l = int(math.floor(roi_start_l))
        roi_end_w = int(math.ceil(roi_end_w))
        roi_end_h = int(math.ceil(roi_end_h))
        roi_end_l = int(math.ceil(roi_end_l))

        roi_width = max(roi_end_w - roi_start_w, 1)
        roi_height = max(roi_end_h - roi_start_h, 1)
        roi_length = max(roi_end_l - roi_start_l, 1)
        # roi_width = roi_end_w - roi_start_w
        # roi_height = roi_end_h - roi_start_h
        # roi_length = roi_end_l - roi_start_l
        # if roi_width < 1 or roi_height < 1 or roi_length < 1:
        #     continue

        bin_size_w = float(roi_width) / float(self.pooled_width)
        bin_size_h = float(roi_height) / float(self.pooled_height)
        bin_size_l = float(roi_length) / float(self.pooled_length)

        for pw in range(self.pooled_width):
            for ph in range(self.pooled_height):
                for pl in range(self.pooled_length):
                    wstart = int(np.floor(pw * bin_size_w))
                    hstart = int(np.floor(ph * bin_size_h))
                    lstart = int(np.floor(pl * bin_size_l))
                    wend = int(np.ceil((pw + 1) * bin_size_w))
                    hend = int(np.ceil((ph + 1) * bin_size_h))
                    lend = int(np.ceil((pl + 1) * bin_size_l))

                    wstart = min(self.data_width, max(0, wstart + roi_start_w))
                    hstart = min(self.data_height, max(0, hstart + roi_start_h))
                    lstart = min(self.data_length, max(0, lstart + roi_start_l))
                    wend = min(self.data_width, max(0, wend + roi_start_w))
                    hend = min(self.data_height, max(0, hend + roi_start_h))
                    lend = min(self.data_length, max(0, lend + roi_start_l))

                    is_empty = (hend <= hstart) or (wend <= wstart) or (lend <= lstart)
                    if is_empty:
                        outputs[roi_ind, :, pw, ph, pl] = 0
                    else:
                        data = features[0]
                        outputs[roi_ind, :, pw, ph, pl] = torch.max(torch.max(torch.max(
                            data[:, wstart:wend, hstart:hend, lstart:lend], 1)[0], 1)[0], 1)[0].view(-1)
                        for c in range(self.num_channels):
                            ind_w, ind_h, ind_l = np.unravel_index(
                                data[c, wstart:wend, hstart:hend, lstart:lend].numpy().argmax(),
                                data[c, wstart:wend, hstart:hend, lstart:lend].numpy().shape)
                            self.remember_for_backward[roi_ind, c, pw, ph, pl] = torch.from_numpy(
                                np.array([ind_w + wstart, ind_h + hstart, ind_l + lstart])).float()
    return outputs

def log_sum_exp(x):
    """ numerically stable log_sum_exp implementation that prevents overflow """
    # TF ordering
    axis = len(x.size()) - 1
    m, _ = torch.max(x, dim=axis)
    m2, _ = torch.max(x, dim=axis, keepdim=True)
    return m + torch.log(torch.sum(torch.exp(x - m2), dim=axis))

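# --- Sanity-check sketch: on recent PyTorch, log_sum_exp above should agree
# with the built-in torch.logsumexp over the last axis, while a naive
# log(sum(exp(x))) would overflow for the first row below.
import torch

x = torch.tensor([[1000.0, 1000.0], [0.0, -1000.0]])
print(log_sum_exp(x))              # tensor([1000.6931, 0.0000])
print(torch.logsumexp(x, dim=-1))  # same values, built-in reference
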
def test(net, dataloader, tag=''):
    correct = 0
    total = 0
    if tag == 'Train':
        dataTestLoader = dataloader.trainloader
    else:
        dataTestLoader = dataloader.testloader
    with torch.no_grad():
        for data in dataTestLoader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    net.log('%s Accuracy of the network: %d %%' % (tag, 100 * correct / total))

    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in dataTestLoader:
            images, labels = data
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
            for i in range(len(labels)):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1

    for i in range(10):
        net.log('%s Accuracy of %5s : %2d %%' % (
            tag, dataloader.classes[i], 100 * class_correct[i] / class_total[i]))

def bbox_overlaps(boxes, query_boxes):
    """
    Parameters
    ----------
    boxes: (N, 4) ndarray or tensor or variable
    query_boxes: (K, 4) ndarray or tensor or variable
    Returns
    -------
    overlaps: (N, K) overlap between boxes and query_boxes
    """
    if isinstance(boxes, np.ndarray):
        boxes = torch.from_numpy(boxes)
        query_boxes = torch.from_numpy(query_boxes)
        out_fn = lambda x: x.numpy()  # If input is ndarray, turn the overlaps back to ndarray when return
    else:
        out_fn = lambda x: x

    box_areas = (boxes[:, 2] - boxes[:, 0] + 1) * \
                (boxes[:, 3] - boxes[:, 1] + 1)
    query_areas = (query_boxes[:, 2] - query_boxes[:, 0] + 1) * \
                  (query_boxes[:, 3] - query_boxes[:, 1] + 1)

    iw = (torch.min(boxes[:, 2:3], query_boxes[:, 2:3].t()) -
          torch.max(boxes[:, 0:1], query_boxes[:, 0:1].t()) + 1).clamp(min=0)
    ih = (torch.min(boxes[:, 3:4], query_boxes[:, 3:4].t()) -
          torch.max(boxes[:, 1:2], query_boxes[:, 1:2].t()) + 1).clamp(min=0)
    ua = box_areas.view(-1, 1) + query_areas.view(1, -1) - iw * ih
    overlaps = iw * ih / ua
    return out_fn(overlaps)

def predictive_elbo(self, x, k, s):
    # No pW or qW
    self.B = x.size()[0]  # batch size
    # self.k = k  # number of z samples aka particles P
    # self.s = s  # number of W samples

    elbo1s = []
    for i in range(s):
        Ws, logpW, logqW = self.sample_W()  # _, [1], [1]
        mu, logvar = self.encode(x)  # [B,Z]
        z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]
        x_hat = self.decode(Ws, z)  # [P,B,X]

        logpx = log_bernoulli(x_hat, x)  # [P,B]

        elbo = logpx + logpz - logqz  # [P,B]
        if k > 1:
            max_ = torch.max(elbo, 0)[0]  # [B]
            elbo = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]
        # elbo1 = elbo1  # + (logpW - logqW)*.00000001  # [B], logp(x|W)p(w)/q(w)
        elbo1s.append(elbo)
    elbo1s = torch.stack(elbo1s)  # [S,B]
    if s > 1:
        max_ = torch.max(elbo1s, 0)[0]  # [B]
        elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo1s)  # [1]
    return elbo  # , logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]

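# --- Standalone sketch: the k > 1 branch above is the usual max-shift
# "log-mean-exp" trick for importance-weighted bounds. The helper below is a
# hypothetical, self-contained version of that one step, not part of the
# original class.
import torch

def log_mean_exp(w, dim=0):
    # log((1/K) * sum_k exp(w_k)), stabilized by subtracting the max first
    m, _ = torch.max(w, dim, keepdim=True)
    return (m + torch.log(torch.mean(torch.exp(w - m), dim, keepdim=True))).squeeze(dim)

w = torch.tensor([[-1000.0], [-1001.0]])  # [P=2, B=1] log-weights
print(log_mean_exp(w, 0))                 # ~ -1000.38, no underflow
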
def mio_module(self, each_mmbox, len_conf):
    chunk = torch.chunk(each_mmbox, each_mmbox.shape[1], 1)
    bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2])
    cls = (torch.cat([bmax, chunk[3]], dim=1) if len_conf == 0
           else torch.cat([chunk[3], bmax], dim=1))
    if len(chunk) == 6:
        cls = torch.cat([cls, chunk[4], chunk[5]], dim=1)
    elif len(chunk) == 8:
        cls = torch.cat([cls, chunk[4], chunk[5], chunk[6], chunk[7]], dim=1)
    return cls

def forward(self, match_encoders):
    '''
    match_encoders (pn_steps, batch, hidden_size*2)
    '''
    vh_matrix = self.vh_net(match_encoders)  # pn_steps, batch, hidden_size

    # prediction start
    h0 = Variable(torch.zeros(match_encoders.size(1), self.hidden_size)).cuda()
    c0 = Variable(torch.zeros(match_encoders.size(1), self.hidden_size)).cuda()
    wha1 = self.wa_net(h0)  # batch, hidden_size
    wha1 = wha1.expand(match_encoders.size(0), wha1.size(0), wha1.size(1))  # pn_steps, batch, hidden_size
    f1 = self.tanh(vh_matrix + wha1)  # pn_steps, batch, hidden_size
    vf1 = self.v_net(f1.transpose(0, 1)).squeeze(-1)  # batch, pn_steps
    beta1 = self.softmax(vf1)  # batch, pn_steps
    softmax_beta1 = self.softmax(beta1).view(beta1.size(0), 1, beta1.size(1))  # batch, 1, pn_steps
    inp = torch.bmm(softmax_beta1, match_encoders.transpose(0, 1))  # batch, 1, hidden_size
    inp = inp.squeeze(1)  # batch, hidden_size
    h1, c1 = self.pointer_lstm(inp, (h0, c0))

    wha2 = self.wa_net(h1)  # batch, hidden_size
    wha2 = wha2.expand(match_encoders.size(0), wha2.size(0), wha2.size(1))  # pn_steps, batch, hidden_size
    f2 = self.tanh(vh_matrix + wha2)  # pn_steps, batch, hidden_size
    vf2 = self.v_net(f2.transpose(0, 1)).squeeze(-1)  # batch, pn_steps
    beta2 = self.softmax(vf2)  # batch, pn_steps
    softmax_beta2 = self.softmax(beta2).view(beta2.size(0), 1, beta2.size(1))  # batch, 1, pn_steps
    inp = torch.bmm(softmax_beta2, match_encoders.transpose(0, 1))  # batch, 1, hidden_size
    inp = inp.squeeze(1)  # batch, hidden_size
    h2, c2 = self.pointer_lstm(inp, (h1, c1))

    _, start = torch.max(beta1, 1)
    _, end = torch.max(beta2, 1)
    beta1 = beta1.view(1, beta1.size(0), beta1.size(1))
    beta2 = beta2.view(1, beta2.size(0), beta2.size(1))
    logits = torch.cat([beta1, beta2])
    start = start.view(1, start.size(0))
    end = end.view(1, end.size(0))
    prediction = torch.cat([start, end]).transpose(0, 1).cpu().data.numpy()
    return logits, prediction

def updateOutput(self, input):
    self._lazyInit()
    dimension = self._getPositiveDimension(input)
    torch.max(input, dimension, out=(self._output, self._indices), keepdim=True)
    if input.dim() > 1:
        self.output.set_(self._output.select(dimension, 0))
    else:
        self.output.set_(self._output)
    return self.output

def argmax(vec):
    """
    Returns the arg max as an int
    """
    if len(vec.size()) == 1:
        _, idx = torch.max(vec, 0)
    else:
        _, idx = torch.max(vec, 1)
    return idx.item()

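# --- Usage sketch for argmax above. Note that .item() only works on a single
# element, so the 2-D branch implicitly assumes a batch of one row.
import torch

print(argmax(torch.tensor([0.1, 2.5, 0.3])))    # 1
print(argmax(torch.tensor([[0.1, 2.5, 0.3]])))  # 1 (row-wise argmax, batch of 1)
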
def blur_frame(self, frame):
    if torch.max(frame) > 1.:
        # Frames are expected in [0, 1]; fail loudly otherwise.
        print('DDDDDDDD')
        print(torch.max(frame).data.cpu().numpy())
        raise ValueError('frame values exceed 1.')

    K = 21  # 11
    padding = 10  # 5
    filter_weights = torch.ones(1, 1, K, K).cuda()
    filter_weights = filter_weights / K ** 2

    # Blur each colour channel separately with the same box filter.
    frame_c0 = frame[:, 0].unsqueeze(1)
    frame_c0 = F.conv2d(input=frame_c0, weight=filter_weights, bias=None,
                        padding=padding, stride=1, dilation=1)

    frame_c1 = frame[:, 1].unsqueeze(1)
    frame_c1 = F.conv2d(input=frame_c1, weight=filter_weights, bias=None,
                        padding=padding, stride=1, dilation=1)

    frame_c2 = frame[:, 2].unsqueeze(1)
    frame_c2 = F.conv2d(input=frame_c2, weight=filter_weights, bias=None,
                        padding=padding, stride=1, dilation=1)

    blurred_image = torch.stack([frame_c0, frame_c1, frame_c2], dim=1)
    blurred_image = blurred_image.squeeze(dim=2)  # [B,3,480,640]
    # blurred_image = blurred_image / torch.max(blurred_image)
    blurred_image = torch.clamp(blurred_image, max=1.0)
    return blurred_image

def dec(self, encoders, decoder_inputs, is_teacher_forcing, max_question_len):
    '''
    encoders (batch, hidden_size)
    if is_teacher_forcing: decoder_inputs (batch, max_question_len)
    if not is_teacher_forcing: decoder_inputs (batch, 1)
    '''
    decoder_inputs = Variable(decoder_inputs).long().cuda()
    decoder_inputs = self.embedding(decoder_inputs)
    decoder_inputs = decoder_inputs.transpose(0, 1)
    encoders = encoders.expand(decoder_inputs.size(0), encoders.size(0), self.hidden_size * 2)
    inputs = torch.cat([decoder_inputs, encoders], -1)

    if is_teacher_forcing:
        outputs, hidden = self.dec_net(inputs)
        outputs = self.dropout(outputs)
        logits = self.fc_net(outputs)  # qn_steps, batch, voc_size
        _, predictions = torch.max(logits.transpose(0, 1), -1)  # batch, qn_steps
        predictions = predictions.cpu().data.numpy()
    else:
        logits = [0 for i in range(max_question_len)]
        predictions = [0 for i in range(max_question_len)]

        output, hidden = self.dec_net(inputs)
        output = self.dropout(output)
        logits[0] = self.fc_net(output)
        _, index = torch.max(logits[0], -1)  # argmax over the vocabulary dimension
        logits[0] = logits[0].view(1, decoder_inputs.size(1), self.voc_size)  # 1, batch_size, voc_size
        predictions[0] = index.cpu().data.numpy()  # batch_size

        for i in range(1, max_question_len):
            prev_output = Variable(torch.from_numpy(predictions[i - 1])).long().cuda()
            prev_output = self.embedding(prev_output)
            inputs = torch.cat([prev_output, encoders[0]], -1)
            output, hidden = self.dec_net(inputs, hidden)
            output = self.dropout(output)
            logits[i] = self.fc_net(output)
            _, index = torch.max(logits[i], -1)
            logits[i] = logits[i].view(1, decoder_inputs.size(1), self.voc_size)  # 1, batch_size, voc_size
            predictions[i] = index.cpu().data.numpy()  # batch_size

        logits = torch.cat(logits)  # qn_steps, batch, voc_size
        predictions = np.array(predictions).transpose(1, 0)
    return logits, predictions

def eval_loss(net, criterion, loader, use_cuda=False):
    """
    Evaluate the loss value for a given 'net' on the dataset
    provided by the loader.

    Args:
        net: the neural net model
        criterion: loss function
        loader: dataloader
        use_cuda: use cuda or not
    Returns:
        loss value and accuracy
    """
    correct = 0
    total_loss = 0
    total = 0  # number of samples
    num_batch = len(loader)

    if use_cuda:
        net.cuda()
    net.eval()

    with torch.no_grad():
        if isinstance(criterion, nn.CrossEntropyLoss):
            for batch_idx, (inputs, targets) in enumerate(loader):
                batch_size = inputs.size(0)
                total += batch_size
                inputs = Variable(inputs)
                targets = Variable(targets)
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                total_loss += loss.item() * batch_size
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets).sum().item()
        elif isinstance(criterion, nn.MSELoss):
            for batch_idx, (inputs, targets) in enumerate(loader):
                batch_size = inputs.size(0)
                total += batch_size
                inputs = Variable(inputs)

                one_hot_targets = torch.FloatTensor(batch_size, 10).zero_()
                one_hot_targets = one_hot_targets.scatter_(1, targets.view(batch_size, 1), 1.0)
                one_hot_targets = one_hot_targets.float()
                one_hot_targets = Variable(one_hot_targets)
                if use_cuda:
                    inputs, one_hot_targets = inputs.cuda(), one_hot_targets.cuda()
                outputs = F.softmax(net(inputs), dim=1)
                loss = criterion(outputs, one_hot_targets)
                total_loss += loss.item() * batch_size
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.cpu().eq(targets).sum().item()

    return total_loss / total, 100. * correct / total

def forward(self, sent_tuple):
    # sent_len: [max_len, ..., min_len] (batch)
    # sent: Variable(seqlen x batch x worddim)
    sent, sent_len = sent_tuple
    bsize = sent.size(1)

    self.init_lstm = self.init_lstm if bsize == self.init_lstm.size(1) else \
        Variable(torch.FloatTensor(2, bsize, self.enc_lstm_dim).zero_()).cuda()

    # Sort by length (keep idx)
    sent_len, idx_sort = np.sort(sent_len)[::-1], np.argsort(-sent_len)
    sent = sent.index_select(1, Variable(torch.cuda.LongTensor(idx_sort)))

    # Handling padding in Recurrent Networks
    sent_packed = nn.utils.rnn.pack_padded_sequence(sent, sent_len)
    sent_output = self.enc_lstm(sent_packed, (self.init_lstm, self.init_lstm))[0]
    # seqlen x batch x 2*nhid
    sent_output = nn.utils.rnn.pad_packed_sequence(sent_output)[0]

    # Un-sort by length
    idx_unsort = np.argsort(idx_sort)
    sent_output = sent_output.index_select(1, Variable(torch.cuda.LongTensor(idx_unsort)))

    sent_output = sent_output.transpose(0, 1).contiguous()

    sent_output_proj = self.proj_lstm(sent_output.view(-1, 2 * self.enc_lstm_dim)).view(
        bsize, -1, 2 * self.enc_lstm_dim)

    sent_keys = self.proj_enc(sent_output.view(-1, 2 * self.enc_lstm_dim)).view(
        bsize, -1, 2 * self.enc_lstm_dim)

    sent_max = torch.max(sent_output, 1)[0].squeeze(1)  # (bsize, 2*nhid)
    sent_summary = self.proj_query(sent_max).unsqueeze(1).expand_as(sent_keys)
    # (bsize, seqlen, 2*nhid)

    sent_M = torch.tanh(sent_keys + sent_summary)  # (bsize, seqlen, 2*nhid) YANG: M = tanh(Wh_i + Wh_avg)
    sent_w = self.query_embedding(Variable(torch.LongTensor(
        bsize * [0]).cuda())).unsqueeze(2)  # (bsize, 2*nhid, 1)
    sent_alphas = self.softmax(sent_M.bmm(sent_w).squeeze(2)).unsqueeze(1)  # (bsize, 1, seqlen)

    if int(time.time()) % 200 == 0:
        print('w', torch.max(sent_w[0]), torch.min(sent_w[0]))
        print('alphas', sent_alphas[0][0][0:sent_len[0]])

    # Get attention vector
    emb = sent_alphas.bmm(sent_output_proj).squeeze(1)

    return emb

def forward(self, frame, DQNs):
    # x: [B,2,84,84]
    self.B = frame.size()[0]

    # Predict mask
    mask = self.predict_mask(frame)  # [B,2,210,160]
    mask = mask.repeat(1, 3, 1, 1)
    # masked_frame = frame * mask
    bias_frame = Variable(torch.ones(1, 3, 480, 640).cuda()) * F.sigmoid(self.bias_frame.bias_frame)
    masked_frame = frame * mask + (1. - mask) * bias_frame

    # print (torch.max(frame)); print (torch.min(frame)); print (torch.mean(frame))
    # print (torch.max(mask)); print (torch.min(mask)); print (torch.mean(mask))
    # print (torch.max(masked_frame)); print (torch.min(masked_frame)); print (torch.mean(masked_frame))

    difs = []
    for i in range(len(DQNs)):
        q_mask = DQNs[i](masked_frame)
        val, index = torch.max(q_mask, 1)
        q_mask = q_mask[:, index]

        q_real = DQNs[i](frame)
        val, index = torch.max(q_real, 1)
        q_real = q_real[:, index]

        dif = torch.mean((q_mask - q_real) ** 2)  # [B,A]
        difs.append(dif)
    difs = torch.stack(difs)
    dif = torch.mean(difs)

    mask = mask.view(self.B, -1)
    mask_sum = torch.mean(torch.sum(mask, dim=1)) * .0000001

    loss = dif + mask_sum
    return loss, dif, mask_sum

def train(train_X):
    model.train()
    total_loss = 0.
    num_correct = 0
    num_total = 0  # averaged per mini-batch
    for batch, i in enumerate(range(0, len(train_X.data) - BATCH_SIZE, BATCH_SIZE)):
        batch_loss = 0.
        X = train_X[i:i + BATCH_SIZE, :, :]
        model.init_stack(BATCH_SIZE)
        valid_X = (X[:, :, len(code_for) - 1] != 1).type(torch.FloatTensor)

        ############################################
        # this sets valid_X to 0 for any symbol not in the list to predict
        for k in range(BATCH_SIZE):
            for j in range(MAX_LENGTH):
                if not in_predict_codes(X[k, j, :].data):
                    valid_X[k, j] = 0
        # print("X[0,:,:] = {}".format(X[0,:,:]))
        # print("valid_X[0] = {}".format(valid_X[0]))
        ############################################

        for j in range(1, MAX_LENGTH):
            a = model.forward(X[:, j - 1, :])
            _, y = torch.max(X[:, j, :], 1)
            _, y_pred = torch.max(a, 1)
            batch_loss += torch.mean(valid_X[:, j] * criterion(a, y))
            num_correct += sum((valid_X[:, j] * (y_pred == y).type(torch.FloatTensor)).data)
            num_total += sum(valid_X[:, j].data)

        # update the weights
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.data
        if batch % 10 == 0:
            print("batch {}: loss={:.4f}, acc={:.2f}".format(
                batch, sum(batch_loss.data), num_correct / num_total))

def forward(self, x, k, s):
    self.B = x.size()[0]  # batch size
    # self.k = k  # number of z samples aka particles P
    # self.s = s  # number of W samples

    elbo1s = []
    logprobs = [[] for _ in range(5)]
    for i in range(s):
        Ws, logpW, logqW = self.sample_W()  # _, [1], [1]
        mu, logvar = self.encode(x)  # [B,Z]
        z, logpz, logqz = self.sample_z(mu, logvar, k=k)  # [P,B,Z], [P,B]
        x_hat = self.decode(Ws, z)  # [P,B,X]

        logpx = log_bernoulli(x_hat, x)  # [P,B]

        elbo = logpx + logpz - logqz  # [P,B]
        if k > 1:
            max_ = torch.max(elbo, 0)[0]  # [B]
            elbo1 = torch.log(torch.mean(torch.exp(elbo - max_), 0)) + max_  # [B]
        elbo = elbo + (logpW * .000001) - (logqW * self.qW_weight)  # [B], logp(x|W)p(w)/q(w)
        elbo1s.append(elbo)

        logprobs[0].append(torch.mean(logpx))
        logprobs[1].append(torch.mean(logpz))
        logprobs[2].append(torch.mean(logqz))
        logprobs[3].append(torch.mean(logpW))
        logprobs[4].append(torch.mean(logqW))

    elbo1s = torch.stack(elbo1s)  # [S,B]
    if s > 1:
        max_ = torch.max(elbo1s, 0)[0]  # [B]
        elbo1 = torch.log(torch.mean(torch.exp(elbo1s - max_), 0)) + max_  # [B]

    elbo = torch.mean(elbo1s)  # [1]

    # for printing
    # logpx = torch.mean(logpx)
    # logpz = torch.mean(logpz)
    # logqz = torch.mean(logqz)
    # self.x_hat_sigmoid = F.sigmoid(x_hat)

    logprobs2 = [torch.mean(torch.stack(aa)) for aa in logprobs]

    return elbo, logprobs2[0], logprobs2[1], logprobs2[2], logprobs2[3], logprobs2[4]

def max_out(x):
    # make sure s2 is even and that the input is 2 dimension
    if len(x.size()) == 2:
        s1, s2 = x.size()
        x = x.unsqueeze(1)
        x = x.view(s1, s2 // 2, 2)
        x, _ = torch.max(x, 2)
    elif len(x.size()) == 3:
        s1, s2, s3 = x.size()
        x = x.unsqueeze(1)
        x = x.view(s1, s2, s3 // 2, 2)
        x, _ = torch.max(x, 3)
    return x

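# --- Usage sketch: maxout over adjacent pairs halves the feature dimension,
# e.g. [batch, 6] -> [batch, 3].
import torch

x = torch.arange(12, dtype=torch.float32).view(2, 6)
y = max_out(x)
print(y.shape)  # torch.Size([2, 3])
print(y[0])     # tensor([1., 3., 5.]) - the max of each consecutive pair
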
def step(self, closure=None):
    """Performs a single optimization step.

    Arguments:
        closure (callable, optional): A closure that reevaluates the model
            and returns the loss.
    """
    loss = None
    if closure is not None:
        loss = closure()

    for group in self.param_groups:
        for p in group['params']:
            if p.grad is None:
                continue
            grad = p.grad.data
            state = self.state[p]

            # State initialization
            if len(state) == 0:
                state['step'] = 0
                state['exp_avg'] = grad.new().resize_as_(grad).zero_()
                state['exp_inf'] = grad.new().resize_as_(grad).zero_()

            exp_avg, exp_inf = state['exp_avg'], state['exp_inf']
            beta1, beta2 = group['betas']
            eps = group['eps']

            state['step'] += 1

            if group['weight_decay'] != 0:
                grad = grad.add(group['weight_decay'], p.data)

            # Update biased first moment estimate.
            exp_avg.mul_(beta1).add_(1 - beta1, grad)
            # Update the exponentially weighted infinity norm.
            norm_buf = torch.cat([
                exp_inf.mul_(beta2).unsqueeze(0),
                grad.abs().add_(eps).unsqueeze_(0)
            ], 0)
            torch.max(norm_buf, 0, keepdim=False, out=(exp_inf, exp_inf.new().long()))

            bias_correction = 1 - beta1 ** state['step']
            clr = group['lr'] / bias_correction

            p.data.addcdiv_(-clr, exp_avg, exp_inf)

    return loss

def evaluate_autoencoder(data_source, epoch):
    # Turn on evaluation mode which disables dropout.
    autoencoder.eval()
    total_loss = 0
    ntokens = len(corpus.dictionary.word2idx)
    all_accuracies = 0
    bcnt = 0
    for i, batch in enumerate(data_source):
        source, target, lengths = batch
        source = to_gpu(args.cuda, Variable(source, volatile=True))
        target = to_gpu(args.cuda, Variable(target, volatile=True))

        mask = target.gt(0)
        masked_target = target.masked_select(mask)
        # examples x ntokens
        output_mask = mask.unsqueeze(1).expand(mask.size(0), ntokens)

        # output: batch x seq_len x ntokens
        output = autoencoder(source, lengths, noise=True)
        flattened_output = output.view(-1, ntokens)

        masked_output = \
            flattened_output.masked_select(output_mask).view(-1, ntokens)
        total_loss += criterion_ce(masked_output / args.temp, masked_target).data

        # accuracy
        max_vals, max_indices = torch.max(masked_output, 1)
        all_accuracies += \
            torch.mean(max_indices.eq(masked_target).float()).data[0]
        bcnt += 1

        aeoutf = "./output/%s/%d_autoencoder.txt" % (args.outf, epoch)
        with open(aeoutf, "a") as f:
            max_values, max_indices = torch.max(output, 2)
            max_indices = \
                max_indices.view(output.size(0), -1).data.cpu().numpy()
            target = target.view(output.size(0), -1).data.cpu().numpy()
            for t, idx in zip(target, max_indices):
                # real sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in t])
                f.write(chars)
                f.write("\n")
                # autoencoder output sentence
                chars = " ".join([corpus.dictionary.idx2word[x] for x in idx])
                f.write(chars)
                f.write("\n\n")

    return total_loss[0] / len(data_source), all_accuracies / bcnt

def evaluate(attention_model, x_test, y_test):
    """
    cv results

    Args:
        attention_model : {object} model
        x_test          : {nplist} x_test
        y_test          : {nplist} y_test
    Returns:
        cv-accuracy
    """
    attention_model.batch_size = x_test.shape[0]
    attention_model.hidden_state = attention_model.init_hidden()
    x_test_var = Variable(torch.from_numpy(x_test).type(torch.LongTensor))
    y_test_pred, _ = attention_model(x_test_var)
    if bool(attention_model.type):
        y_preds = torch.max(y_test_pred, 1)[1]
        y_test_var = Variable(torch.from_numpy(y_test).type(torch.LongTensor))
    else:
        y_preds = torch.round(y_test_pred.type(torch.DoubleTensor).squeeze(1))
        y_test_var = Variable(torch.from_numpy(y_test).type(torch.DoubleTensor))
    return torch.eq(y_preds, y_test_var).data.sum() / x_test_var.size(0)

def get_log_q(self):
    # self.log_class_weights = self.gmm_encoder.forward(self.y)
    fudge_lower_bdd = torch.Tensor([-8])
    self.log_class_weights = log_softmax(
        torch.max(self.var_params['free_class_weights'], fudge_lower_bdd))

    return self.log_class_weights

def fit(self, X, Y, n_epoch=10, batch_size=128, en_shuffle=True):
    global_step = 0
    n_batch = len(X) // batch_size
    total_steps = int(n_epoch * n_batch)

    for epoch in range(n_epoch):
        if en_shuffle:
            shuffled = np.random.permutation(len(X))
            X = X[shuffled]
            Y = Y[shuffled]
        state = None
        for local_step, (X_batch, Y_batch) in enumerate(
                zip(self.gen_batch(X, batch_size), self.gen_batch(Y, batch_size))):
            y_batch = Y_batch.ravel()
            inputs = torch.autograd.Variable(torch.from_numpy(X_batch.astype(np.int64)))
            labels = torch.autograd.Variable(torch.from_numpy(y_batch.astype(np.int64)))
            if self.stateful and (len(X_batch) == batch_size):
                preds, state = self.forward(inputs, state)
                state = (torch.autograd.Variable(state[0].data),
                         torch.autograd.Variable(state[1].data))
            else:
                preds, _ = self.forward(inputs)

            loss = self.criterion(preds, labels)    # cross entropy loss
            self.optimizer, lr = self.adjust_lr(self.optimizer, global_step, total_steps)
            self.optimizer.zero_grad()              # clear gradients for this training step
            loss.backward()                         # backpropagation, compute gradients
            self.optimizer.step()                   # apply gradients
            global_step += 1

            preds = torch.max(preds, 1)[1].data.numpy().squeeze()
            acc = (preds == y_batch).mean()
            if local_step % 100 == 0:
                print('Epoch [%d/%d] | Step [%d/%d] | Loss: %.4f | Acc: %.4f | LR: %.4f'
                      % (epoch + 1, n_epoch, local_step, n_batch, loss.data[0], acc, lr))

def predict(self, dataset):
    """Predict target for dataset.

    Parameters:
    ----------
    dataset (dict): dictionary with the testing dataset -
        X_wide_test, X_deep_test, target

    Returns:
    --------
    array-like with the target for dataset
    """
    X_w = Variable(torch.from_numpy(dataset.wide)).float()
    X_d = Variable(torch.from_numpy(dataset.deep))

    if use_cuda:
        X_w, X_d = X_w.cuda(), X_d.cuda()

    # set the model in evaluation mode so dropout is not applied
    net = self.eval()
    pred = net(X_w, X_d).cpu()
    if self.method == "regression":
        return pred.squeeze(1).data.numpy()
    if self.method == "logistic":
        return (pred > 0.5).squeeze(1).data.numpy()
    if self.method == "multiclass":
        _, pred_cat = torch.max(pred, 1)
        return pred_cat.data.numpy()

def adversarial_learning(best_cla_model_path):
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # print(device)
    parser = argparse.ArgumentParser("Image classification!")
    parser.add_argument('--input_dir_trainSet', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/Joint_Training/ResNet18/cifar10/train/train.pkl',
                        help='data set dir path')
    parser.add_argument('--input_dir_testSet', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/Joint_Training/ResNet18/cifar10/test/test.pkl',
                        help='data set dir path')
    parser.add_argument('--epochs', type=int, default=300, help='Epoch default:50.')
    parser.add_argument('--image_size', type=int, default=32, help='Image Size default:28.')
    parser.add_argument('--batch_size', type=int, default=512, help='Batch_size default:256.')
    parser.add_argument('--lr', type=float, default=0.01, help='learning_rate. Default=0.01')
    parser.add_argument('--num_classes', type=int, default=10, help='num classes')
    parser.add_argument('--model_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/AdversarialLearning/ResNet18/cifar10/model/',
                        help='Save model path')
    parser.add_argument('--acc_file_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/AdversarialLearning/ResNet18/cifar10/acc.txt',
                        help='Save accuracy file')
    parser.add_argument('--best_acc_file_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/'
                                'AdversarialLearning/ResNet18/cifar10/best_acc.txt',
                        help='Save best accuracy file')
    parser.add_argument('--log_file_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/AdversarialLearning/ResNet18/cifar10/log.txt',
                        help='Save log file')
    args = parser.parse_args()

    # Load model
    model = resnet_cifar.resnet18(pretrained=False)
    model.to(device)
    # summary(model, (3, 32, 32))
    # print(model)

    # Load pre-trained weights
    model.load_state_dict(torch.load(best_cla_model_path))
    model.to(device)

    # criterion
    criterion = nn.CrossEntropyLoss().to(device)

    # batch_shape
    batch_shape = [args.batch_size, 3, args.image_size, args.image_size]

    best_acc_clean = 0  # initialize best clean test-set accuracy
    best_acc_adv = 0    # initialize best adversarial test-set accuracy
    best_epoch = 0      # initialize best epoch
    time_k = time.time()

    print("Start Adversarial Training, Resnet-18!")
    with open(args.acc_file_path, "w") as f1:
        with open(args.log_file_path, "w") as f2:
            for epoch in range(0, args.epochs):
                if epoch + 1 <= 100:
                    args.lr = 0.1
                elif 100 < epoch + 1 <= 200:
                    args.lr = 0.01
                elif 200 < epoch + 1 <= 250:
                    args.lr = 0.001
                else:
                    args.lr = 0.0001

                # Optimization
                optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

                print('Epoch: %d' % (epoch + 1))
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                batchId = 1
                for batchSize, images_train, labels_train in load_train_set(args.input_dir_trainSet, batch_shape):
                    start = time.time()

                    # data prepare
                    images_train = torch.from_numpy(images_train).type(torch.FloatTensor).to(device)
                    labels_train = torch.from_numpy(labels_train).type(torch.LongTensor).to(device)

                    model.to(device)
                    model.train()
                    optimizer.zero_grad()
                    # forward + backward
                    outputs = model(images_train)
                    loss = criterion(outputs, labels_train)
                    loss.backward()
                    optimizer.step()

                    # print loss and accuracy after every batch
                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels_train.size(0)
                    correct += predicted.eq(labels_train.data).cpu().sum().item()
                    # print(100. * correct / total)
                    end = time.time()
                    print('[Epoch:%d/%d] | [Batch:%d/%d] | Loss: %.03f | Acc: %.2f%% | Lr: %.04f | Time: %.03fs'
                          % (epoch + 1, args.epochs, batchId, (100000 / args.batch_size) + 1,
                             sum_loss / batchId, correct / total * 100, args.lr, (end - start)))
                    f2.write('[Epoch:%d/%d] | [Batch:%d/%d] | Loss: %.03f | Acc: %.2f%% | Lr: %.4f | Time: %.3fs'
                             % (epoch + 1, args.epochs, batchId, (100000 / args.batch_size) + 1,
                                sum_loss / batchId, correct / total * 100, args.lr, (end - start)))
                    f2.write('\n')
                    f2.flush()
                    batchId += 1

                # evaluate on the test sets (every 50 epochs here)
                if (epoch + 1) % 50 == 0:
                    print("Waiting for Testing!")
                    with torch.no_grad():
                        # evaluate on the clean test set
                        correct_clean = 0
                        total_clean = 0
                        for batchSize, images_test_clean, labels_test_clean in \
                                load_test_set_clean(args.input_dir_testSet, batch_shape):
                            model.eval()

                            # data prepare
                            images_test_clean = torch.from_numpy(images_test_clean).type(torch.FloatTensor).to(device)
                            labels_test_clean = torch.from_numpy(labels_test_clean).type(torch.LongTensor).to(device)

                            model.to(device)
                            outputs = model(images_test_clean)
                            # take the class with the highest score (index into outputs.data)
                            _, predicted = torch.max(outputs.data, 1)
                            total_clean += labels_test_clean.size(0)
                            correct_clean += (predicted == labels_test_clean).sum().item()
                        print('Clean Test Set Accuracy:%.2f%%' % (correct_clean / total_clean * 100))
                        acc_clean = correct_clean / total_clean * 100

                        # evaluate on the adversarial test set
                        correct_adv = 0
                        total_adv = 0
                        for batchSize, images_test_adv, labels_test_adv in \
                                load_test_set_adv(args.input_dir_testSet, batch_shape):
                            model.eval()

                            # data prepare
                            images_test_adv = torch.from_numpy(images_test_adv).type(torch.FloatTensor).to(device)
                            labels_test_adv = torch.from_numpy(labels_test_adv).type(torch.LongTensor).to(device)

                            model.to(device)
                            outputs = model(images_test_adv)
                            # take the class with the highest score (index into outputs.data)
                            _, predicted = torch.max(outputs.data, 1)
                            total_adv += labels_test_adv.size(0)
                            correct_adv += (predicted == labels_test_adv).sum().item()
                        print('Adv Test Set Accuracy:%.2f%%' % (correct_adv / total_adv * 100))
                        acc_adv = correct_adv / total_adv * 100

                        # save test-set accuracies to acc.txt
                        f1.write("Epoch=%03d,Clean Test Set Accuracy= %.2f%%" % (epoch + 1, acc_clean))
                        f1.write('\n')
                        f1.write("Epoch=%03d,Adv Test Set Accuracy= %.2f%%" % (epoch + 1, acc_adv))
                        f1.write('\n')
                        f1.flush()

                        # track the best test accuracies, write them to best_acc.txt,
                        # and save the model that achieves them
                        if acc_clean > best_acc_clean and acc_adv > best_acc_adv:
                            if epoch != 49:
                                os.remove(args.model_path + "model_" + str(best_epoch) + ".pth")
                            best_acc_clean = acc_clean
                            best_acc_adv = acc_adv
                            print('Saving model!')
                            torch.save(model.state_dict(), '%s/model_%d.pth' % (args.model_path, epoch + 1))
                            print('Model saved!')
                            f3 = open(args.best_acc_file_path, "w")
                            f3.write("Epoch=%d,Best Accuracy of Clean Set = %.2f%%,Best Accuracy of Adv Set = %.2f%%"
                                     % (epoch + 1, best_acc_clean, best_acc_adv))
                            f3.close()
                            best_epoch = epoch + 1

    time_j = time.time()
    print("Training Finished, Total Epoch = %d, Best Epoch = %d, Best Accuracy of Clean Set = %.2f%%, "
          "Best Accuracy of Adv Set = %.2f%%, Total Time = %.2f"
          % (args.epochs, best_epoch, best_acc_clean, best_acc_adv, (time_j - time_k) / 3600))

def main():
    """
    Launches text to speech (inference).
    Inference is executed on a single GPU.
    """
    parser = argparse.ArgumentParser(description='PyTorch FastPitch Inference',
                                     allow_abbrev=False)
    parser = parse_args(parser)
    args, unk_args = parser.parse_known_args()

    DLLogger.init(backends=[
        JSONStreamBackend(Verbosity.DEFAULT, args.log_file),
        StdOutBackend(Verbosity.VERBOSE)
    ])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'FastPitch_PyT'})

    if args.output is not None:
        Path(args.output).mkdir(parents=False, exist_ok=True)

    device = torch.device('cuda' if args.cuda else 'cpu')

    if args.fastpitch is not None:
        generator = load_and_setup_model(
            'FastPitch', parser, args.fastpitch, args.amp_run, device,
            unk_args=unk_args, forward_is_infer=True, ema=args.ema,
            jitable=args.torchscript)
        if args.torchscript:
            generator = torch.jit.script(generator)
    else:
        generator = None

    if args.waveglow is not None:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            waveglow = load_and_setup_model(
                'WaveGlow', parser, args.waveglow, args.amp_run, device,
                unk_args=unk_args, forward_is_infer=True, ema=args.ema)
            denoiser = Denoiser(waveglow).to(device)
            waveglow = getattr(waveglow, 'infer', waveglow)
    else:
        waveglow = None

    if len(unk_args) > 0:
        raise ValueError(f'Invalid options {unk_args}')

    fields = load_fields(args.input)
    batches = prepare_input_sequence(
        fields, device, args.batch_size, args.dataset_path,
        load_mels=(generator is None))

    if args.include_warmup:
        # Use real data rather than synthetic - FastPitch predicts len
        for i in range(3):
            with torch.no_grad():
                if generator is not None:
                    b = batches[0]
                    mel, *_ = generator(b['text'], b['text_lens'])
                if waveglow is not None:
                    audios = waveglow(mel, sigma=args.sigma_infer).float()
                    _ = denoiser(audios, strength=args.denoising_strength)

    gen_measures = MeasureTime()
    waveglow_measures = MeasureTime()

    gen_kw = {'pace': args.pace,
              'pitch_tgt': None,
              'pitch_transform': build_pitch_transformation(args)}

    if args.torchscript:
        gen_kw.pop('pitch_transform')

    all_utterances = 0
    all_samples = 0
    all_letters = 0
    all_frames = 0

    reps = args.repeats
    log_enabled = reps == 1
    log = lambda s, d: DLLogger.log(step=s, data=d) if log_enabled else None

    for repeat in (tqdm.tqdm(range(reps)) if reps > 1 else range(reps)):
        for idx, b in enumerate(batches):
            if generator is None:
                log(0, {'Synthesizing from ground truth mels'})
                mel, mel_lens = b['mel'], b['mel_lens']
            else:
                with torch.no_grad(), gen_measures:
                    mel, mel_lens, *_ = generator(b['text'], b['text_lens'], **gen_kw)

                gen_infer_perf = mel.size(0) * mel.size(2) / gen_measures[-1]
                all_letters += b['text_lens'].sum().item()
                all_frames += mel.size(0) * mel.size(2)
                log(0, {"generator_frames_per_sec": gen_infer_perf})
                log(0, {"generator_latency": gen_measures[-1]})

            if waveglow is not None:
                with torch.no_grad(), waveglow_measures:
                    audios = waveglow(mel, sigma=args.sigma_infer)
                    audios = denoiser(audios.float(),
                                      strength=args.denoising_strength).squeeze(1)

                all_utterances += len(audios)
                all_samples += sum(audio.size(0) for audio in audios)
                waveglow_infer_perf = (
                    audios.size(0) * audios.size(1) / waveglow_measures[-1])

                log(0, {"waveglow_samples_per_sec": waveglow_infer_perf})
                log(0, {"waveglow_latency": waveglow_measures[-1]})

                if args.output is not None and reps == 1:
                    for i, audio in enumerate(audios):
                        audio = audio[:mel_lens[i].item() * args.stft_hop_length]

                        if args.fade_out:
                            fade_len = args.fade_out * args.stft_hop_length
                            fade_w = torch.linspace(1.0, 0.0, fade_len)
                            audio[-fade_len:] *= fade_w.to(audio.device)

                        audio = audio / torch.max(torch.abs(audio))
                        fname = b['output'][idx] if 'output' in b else f'audio_{idx}.wav'
                        audio_path = Path(args.output, fname)
                        write(audio_path, args.sampling_rate, audio.cpu().numpy())

            if generator is not None and waveglow is not None:
                log(0, {"latency": (gen_measures[-1] + waveglow_measures[-1])})

    log_enabled = True
    if generator is not None:
        gm = np.sort(np.asarray(gen_measures))
        log('avg', {"generator letters/s": all_letters / gm.sum()})
        log('avg', {"generator_frames/s": all_frames / gm.sum()})
        log('avg', {"generator_latency": gm.mean()})
        log('90%', {"generator_latency": gm.mean() + norm.ppf((1.0 + 0.90) / 2) * gm.std()})
        log('95%', {"generator_latency": gm.mean() + norm.ppf((1.0 + 0.95) / 2) * gm.std()})
        log('99%', {"generator_latency": gm.mean() + norm.ppf((1.0 + 0.99) / 2) * gm.std()})
    if waveglow is not None:
        wm = np.sort(np.asarray(waveglow_measures))
        log('avg', {"waveglow_samples/s": all_samples / wm.sum()})
        log('avg', {"waveglow_latency": wm.mean()})
        log('90%', {"waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.90) / 2) * wm.std()})
        log('95%', {"waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.95) / 2) * wm.std()})
        log('99%', {"waveglow_latency": wm.mean() + norm.ppf((1.0 + 0.99) / 2) * wm.std()})
    if generator is not None and waveglow is not None:
        m = gm + wm
        rtf = all_samples / (len(batches) * all_utterances * m.mean() * args.sampling_rate)
        log('avg', {"samples/s": all_samples / m.sum()})
        log('avg', {"letters/s": all_letters / m.sum()})
        log('avg', {"latency": m.mean()})
        log('avg', {"RTF": rtf})
        log('90%', {"latency": m.mean() + norm.ppf((1.0 + 0.90) / 2) * m.std()})
        log('95%', {"latency": m.mean() + norm.ppf((1.0 + 0.95) / 2) * m.std()})
        log('99%', {"latency": m.mean() + norm.ppf((1.0 + 0.99) / 2) * m.std()})
    DLLogger.flush()

def linearize_dynamics(self, x, u, dynamics, diff):
    # TODO: Cleanup variable usage.

    n_batch = x[0].size(0)

    if self.grad_method == GradMethods.ANALYTIC:
        _u = Variable(u[:-1].view(-1, self.n_ctrl), requires_grad=True)
        _x = Variable(x[:-1].contiguous().view(-1, self.n_state), requires_grad=True)

        # This inefficiently calls dynamics again, but is worth it because
        # we can efficiently compute grad_input for every time step at once.
        _new_x = dynamics(_x, _u)

        # This check is a little expensive and should only be done if
        # modifying this code.
        # assert torch.abs(_new_x.data - torch.cat(x[1:])).max() <= 1e-6

        if not diff:
            _new_x = _new_x.data
            _x = _x.data
            _u = _u.data

        R, S = dynamics.grad_input(_x, _u)

        f = _new_x - util.bmv(R, _x) - util.bmv(S, _u)
        f = f.view(self.T - 1, n_batch, self.n_state)

        R = R.contiguous().view(self.T - 1, n_batch, self.n_state, self.n_state)
        S = S.contiguous().view(self.T - 1, n_batch, self.n_state, self.n_ctrl)
        F = torch.cat((R, S), 3)

        if not diff:
            F, f = list(map(Variable, [F, f]))
        return F, f
    else:
        # TODO: This is inefficient and confusing.
        x_init = x[0]
        x = [x_init]
        F, f = [], []
        for t in range(self.T):
            if t < self.T - 1:
                xt = Variable(x[t], requires_grad=True)
                ut = Variable(u[t], requires_grad=True)
                xut = torch.cat((xt, ut), 1)
                new_x = dynamics(xt, ut)

                # Linear dynamics approximation.
                if self.grad_method in [GradMethods.AUTO_DIFF,
                                        GradMethods.ANALYTIC_CHECK]:
                    Rt, St = [], []
                    for j in range(self.n_state):
                        Rj, Sj = torch.autograd.grad(
                            new_x[:, j].sum(), [xt, ut], retain_graph=True)
                        if not diff:
                            Rj, Sj = Rj.data, Sj.data
                        Rt.append(Rj)
                        St.append(Sj)
                    Rt = torch.stack(Rt, dim=1)
                    St = torch.stack(St, dim=1)

                    if self.grad_method == GradMethods.ANALYTIC_CHECK:
                        assert False  # Not updated
                        Rt_autograd, St_autograd = Rt, St
                        Rt, St = dynamics.grad_input(xt, ut)
                        eps = 1e-8
                        if torch.max(torch.abs(Rt - Rt_autograd)).data[0] > eps or \
                           torch.max(torch.abs(St - St_autograd)).data[0] > eps:
                            print('''
nmpc.ANALYTIC_CHECK error: The analytic derivative of the dynamics function may be off.
''')
                        else:
                            print('''
nmpc.ANALYTIC_CHECK: The analytic derivative of the dynamics function seems correct.
Re-run with GradMethods.ANALYTIC to continue.
''')
                        sys.exit(0)
                elif self.grad_method == GradMethods.FINITE_DIFF:
                    Rt, St = [], []
                    for i in range(n_batch):
                        Ri = util.jacobian(lambda s: dynamics(s, ut[i]), xt[i], 1e-4)
                        Si = util.jacobian(lambda a: dynamics(xt[i], a), ut[i], 1e-4)
                        if not diff:
                            Ri, Si = Ri.data, Si.data
                        Rt.append(Ri)
                        St.append(Si)
                    Rt = torch.stack(Rt)
                    St = torch.stack(St)
                else:
                    assert False

                Ft = torch.cat((Rt, St), 2)
                F.append(Ft)

                if not diff:
                    xt, ut, new_x = xt.data, ut.data, new_x.data
                ft = new_x - util.bmv(Rt, xt) - util.bmv(St, ut)
                f.append(ft)

            if t < self.T - 1:
                x.append(util.detach_maybe(new_x))

        F = torch.stack(F, 0)
        f = torch.stack(f, 0)
        if not diff:
            F, f = list(map(Variable, [F, f]))
        return F, f

def train(self):
    self.train_hist = {}
    self.train_hist['D_loss'] = []
    self.train_hist['G_loss'] = []
    self.train_hist['per_epoch_time'] = []
    self.train_hist['total_time'] = []

    self.y_real_, self.y_fake_ = torch.ones(self.batch_size, 1), torch.zeros(self.batch_size, 1)
    if self.gpu_mode:
        self.y_real_, self.y_fake_ = self.y_real_.cuda(), self.y_fake_.cuda()

    self.D.train()
    print('training start!!')
    start_time = time.time()
    for epoch in range(self.epoch):
        self.G.train()
        epoch_start_time = time.time()
        for iter, (x_, y_) in enumerate(self.data_loader):
            if iter == self.data_loader.dataset.__len__() // self.batch_size:
                break

            z_ = torch.rand((self.batch_size, self.z_dim))
            # y_vec_ = torch.zeros((self.batch_size, self.class_num)).scatter_(1, y_.type(torch.LongTensor).unsqueeze(1), 1)
            # print(y_vec_x.size())
            y_vec_ = y_.type(torch.FloatTensor)
            y_vec_ = y_vec_.unsqueeze(1)
            # print("y_vec_:", y_vec_.size())
            # print("y_:", y_)
            # print("z_:", type(z_))
            # print("y_vec_:", type(y_vec_))
            if self.gpu_mode:
                x_, z_, y_vec_ = x_.cuda(), z_.cuda(), y_vec_.cuda()

            # update D network
            self.D_optimizer.zero_grad()

            D_real, C_real = self.D(x_)
            D_real_loss = self.BCE_loss(D_real, self.y_real_)
            C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1])

            G_ = self.G(z_, y_vec_)
            D_fake, C_fake = self.D(G_)
            D_fake_loss = self.BCE_loss(D_fake, self.y_fake_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])

            D_loss = D_real_loss + C_real_loss + D_fake_loss + C_fake_loss
            self.train_hist['D_loss'].append(D_loss.item())

            D_loss.backward()
            self.D_optimizer.step()

            # update G network
            self.G_optimizer.zero_grad()

            G_ = self.G(z_, y_vec_)
            D_fake, C_fake = self.D(G_)

            G_loss = self.BCE_loss(D_fake, self.y_real_)
            C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1])
            G_loss += C_fake_loss
            self.train_hist['G_loss'].append(G_loss.item())

            G_loss.backward()
            self.G_optimizer.step()

            if ((iter + 1) % 100) == 0:
                print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f" %
                      ((epoch + 1), (iter + 1),
                       self.data_loader.dataset.__len__() // self.batch_size,
                       D_loss.item(), G_loss.item()))

        self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time)
        with torch.no_grad():
            self.visualize_results((epoch + 1))

    self.train_hist['total_time'].append(time.time() - start_time)
    print("Avg one epoch time: %.2f, total %d epochs time: %.2f" %
          (np.mean(self.train_hist['per_epoch_time']), self.epoch,
           self.train_hist['total_time'][0]))
    print("Training finish!... save training results")

    self.save()
    utils.generate_animation(
        self.result_dir + '/' + self.dataset + '/' + self.model_name + '/' + self.model_name,
        self.epoch)
    utils.loss_plot(self.train_hist,
                    os.path.join(self.save_dir, self.dataset, self.model_name),
                    self.model_name)

def forward(self, x):
    x = self.filter(x)
    out = torch.split(x, self.out_channels, 1)
    return torch.max(out[0], out[1])

inputs = sample_batch[0].to(device)
labels = sample_batch[1].to(device)

# Set up the preparation of network and optimizer
net.train()
optimizer.zero_grad()

outputs = net(inputs)
loss = loss_fc(outputs, labels)
loss.backward()
optimizer.step()

running_loss += loss.item()
tr_total += labels.size(0)
tr_correct += (torch.max(outputs, 1)[1] == labels).sum().item()

if (i + 1) % 200 == 0:
    # test
    for sample_batch in val_dataloader:
        inputs = sample_batch[0].to(device)
        labels = sample_batch[1].to(device)

        net.eval()
        prediction = net.predict(inputs)
        ts_correct += (prediction == labels).sum().item()
        ts_total += labels.size(0)

    tr_loss = running_loss / 200
    tr_acc = tr_correct / tr_total
    ts_acc = ts_correct / ts_total

def _sample_rois_pytorch(self, all_rois, gt_boxes, fg_rois_per_image,
                         rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    # TODO added new column to gt_boxes, so check that dims are okay
    overlaps = bbox_overlaps_batch(all_rois, gt_boxes[:, :, :5])

    max_overlaps, gt_assignment = torch.max(overlaps, 2)

    batch_size = overlaps.size(0)
    num_proposal = overlaps.size(1)
    num_boxes_per_img = overlaps.size(2)

    offset = torch.arange(0, batch_size) * gt_boxes.size(1)
    offset = offset.view(-1, 1).type_as(gt_assignment) + gt_assignment

    labels = gt_boxes[:, :, 4].contiguous().view(-1).index(offset.view(-1))\
        .view(batch_size, -1)

    labels_batch = labels.new(batch_size, rois_per_image).zero_()
    rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()
    gt_rois_batch = all_rois.new(batch_size, rois_per_image, 5).zero_()

    # Guard against the case when an image has fewer than max_fg_rois_per_image
    # foreground RoIs
    for i in range(batch_size):
        fg_inds = torch.nonzero(max_overlaps[i] >= cfg.TRAIN.FG_THRESH).view(-1)
        fg_num_rois = fg_inds.numel()

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_inds = torch.nonzero((max_overlaps[i] < cfg.TRAIN.BG_THRESH_HI) &
                                (max_overlaps[i] >= cfg.TRAIN.BG_THRESH_LO)).view(-1)
        bg_num_rois = bg_inds.numel()

        if fg_num_rois > 0 and bg_num_rois > 0:
            # sampling fg
            fg_rois_per_this_image = min(fg_rois_per_image, fg_num_rois)

            # torch.randperm seems to have a bug in multi-GPU settings that causes
            # a segfault. See https://github.com/pytorch/pytorch/issues/1868 for
            # more details. Use numpy instead.
            # rand_num = torch.randperm(fg_num_rois).long().cuda()
            rand_num = torch.from_numpy(np.random.permutation(fg_num_rois)).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num[:fg_rois_per_this_image]]

            # sampling bg
            bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

            # torch.rand seems to have a bug: it can generate a very large number
            # and raise an error. Use numpy rand instead.
            # rand_num = (torch.rand(bg_rois_per_this_image) * bg_num_rois).long().cuda()
            rand_num = np.floor(np.random.rand(bg_rois_per_this_image) * bg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            bg_inds = bg_inds[rand_num]
        elif fg_num_rois > 0 and bg_num_rois == 0:
            # sampling fg
            # rand_num = torch.floor(torch.rand(rois_per_image) * fg_num_rois).long().cuda()
            rand_num = np.floor(np.random.rand(rois_per_image) * fg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            fg_inds = fg_inds[rand_num]
            fg_rois_per_this_image = rois_per_image
            bg_rois_per_this_image = 0
        elif bg_num_rois > 0 and fg_num_rois == 0:
            # sampling bg
            # rand_num = torch.floor(torch.rand(rois_per_image) * bg_num_rois).long().cuda()
            rand_num = np.floor(np.random.rand(rois_per_image) * bg_num_rois)
            rand_num = torch.from_numpy(rand_num).type_as(gt_boxes).long()
            bg_inds = bg_inds[rand_num]
            bg_rois_per_this_image = rois_per_image
            fg_rois_per_this_image = 0
        else:
            raise ValueError(
                "bg_num_rois = 0 and fg_num_rois = 0, this should not happen!")

        # The indices that we're selecting (both fg and bg)
        keep_inds = torch.cat([fg_inds, bg_inds], 0)

        # Select sampled values from various arrays:
        labels_batch[i].copy_(labels[i][keep_inds])

        # Clamp labels for the background RoIs to 0
        if fg_rois_per_this_image < rois_per_image:
            labels_batch[i][fg_rois_per_this_image:] = 0

        rois_batch[i] = all_rois[i][keep_inds]
        rois_batch[i, :, 0] = i

        gt_rois_batch[i] = gt_boxes[i][gt_assignment[i][keep_inds]]

    bbox_target_data = self._compute_targets_pytorch(
        rois_batch[:, :, 1:5], gt_rois_batch[:, :, :4])

    bbox_targets, bbox_inside_weights = \
        self._get_bbox_regression_labels_pytorch(bbox_target_data, labels_batch, num_classes)

    return labels_batch, rois_batch, bbox_targets, bbox_inside_weights

else:
    model.set_train_data(train_x[:, 1:], train_y[:, 1:], strict=False)
    gp_optimizer.zero_grad()
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)
    output = model(train_x[:, :1])
    gp_score = mll(output, train_y[:, :1])
    loss = -gp_score.sum()
    loss.backward()
    gp_optimizer.step()
    model.set_train_data(train_x, train_y, strict=False)

p_optimizer.zero_grad()
model.eval()
likelihood.eval()
m = model(train_x).mean
best_score, best_idx = torch.max(m, dim=1)
best_phi[i] = inputs_unrolled[i * n_history + best_idx]
q = Q(best_phi[i])
z, qz = q()
_, pz_score = pz(z)
_, px_score = px(z, x)
true_score = (pz_score.mean() + px_score.mean()) - qz.mean()
(-true_score).backward()
p_optimizer.step()

if args.q == "mem":
    p_optimizer.zero_grad()
    best_score, best_idx = torch.max(train_y, dim=1)
    best_phi[i] = inputs_unrolled[i * n_history + best_idx]
    q = Q(best_phi[i])

def get_region_boxes(output, conf_thresh, num_classes, anchors, num_anchors,
                     only_objectness=1, validation=False):
    anchor_step = len(anchors) // num_anchors
    if output.dim() == 3:
        output = output.unsqueeze(0)
    batch = output.size(0)
    assert (output.size(1) == (5 + num_classes) * num_anchors)
    h = output.size(2)
    w = output.size(3)

    t0 = time.time()
    all_boxes = []
    output = output.view(batch * num_anchors, 5 + num_classes, h * w).transpose(
        0, 1).contiguous().view(5 + num_classes, batch * num_anchors * h * w)

    grid_x = torch.linspace(0, w - 1, w).repeat(h, 1).repeat(
        batch * num_anchors, 1, 1).view(batch * num_anchors * h * w).type_as(output)  # cuda()
    grid_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().repeat(
        batch * num_anchors, 1, 1).view(batch * num_anchors * h * w).type_as(output)  # cuda()
    xs = torch.sigmoid(output[0]) + grid_x
    ys = torch.sigmoid(output[1]) + grid_y

    anchor_w = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(
        1, torch.LongTensor([0]))
    anchor_h = torch.Tensor(anchors).view(num_anchors, anchor_step).index_select(
        1, torch.LongTensor([1]))
    anchor_w = anchor_w.repeat(batch, 1).repeat(1, 1, h * w).view(
        batch * num_anchors * h * w).type_as(output)  # cuda()
    anchor_h = anchor_h.repeat(batch, 1).repeat(1, 1, h * w).view(
        batch * num_anchors * h * w).type_as(output)  # cuda()
    ws = torch.exp(output[2]) * anchor_w
    hs = torch.exp(output[3]) * anchor_h

    det_confs = torch.sigmoid(output[4])

    cls_confs = torch.nn.Softmax()(Variable(output[5:5 + num_classes].transpose(0, 1))).data
    cls_max_confs, cls_max_ids = torch.max(cls_confs, 1)
    cls_max_confs = cls_max_confs.view(-1)
    cls_max_ids = cls_max_ids.view(-1)
    t1 = time.time()

    sz_hw = h * w
    sz_hwa = sz_hw * num_anchors
    det_confs = convert2cpu(det_confs)
    cls_max_confs = convert2cpu(cls_max_confs)
    cls_max_ids = convert2cpu_long(cls_max_ids)
    xs = convert2cpu(xs)
    ys = convert2cpu(ys)
    ws = convert2cpu(ws)
    hs = convert2cpu(hs)
    if validation:
        cls_confs = convert2cpu(cls_confs.view(-1, num_classes))
    t2 = time.time()

    for b in range(batch):
        boxes = []
        for cy in range(h):
            for cx in range(w):
                for i in range(num_anchors):
                    ind = b * sz_hwa + i * sz_hw + cy * w + cx
                    det_conf = det_confs[ind]
                    if only_objectness:
                        conf = det_confs[ind]
                    else:
                        conf = det_confs[ind] * cls_max_confs[ind]

                    if conf > conf_thresh:
                        bcx = xs[ind]
                        bcy = ys[ind]
                        bw = ws[ind]
                        bh = hs[ind]
                        cls_max_conf = cls_max_confs[ind]
                        cls_max_id = cls_max_ids[ind]
                        box = [bcx / w, bcy / h, bw / w, bh / h,
                               det_conf, cls_max_conf, cls_max_id]
                        if (not only_objectness) and validation:
                            for c in range(num_classes):
                                tmp_conf = cls_confs[ind][c]
                                if c != cls_max_id and det_confs[ind] * tmp_conf > conf_thresh:
                                    box.append(tmp_conf)
                                    box.append(c)
                        boxes.append(box)
        all_boxes.append(boxes)
    t3 = time.time()
    if False:
        print('---------------------------------')
        print('matrix computation : %f' % (t1 - t0))
        print('        gpu to cpu : %f' % (t2 - t1))
        print('      boxes filter : %f' % (t3 - t2))
        print('---------------------------------')
    return all_boxes

def _minimize_LBFGS(objective_function, x_initial, lr=1, max_iter=500, tol=1e-5,
                    line_search='Wolfe', interpolate=True, max_ls=25,
                    history_size=100, out=True):
    model = ModelfromFunction(objective_function, x_initial)

    # Define optimizer
    optimizer = FullBatchLBFGS(model.parameters(), lr=lr, history_size=history_size,
                               line_search=line_search, debug=False)

    # Main training loop
    if out:
        print('===================================================================================')
        print('Solving the Minimization Problem')
        print('===================================================================================')
        print('    Iter:    |     F       |    ||g||    | |x - y|/|x| |   F Evals   |    alpha    ')
        print('-----------------------------------------------------------------------------------')
    func_evals = 0

    optimizer.zero_grad()
    obj = model()
    obj.backward()
    grad = model.grad()
    func_evals = func_evals + 1

    x_old = model.x().clone()
    x_new = x_old.clone()
    f_old = obj

    # Main loop
    for n_iter in range(0, max_iter):

        # Define closure for line search
        def closure():
            optimizer.zero_grad()
            loss_fn = model()
            return loss_fn

        # Perform line search step
        options = {'closure': closure, 'current_loss': obj, 'eta': 2, 'max_ls': max_ls,
                   'interpolate': interpolate, 'inplace': False}
        if line_search == 'Armijo':
            obj, lr, backtracks, clos_evals, desc_dir, fail = optimizer.step(options=options)

            # Compute gradient at new iterate
            obj.backward()
            grad = optimizer._gather_flat_grad()
        elif line_search == 'Wolfe':
            obj, grad, lr, backtracks, clos_evals, grad_evals, desc_dir, fail = optimizer.step(options=options)

        x_new.copy_(model.x())
        func_evals = func_evals + clos_evals

        # Compute quantities for checking convergence
        grad_norm = torch.norm(grad)
        x_dist = torch.norm(x_new - x_old) / torch.norm(x_old)
        f_dist = torch.abs(obj - f_old) / torch.max(torch.tensor(1, dtype=torch.float), torch.abs(f_old))

        # Print data
        if out:
            print('  %.3e  |  %.3e  |  %.3e  |  %.3e  |  %.3e  |  %.3e  '
                  % (n_iter + 1, obj.item(), grad_norm.item(), x_dist.item(), clos_evals, lr))

        # Stopping criterion
        if fail or torch.isnan(obj) or n_iter == max_iter - 1:
            break
        elif torch.norm(grad) < tol or x_dist < 1e-5 or f_dist < 1e-9 or obj.item() == -float('inf'):
            break

        x_old.copy_(x_new)
        f_old.copy_(obj)

    # Print summary
    print('==================================== Summary ======================================')
    print('Iterations:', n_iter + 1)
    print('Function Evaluations:', func_evals)
    print('F:', obj.item())
    print('||g||:', torch.norm(grad).item())
    print('===================================================================================')

    return x_new.clone().detach().numpy()
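# Hedged sketch (mine): the same closure-driven pattern with stock torch.optim.LBFGS,
# which avoids the FullBatchLBFGS / ModelfromFunction dependencies above. Minimizes
# a toy quadratic with a strong Wolfe line search.
import torch

x = torch.tensor([3.0, -2.0], requires_grad=True)
optimizer = torch.optim.LBFGS([x], lr=1.0, max_iter=100, line_search_fn='strong_wolfe')

def closure():
    optimizer.zero_grad()
    loss = ((x - torch.tensor([1.0, 2.0])) ** 2).sum()
    loss.backward()
    return loss

optimizer.step(closure)
print(x.detach())  # close to tensor([1., 2.])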
for epoch in range(last_epoch + 1, N_epochs):
    test_flag = 0
    CNN_net.train()
    DNN1_net.train()
    DNN2_net.train()
    loss_sum = 0
    err_sum = 0

    for i in tqdm(range(N_batches)):
        inp, lab = create_batches_rnd(batch_size, data_folder, wav_lst_tr, snt_tr, wlen, lab_dict, 0.2)
        pout = DNN2_net(DNN1_net(CNN_net(inp)))
        pred = torch.max(pout, dim=1)[1]
        loss = cost(pout, lab.long())
        err = torch.mean((pred != lab.long()).float())

        optimizer_CNN.zero_grad()
        optimizer_DNN1.zero_grad()
        optimizer_DNN2.zero_grad()
        loss.backward()
        optimizer_CNN.step()
        optimizer_DNN1.step()
        optimizer_DNN2.step()

        loss_sum = loss_sum + loss.detach()
def forward(self, input):
    assert input.dim() == 2
    self.input = input
    # Element-wise max against zero implements ReLU. Python's built-in max()
    # cannot compare multi-element tensors, so torch.max is required here.
    return torch.max(input, FloatTensor([0]))
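# Hedged usage sketch (mine): the element-wise max above is equivalent to
# clamp(min=0) and to F.relu; the single-element tensor broadcasts over the input.
import torch

x = torch.tensor([[-1.5, 0.0, 2.0], [3.0, -0.5, 1.0]])
assert torch.equal(torch.max(x, torch.zeros(1)), x.clamp(min=0))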
def test(epoch):
    global test_acc
    global best_test_acc
    global best_test_acc_epoch
    net.eval()
    total = 0
    correct = 0
    confuse = torch.zeros(2, 2)  # confusion matrix
    for i in range(0, testData_sum, cfg.bs):
        if testData_sum - i >= cfg.bs:
            inputs = X_test[i:(i + cfg.bs), :, :, :]
            target = y_test[i:(i + cfg.bs), :]
            mask = mask_test[i:(i + cfg.bs), :, :]
        else:
            inputs = X_test[i:testData_sum, :, :, :]
            target = y_test[i:testData_sum, :]
            mask = mask_test[i:testData_sum, :, :]
        inputs = torch.Tensor(inputs)
        target = torch.Tensor(target)
        mask = torch.Tensor(mask)
        if use_cuda and cfg.gpu:
            inputs = inputs.cuda()
            target = target.cuda()
        with torch.no_grad():
            outputs = net(inputs, mask)
            _, predicted = torch.max(outputs[8].data, 1)
            _, trueValue = torch.max(target.data, 1)
            for j in range(predicted.size()[0]):
                confuse[predicted[j], trueValue[j]] += 1
            total += target.size(0)
            correct += predicted.eq(trueValue.data).sum()

    # Normalize each row (predicted class) of the confusion matrix.
    for categorical in range(2):
        confuse[categorical] = confuse[categorical] / confuse[categorical].sum()
    print(confuse.data)

    test_acc = 100.0 * int(correct.data) / total
    print('Of %d samples, %d were predicted correctly' % (total, correct))
    print('Test accuracy: %.4f%%' % test_acc)
    print('One round of testing is complete')

    if test_acc > best_test_acc:
        print('Saving new checkpoint')
        print("best_test_acc: %0.4f%%" % test_acc)
        print('best_test_epoch: %d ' % epoch)
        state = {
            'net': net.state_dict(),
            'best_test_acc': test_acc,
            'best_test_acc_epoch': epoch,
        }
        torch.save(state, os.path.join('./checkpoints/res_6_3_0', cfg.model + '-' + str(epoch) + '.t7'))
        best_test_acc = test_acc
        best_test_acc_epoch = epoch
        print('Update complete')
    return
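# Hedged aside (mine): the per-sample confusion-matrix loop above can be replaced
# by a vectorized bincount over (predicted, true) index pairs; 2-class toy case.
import torch

predicted = torch.tensor([0, 1, 1, 0, 1])
true_value = torch.tensor([0, 1, 0, 0, 1])
num_classes = 2
confuse = torch.bincount(predicted * num_classes + true_value,
                         minlength=num_classes ** 2).reshape(num_classes, num_classes).float()
confuse = confuse / confuse.sum(dim=1, keepdim=True)  # normalize each predicted-class row
print(confuse)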
def _forward_loss(
    self, state: Dict[str, torch.Tensor], target_tokens: Dict[str, torch.LongTensor]
) -> Dict[str, torch.Tensor]:
    """
    Make forward pass during training or do greedy search during prediction.

    Notes
    -----
    We really only use the predictions from the method to test that beam search
    with a beam size of 1 gives the same results.
    """
    # shape: (batch_size, max_input_sequence_length, encoder_output_dim)
    encoder_outputs = state["encoder_outputs"]

    # shape: (batch_size, max_input_sequence_length)
    source_mask = state["source_mask"]

    # shape: (batch_size, max_target_sequence_length)
    targets = target_tokens["tokens"]

    # Prepare embeddings for targets. They will be used as gold embeddings during decoder training.
    # shape: (batch_size, max_target_sequence_length, embedding_dim)
    target_embedding = self.target_embedder(targets)

    # shape: (batch_size, max_target_batch_sequence_length)
    target_mask = util.get_text_field_mask(target_tokens)

    if self._scheduled_sampling_ratio == 0 and self._decoder_net.decodes_parallel:
        _, decoder_output = self._decoder_net(
            previous_state=state,
            previous_steps_predictions=target_embedding[:, :-1, :],
            encoder_outputs=encoder_outputs,
            source_mask=source_mask,
            previous_steps_mask=target_mask[:, :-1],
        )
        # shape: (group_size, max_target_sequence_length, num_classes)
        logits = self._output_projection_layer(decoder_output)
    else:
        batch_size = source_mask.size()[0]
        _, target_sequence_length = targets.size()

        # The last input from the target is either padding or the end symbol.
        # Either way, we don't have to process it.
        num_decoding_steps = target_sequence_length - 1

        # Initialize target predictions with the start index.
        # shape: (batch_size,)
        last_predictions = source_mask.new_full((batch_size,), fill_value=self._start_index)

        # shape: (steps, batch_size, target_embedding_dim)
        steps_embeddings = torch.Tensor([])

        step_logits: List[torch.Tensor] = []
        for timestep in range(num_decoding_steps):
            if self.training and torch.rand(1).item() < self._scheduled_sampling_ratio:
                # Use gold tokens at test time, and at a rate of
                # 1 - _scheduled_sampling_ratio during training.
                # shape: (batch_size, steps, target_embedding_dim)
                state["previous_steps_predictions"] = steps_embeddings

                # shape: (batch_size, )
                effective_last_prediction = last_predictions
            else:
                # shape: (batch_size, )
                effective_last_prediction = targets[:, timestep]

                if timestep == 0:
                    state["previous_steps_predictions"] = torch.Tensor([])
                else:
                    # shape: (batch_size, steps, target_embedding_dim)
                    state["previous_steps_predictions"] = target_embedding[:, :timestep]

            # shape: (batch_size, num_classes)
            output_projections, state = self._prepare_output_projections(
                effective_last_prediction, state
            )

            # list of tensors, shape: (batch_size, 1, num_classes)
            step_logits.append(output_projections.unsqueeze(1))

            # shape (predicted_classes): (batch_size,)
            _, predicted_classes = torch.max(output_projections, 1)

            # shape (predicted_classes): (batch_size,)
            last_predictions = predicted_classes

            # shape: (batch_size, 1, target_embedding_dim)
            last_predictions_embeddings = self.target_embedder(last_predictions).unsqueeze(1)

            # This step is required, since we want to keep two different prediction
            # histories: gold and real.
            if steps_embeddings.shape[-1] == 0:
                # There are no previous steps, except for start vectors in ``last_predictions``.
                # shape: (group_size, 1, target_embedding_dim)
                steps_embeddings = last_predictions_embeddings
            else:
                # shape: (group_size, steps_count, target_embedding_dim)
                steps_embeddings = torch.cat([steps_embeddings, last_predictions_embeddings], 1)

        # shape: (batch_size, num_decoding_steps, num_classes)
        logits = torch.cat(step_logits, 1)

    # Compute loss.
    target_mask = util.get_text_field_mask(target_tokens)
    loss = self._get_loss(logits, targets, target_mask)

    # TODO: We will be using beam search to get predictions for validation, but if beam size is 1
    # we could consider taking the last_predictions here and building step_predictions,
    # and use that instead of running beam search again, if performance in validation is taking a hit.
    output_dict = {"loss": loss}

    return output_dict
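# Hedged sketch (mine): the scheduled-sampling coin flip above, in isolation. With
# probability `ratio`, feed the model's own previous prediction; otherwise feed the
# gold token (teacher forcing). The token values are made up for the example.
import torch

ratio = 0.25
gold_token = torch.tensor([7, 7, 7])
model_token = torch.tensor([3, 3, 3])
use_model = torch.rand(1).item() < ratio
effective = model_token if use_model else gold_token
print(use_model, effective)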
def train(train_iter, dev_iter, test_iter, model, args):
    if args.cuda:
        model.cuda()

    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-8)
    # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay)
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=)
    # optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    if args.Adam is True:
        print("Adam Training......")
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay)
    elif args.SGD is True:
        print("SGD Training.......")
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay,
                                    momentum=args.momentum_value)
    elif args.Adadelta is True:
        print("Adadelta Training.......")
        optimizer = torch.optim.Adadelta(model.parameters(), lr=args.lr, weight_decay=args.init_weight_decay)

    # lambda1 = lambda epoch: epoch // 30
    # lambda2 = lambda epoch: 0.99 ** epoch
    # print("lambda1 {} lambda2 {} ".format(lambda1, lambda2))
    # scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda2])
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.9)
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    steps = 0
    epoch_step = 0
    model_count = 0
    model.train()
    for epoch in range(1, args.epochs + 1):
        print("\n## Epoch {} of {} ##\n".format(epoch, args.epochs))
        # scheduler.step()
        # print("now lr is {} \n".format(scheduler.get_lr()))
        print("now lr is {} \n".format(optimizer.param_groups[0].get("lr")))
        for batch in train_iter:
            feature, target = batch.text, batch.label
            feature.data.t_(), target.data.sub_(1)  # batch first, index align
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()

            optimizer.zero_grad()
            logit = model(feature)
            loss = F.cross_entropy(logit, target)
            loss.backward()
            if args.init_clip_max_norm is not None:
                # print("aaaa {} ".format(args.init_clip_max_norm))
                utils.clip_grad_norm(model.parameters(), max_norm=args.init_clip_max_norm)
            optimizer.step()

            steps += 1
            if steps % args.log_interval == 0:
                train_size = len(train_iter.dataset)
                corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()
                accuracy = float(corrects) / batch.batch_size * 100.0
                sys.stdout.write(
                    '\rBatch[{}/{}] - loss: {:.6f}  acc: {:.4f}%({}/{})'.format(
                        steps, train_size, loss.data[0], accuracy, corrects, batch.batch_size))
            if steps % args.test_interval == 0:
                eval(dev_iter, model, args, scheduler)
            if steps % args.save_interval == 0:
                if not os.path.isdir(args.save_dir):
                    os.makedirs(args.save_dir)
                save_prefix = os.path.join(args.save_dir, 'snapshot')
                save_path = '{}_steps{}.pt'.format(save_prefix, steps)
                torch.save(model, save_path)
                print("\n", save_path, end=" ")
                test_model = torch.load(save_path)
                model_count += 1
                test_eval(test_iter, test_model, save_path, args, model_count)
                # test_eval(test_iter, model, save_path, args, model_count)
        # print("model_count \n", model_count)
        # epoch_step += 1
        # if 1 <= epoch <= args.epochs + 1:
        #     print("\n\nTest results for epoch {}:".format(epoch))
        #     # eval(test_iter, model, args, scheduler)
        #     if not os.path.isdir(args.save_dir):
        #         os.makedirs(args.save_dir)
        #     epoch_save_prefix = os.path.join(args.save_dir, 'snapshot')
        #     epoch_save_path = '{}_steps{}.pt'.format(epoch_save_prefix, epoch_step)
        #     torch.save(model, epoch_save_path)
        #     test_epoch_model = torch.load(epoch_save_path)
        #     test_eval(test_iter, test_epoch_model, epoch_save_path, args, 0000)
    return model_count
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

# Test the model
# In the test phase, we don't need to compute gradients (for memory efficiency)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
print(model.state_dict())
torch.save(model.state_dict(), 'model.ckpt')
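# Hedged aside (mine): `_, predicted = torch.max(outputs.data, 1)` is the classic
# class-prediction idiom; on modern PyTorch, outputs.argmax(dim=1) is equivalent
# and avoids touching .data.
import torch

outputs = torch.tensor([[0.1, 2.0, -1.0], [3.0, 0.2, 0.3]])
_, predicted = torch.max(outputs, 1)
assert torch.equal(predicted, outputs.argmax(dim=1))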
def train(model,
          criterion,
          optimizer,
          train_loader,
          valid_loader,
          save_file_name,
          max_epochs_stop=3,
          n_epochs=20,
          print_every=2):
    """Train a PyTorch Model

    Params
    --------
        model (PyTorch model): cnn to train
        criterion (PyTorch loss): objective to minimize
        optimizer (PyTorch optimizer): optimizer to compute gradients of model parameters
        train_loader (PyTorch dataloader): training dataloader to iterate through
        valid_loader (PyTorch dataloader): validation dataloader used for early stopping
        save_file_name (str ending in '.pt'): file path to save the model state dict
        max_epochs_stop (int): maximum number of epochs with no improvement in
            validation loss for early stopping
        n_epochs (int): maximum number of training epochs
        print_every (int): frequency of epochs to print training stats

    Returns
    --------
        model (PyTorch model): trained cnn with best weights
        history (DataFrame): history of train and validation loss and accuracy
    """
    # Early stopping initialization
    epochs_no_improve = 0
    valid_loss_min = np.Inf

    valid_max_acc = 0
    history = []

    # Number of epochs already trained (if using loaded-in model weights)
    try:
        print(f'Model has been trained for: {model.epochs} epochs.\n')
    except:
        model.epochs = 0
        print(f'Starting Training from Scratch.\n')

    overall_start = timer()

    # Main loop
    for epoch in range(n_epochs):

        # Keep track of training and validation loss each epoch
        train_loss = 0.0
        valid_loss = 0.0

        train_acc = 0
        valid_acc = 0

        # Set to training
        model.train()
        start = timer()

        # Training loop
        for ii, (data, target) in enumerate(train_loader):
            # Tensors to gpu
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()

            # Clear gradients
            optimizer.zero_grad()
            # Predicted outputs are log probabilities
            output = model(data)

            # Loss and backpropagation of gradients
            loss = criterion(output, target)
            loss.backward()

            # Update the parameters
            optimizer.step()

            # Track train loss by multiplying average loss by number of examples in batch
            train_loss += loss.item() * data.size(0)

            # Calculate accuracy by finding max log probability
            _, pred = torch.max(output, dim=1)
            correct_tensor = pred.eq(target.data.view_as(pred))
            # Need to convert correct tensor from int to float to average
            accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            # Multiply average accuracy times the number of examples in batch
            train_acc += accuracy.item() * data.size(0)

            # Track training progress
            print(
                f'Epoch: {epoch}\t{100 * (ii + 1) / len(train_loader):.2f}% complete.'
                f' {timer() - start:.2f} seconds elapsed in epoch.',
                end='\r')

        # After the training loop ends, start validation
        else:
            model.epochs += 1

            # Don't need to keep track of gradients
            with torch.no_grad():
                # Set to evaluation mode
                model.eval()

                # Validation loop
                for data, target in valid_loader:
                    # Tensors to gpu
                    if train_on_gpu:
                        data, target = data.cuda(), target.cuda()

                    # Forward pass
                    output = model(data)

                    # Validation loss
                    loss = criterion(output, target)
                    # Multiply average loss times the number of examples in batch
                    valid_loss += loss.item() * data.size(0)

                    # Calculate validation accuracy
                    _, pred = torch.max(output, dim=1)
                    correct_tensor = pred.eq(target.data.view_as(pred))
                    accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
                    # Multiply average accuracy times the number of examples
                    valid_acc += accuracy.item() * data.size(0)

                # Calculate average losses
                train_loss = train_loss / len(train_loader.dataset)
                valid_loss = valid_loss / len(valid_loader.dataset)

                # Calculate average accuracy
                train_acc = train_acc / len(train_loader.dataset)
                valid_acc = valid_acc / len(valid_loader.dataset)

                history.append([train_loss, valid_loss, train_acc, valid_acc])

                # Print training and validation results
                if (epoch + 1) % print_every == 0:
                    print(
                        f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}'
                    )
                    print(
                        f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%'
                    )

                # Save the model if validation loss decreases
                if valid_loss < valid_loss_min:
                    # Save model
                    torch.save(model.state_dict(), save_file_name)
                    # Track improvement
                    epochs_no_improve = 0
                    valid_loss_min = valid_loss
                    valid_best_acc = valid_acc
                    best_epoch = epoch

                # Otherwise increment count of epochs with no improvement
                else:
                    epochs_no_improve += 1
                    # Trigger early stopping
                    if epochs_no_improve >= max_epochs_stop:
                        print(
                            f'\nEarly Stopping! Total epochs: {epoch}. Best epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
                        )
                        total_time = timer() - overall_start
                        print(
                            f'{total_time:.2f} total seconds elapsed. {total_time / (epoch + 1):.2f} seconds per epoch.'
                        )

                        # Load the best state dict
                        model.load_state_dict(torch.load(save_file_name))

                        # Attach the optimizer
                        model.optimizer = optimizer

                        # Return the history as a DataFrame, as the docstring promises
                        # (assumes pandas is imported as pd).
                        history = pd.DataFrame(
                            history,
                            columns=['train_loss', 'valid_loss', 'train_acc', 'valid_acc'])
                        return model, history

    # Attach the optimizer
    model.optimizer = optimizer

    # Record overall time and print out stats
    total_time = timer() - overall_start
    print(
        f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
    )
    print(
        f'{total_time:.2f} total seconds elapsed. {total_time / (epoch + 1):.2f} seconds per epoch.'
    )
    history = pd.DataFrame(history,
                           columns=['train_loss', 'valid_loss', 'train_acc', 'valid_acc'])
    return model, history
def train_s2m2(base_loader, base_loader_test, model, params, tmp):
    def mixup_criterion(criterion, pred, y_a, y_b, lam):
        return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)

    criterion = nn.CrossEntropyLoss()

    rotate_classifier = nn.Sequential(nn.Linear(640, 4))
    rotate_classifier.to(device)
    model.to(device)

    if 'rotate' in tmp:
        print("loading rotate model")
        rotate_classifier.load_state_dict(tmp['rotate'])

    optimizer = torch.optim.Adam([
        {'params': model.parameters()},
        {'params': rotate_classifier.parameters()}
    ])

    start_epoch, stop_epoch = params.start_epoch, params.start_epoch + params.stop_epoch
    print("stop_epoch", start_epoch, stop_epoch)

    for epoch in range(start_epoch, stop_epoch):
        print('\nEpoch: %d' % epoch)
        model.train()
        train_loss, rotate_loss = 0, 0
        correct, total = 0, 0
        torch.cuda.empty_cache()

        for batch_idx, (inputs, targets) in enumerate(base_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            lam = np.random.beta(params.alpha, params.alpha)
            f, outputs, target_a, target_b = model(inputs, targets, mixup_hidden=True,
                                                   mixup_alpha=params.alpha, lam=lam)
            loss = mixup_criterion(criterion, outputs, target_a, target_b, lam)
            train_loss += loss.data.item()
            optimizer.zero_grad()
            loss.backward()

            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (lam * predicted.eq(target_a.data).cpu().sum().float()
                        + (1 - lam) * predicted.eq(target_b.data).cpu().sum().float())

            # Build the 4-way rotation self-supervision task on a quarter of the batch.
            bs = inputs.size(0)
            inputs_, targets_, a_ = [], [], []
            indices = np.arange(bs)
            np.random.shuffle(indices)
            split_size = int(bs / 4)
            for j in indices[0:split_size]:
                x90 = inputs[j].transpose(2, 1).flip(1)
                x180 = x90.transpose(2, 1).flip(1)
                x270 = x180.transpose(2, 1).flip(1)
                inputs_ += [inputs[j], x90, x180, x270]
                targets_ += [targets[j] for _ in range(4)]
                a_ += [torch.tensor(0), torch.tensor(1), torch.tensor(2), torch.tensor(3)]

            inputs = Variable(torch.stack(inputs_, 0))
            targets = Variable(torch.stack(targets_, 0))
            a_ = Variable(torch.stack(a_, 0))
            inputs, targets, a_ = inputs.to(device), targets.to(device), a_.to(device)

            rf, outputs = model(inputs)
            rotate_outputs = rotate_classifier(rf)
            rloss = criterion(rotate_outputs, a_)
            closs = criterion(outputs, targets)
            loss = (rloss + closs) / 2.0
            rotate_loss += rloss.data.item()

            # Gradients from the mixup loss (backpropagated above) and this auxiliary
            # loss accumulate before the single optimizer step.
            loss.backward()
            optimizer.step()

            if (batch_idx + 1) % 50 == 0:
                print('{0}/{1}'.format(batch_idx, len(base_loader)),
                      'Loss: %.3f | Acc: %.3f%% | RotLoss: %.3f ' % (
                          train_loss / (batch_idx + 1),
                          100. * correct / total,
                          rotate_loss / (batch_idx + 1)))

        if (epoch % params.save_freq == 0) or (epoch == stop_epoch - 1):
            if not os.path.isdir(params.checkpoint_dir):
                os.makedirs(params.checkpoint_dir)
            outfile = os.path.join(params.checkpoint_dir, '{:d}.tar'.format(epoch))
            torch.save({'epoch': epoch, 'state': model.state_dict()}, outfile)

        test_s2m2(base_loader_test, model, criterion)

    return model
def get_prediction(image_tensor):
    outputs = model(image_tensor)
    # Note: unless `model` ends in a softmax, the max over raw outputs is a score
    # (logit), not a calibrated probability.
    probability, prediction = torch.max(outputs.data, 1)
    return probability, prediction
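# Hedged usage sketch (mine): turning the raw max score into a probability by
# applying softmax before taking the max.
import torch

outputs = torch.tensor([[1.0, 3.0, 0.5]])
probs = torch.softmax(outputs, dim=1)
probability, prediction = torch.max(probs, 1)
print(probability.item(), prediction.item())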
def testing(model, loss_, data_iter, config):
    model.eval()
    correct = 0
    total = 0
    labels_all = []
    output_all = []
    attention_indices_all = []
    words_atten_all = []
    for i in range(len(data_iter)):
        vectors, labels, _ = get_batch(data_iter[i])
        if config['model'] == 'bigru_max':
            pad_by_batch = True
            max_length = 100000
            random_cutting = False
        elif config['model'] == 'cnn_rnn':
            pad_by_batch = False
            max_length = None
            random_cutting = None
        vectors, _ = pad_minibatch(vectors, config['word_padded_length_in_notes'],
                                   config['padding_before_batch'], pad_by_batch,
                                   max_length, random_cutting)
        labels = torch.stack(labels).squeeze()
        if config['cuda']:
            vectors = vectors.cuda()
            labels = labels.cuda()
        vectors = Variable(vectors)
        labels = Variable(labels)

        if config['model'] == 'bigru_max':
            output, attention_indices, words_atten, _, _ = model_setup_bigru_max(
                model, vectors, config, test_model=True)
            attention_indices_all.append(attention_indices)
            words_atten_all.append(words_atten)
        elif config['model'] == 'cnn_rnn':
            note_attn_norm, output = model_setup_cnn_rnn(model, vectors, config)
            note_attn_norm = note_attn_norm.cpu()
            attention_indices_all.append(note_attn_norm.numpy())
            print('note_attn_norm:')
            print(note_attn_norm)

        # Softmax over the class dimension.
        output = F.softmax(output, dim=1)
        # loss = loss_(output, labels)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        predicted = predicted.cpu()
        predicted = predicted.numpy()
        labels = labels.cpu()
        labels = labels.data.numpy()
        correct += (predicted == labels).sum()
        labels_all += list(labels)
        output = output.cpu()
        output_all += list(output.data.numpy())

    output_all = np.array(output_all)
    auc = metrics.roc_auc_score(labels_all, output_all[:, 1])
    loss_epoch = metrics.log_loss(labels_all, output_all[:, 1])
    return loss_epoch, correct / float(total), auc, attention_indices_all, words_atten_all
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    try:
        ind_nz = torch.nonzero(prediction[:, :, 4]).transpose(0, 1).contiguous()
    except:
        return 0

    # Convert (center x, center y, w, h) to corner coordinates.
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    output = prediction.new(1, prediction.size(2) + 1)
    write = False

    for ind in range(batch_size):
        # Select the image from the batch
        image_pred = prediction[ind]

        # Get the class having the maximum score, and the index of that class.
        # Get rid of the num_classes softmax scores; add the class index and the
        # class score of the class having the maximum score.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_score)
        image_pred = torch.cat(seq, 1)

        # Get rid of the zero entries
        non_zero_ind = (torch.nonzero(image_pred[:, 4]))
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:, -1])
        except:
            continue

        # We will do NMS classwise
        for cls in img_classes:
            # Get the detections with one particular class
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort the detections such that the entry with the maximum objectness
            # confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            # If NMS has to be done
            if nms:
                # For each detection
                for i in range(idx):
                    # Get the IoUs of all boxes that come after the one we are
                    # looking at in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                    except ValueError:
                        break
                    except IndexError:
                        break

                    # Zero out all the detections that have IoU > threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Keep only the non-zero entries
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # Concatenate the batch_id of the image to the detection; this helps us
            # identify which image the detection corresponds to. We use a linear
            # structure to hold ALL the detections from the batch: the batch dim is
            # flattened, and each detection is identified by an extra batch column.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class

            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))

    return output
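# Hedged aside (mine): the core zero-out step of the NMS loop above, in isolation.
# The IoU values are made up for the example.
import torch

ious = torch.tensor([0.8, 0.3, 0.55])   # IoU of later boxes with the kept box
nms_conf = 0.4
iou_mask = (ious < nms_conf).float().unsqueeze(1)
later_boxes = torch.ones(3, 7)
later_boxes *= iou_mask                  # rows with IoU >= threshold are zeroed
print(later_boxes[:, 0])                 # tensor([0., 1., 0.])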
def train(epoch):
    model_stage1.train()
    model_select.train()
    '''
    part1_time=0;
    part2_time=0;
    part3_time=0;
    prev_time=time.time();
    '''
    k = 0
    for batch_idx, sample in enumerate(train_data.get_loader()):
        '''
        now_time=time.time();
        part3_time+=now_time-prev_time;
        prev_time=now_time;
        '''
        if use_gpu:
            sample['image'] = sample['image'].to(device)
            sample['label'] = sample['label'].to(device)
            sample['size'][0] = sample['size'][0].to(device)
            sample['size'][1] = sample['size'][1].to(device)

        optimizer_stage1.zero_grad()
        optimizer_select.zero_grad()

        stage1_label = model_stage1(sample['image'])
        theta = model_select(stage1_label, sample['size'])

        theta_label = torch.zeros((sample['image'].size()[0], 6, 2, 3),
                                  device=device, requires_grad=False)  # [batch_size,6,2,3]
        W = 1024.0
        H = 1024.0
        '''
        cens = torch.floor(calc_centroid_old(sample['label']))  # [batch_size,9,2]
        for i in range(sample['image'].size()[0]):
            for j in range(9):
                cens[i,j,0]=cens[i,j,0]*(sample['size'][0][i]-1.0)/(128.0-1.0)
                cens[i,j,1]=cens[i,j,1]*(sample['size'][1][i]-1.0)/(128.0-1.0)
        points = torch.floor(torch.cat([cens[:, 1:6], cens[:, 6:9].mean(dim=1, keepdim=True)], dim=1))  # [batch_size,6,2]
        '''
        '''
        points2 = torch.floor(calc_centroid(sample['label_org']))  # [batch_size,9,2]
        print("cens resize:")
        print(points)
        print("cens org:")
        print(points2)
        print("delta")
        print(points.cpu()-points2)
        input("wait")
        '''
        points = torch.floor(calc_centroid(sample['label_org']))
        for i in range(6):
            theta_label[:, i, 0, 0] = (81.0 - 1.0) / (W - 1.0)
            theta_label[:, i, 1, 1] = (81.0 - 1.0) / (H - 1.0)
            theta_label[:, i, 0, 2] = -1 + 2 * points[:, i, 0] / (W - 1.0)
            theta_label[:, i, 1, 2] = -1 + 2 * points[:, i, 1] / (H - 1.0)

        if torch.min(theta_label) < -1 or torch.max(theta_label) > 1:
            print("F**K")
            print(k)
        '''
        for i in range(sample['image'].shape[0]):
            if (not os.path.exists("./data/select_pre/"+train_data.get_namelist()[(k+i)%2000])):
                os.mkdir("./data/select_pre/"+train_data.get_namelist()[(k+i)%2000])
            image=sample['image_org'][i].cpu().clone()
            image=transforms.ToPILImage()(image).convert('RGB')
            plt.imshow(image)
            plt.show(block=True)
            image.save('./data/select_pre/'+train_data.get_namelist()[(k+i)%2000]+'/'+str((k+i)//2000)+'_img'+'.jpg',quality=100)
            for j in range(6):
                affine_stage2=F.affine_grid(theta_label[i][j].unsqueeze(0),(1,1,81,81),align_corners=True)
                image=F.grid_sample(sample['label_org'][i][label_list[j][1]].unsqueeze(0).unsqueeze(0).to(device),affine_stage2,align_corners=True)
                image=image.squeeze(0).cpu()
                image=transforms.ToPILImage()(image)
                image.save('./data/select_pre/'+train_data.get_namelist()[(k+i)%2000]+'/'+str((k+i)//2000)+'_'+str(j)+'_thetalabel'+'.jpg',quality=100)
                image=sample['label_org'][i][label_list[j][1]]
                image=transforms.ToPILImage()(image)
                image.save('./data/select_pre/'+train_data.get_namelist()[(k+i)%2000]+'/'+str((k+i)//2000)+'_'+str(j)+'_orglabel'+'.jpg',quality=100)
                #plt.imshow(image)
                #plt.show(block=True)
        '''
        k += sample['image'].shape[0]
        loss = fun.smooth_l1_loss(theta, theta_label)
        '''
        now_time=time.time();
        part1_time+=now_time-prev_time;
        prev_time=now_time;
        '''
        loss.backward()
        optimizer_select.step()
        optimizer_stage1.step()
        '''
        now_time=time.time();
        part2_time+=now_time-prev_time;
        prev_time=now_time;
        '''
        if batch_idx % 250 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(sample['image']),
                len(train_data.get_loader().dataset),
                100. * batch_idx / len(train_data.get_loader()), loss))
    '''
def forward(self, batch_dict):
    """Compute loss for model. If both `labels` and `mask` are None, it degenerates
    to SimCLR unsupervised loss: https://arxiv.org/pdf/2002.05709.pdf

    Args:
        features: hidden vector of shape [bsz, n_views, ...].
        labels: ground truth of shape [bsz].
        mask: contrastive mask of shape [bsz, bsz], mask_{i,j}=1 if sample j
            has the same class as sample i. Can be asymmetric.
    Returns:
        A loss scalar.
    """
    # Join features
    features = torch.cat(
        [bd["contrastive_projection_norm"].unsqueeze(dim=1) for bd in batch_dict],
        dim=1,
    )

    # The target for the batch is the one with the highest score
    labels = batch_dict[0]["target"].argmax(dim=-1).view(-1, 1)

    # Samples without an answer cannot work as anchor points
    mask_samples = (batch_dict[0]["target"].sum(dim=-1) != 0).int()

    # Mask
    pos_mask = None

    device = torch.device("cuda") if features.is_cuda else torch.device("cpu")

    if len(features.shape) < 3:
        raise ValueError("`features` needs to be [bsz, n_views, ...],"
                         "at least 3 dimensions are required")
    if len(features.shape) > 3:
        features = features.view(features.shape[0], features.shape[1], -1)

    batch_size = features.shape[0]
    if labels is not None and pos_mask is not None:
        raise ValueError("Cannot define both `labels` and `mask`")
    elif labels is None and pos_mask is None:
        pos_mask = torch.eye(batch_size, dtype=torch.float32).to(device)
    elif labels is not None:
        labels = labels.contiguous().view(-1, 1)
        if labels.shape[0] != batch_size:
            raise ValueError("Num of labels does not match num of features")
        pos_mask = torch.eq(labels, labels.T).float().to(device)
    else:
        pos_mask = pos_mask.float().to(device)

    # Remove samples without ground truth
    pos_mask = pos_mask * mask_samples

    contrast_count = features.shape[1]
    contrast_feature = torch.cat(torch.unbind(features, dim=1), dim=0)
    if self.contrast_mode == "one":
        anchor_feature = features[:, 0]
        anchor_count = 1
    elif self.contrast_mode == "all":
        anchor_feature = contrast_feature
        anchor_count = contrast_count
    else:
        raise ValueError("Unknown mode: {}".format(self.contrast_mode))

    # Compute logits
    anchor_dot_contrast = torch.div(
        torch.matmul(anchor_feature, contrast_feature.T), self.temperature)

    # For numerical stability; doesn't affect any values ahead
    logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
    logits = anchor_dot_contrast - logits_max.detach()

    # Tile mask
    pos_mask = pos_mask.repeat(anchor_count, contrast_count)

    # Mask out self-contrast cases
    logits_mask = torch.scatter(
        torch.ones_like(pos_mask),
        1,
        torch.arange(batch_size * anchor_count).view(-1, 1).to(device),
        0,
    )  # This is just an inverted identity matrix
    # assert logits_mask.cpu() == (torch.eye(logits_mask.shape[0]) == 0).int()
    pos_mask = pos_mask * logits_mask

    # Compute log_prob
    exp_logits = torch.exp(logits) * logits_mask
    if self.formulation == "custom":
        negs_mask = (pos_mask == 0).int() * logits_mask
        negs_sum = (exp_logits * negs_mask).sum(dim=-1, keepdim=True)
        denominator = negs_sum + exp_logits * pos_mask
        log_prob = logits - torch.log(denominator.sum(1, keepdim=True))
    else:
        assert self.formulation == "normal"
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True))

    # Re-scaling rephrasings
    scl_mask_rescale_factor = registry.scl_mask_rescale_factor
    if scl_mask_rescale_factor > 0:
        secondary_mask = (torch.eye(batch_size, device=pos_mask.device)
                          .repeat(anchor_count, contrast_count)
                          .fill_diagonal_(0))
        secondary_mask = secondary_mask * scl_mask_rescale_factor
        secondary_mask[secondary_mask == 0] = 1
        pos_mask = pos_mask * secondary_mask

    # Compute mean of log-likelihood over positives
    mean_log_prob_pos = (pos_mask * log_prob).sum(1) / torch.max(
        pos_mask.sum(1), torch.ones(1).to(pos_mask.device))

    # Loss
    loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos
    loss = loss.view(anchor_count, batch_size).mean()

    return loss, -1
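# Hedged aside (mine): the `logits_max.detach()` subtraction above is the standard
# log-sum-exp stability shift; it cancels in the normalized log-probabilities but
# prevents exp() from overflowing.
import torch

logits = torch.tensor([[1000.0, 1001.0], [5.0, 6.0]])
naive = logits - torch.log(torch.exp(logits).sum(1, keepdim=True))       # exp overflows; row 0 becomes -inf
m, _ = torch.max(logits, dim=1, keepdim=True)
stable = (logits - m) - torch.log(torch.exp(logits - m).sum(1, keepdim=True))
print(naive[0], stable[0])  # naive row 0 is -inf; stable row 0 is finite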
def forward(self, x):
    x = self.conv(x)
    x = torch.mean(x, dim=3)    # average-pool over the last spatial axis
    x, _ = torch.max(x, dim=2)  # global max over the remaining spatial axis
    x = self.fc(x)
    return x
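# Hedged shape sketch (mine): mean over dim 3 then max over dim 2 collapses a
# (batch, channels, H, W) feature map to (batch, channels) before the linear head.
import torch

x = torch.randn(8, 64, 10, 50)
x = torch.mean(x, dim=3)    # -> (8, 64, 10)
x, _ = torch.max(x, dim=2)  # -> (8, 64)
print(x.shape)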
def train(
    train_loader,
    val_loader,
    optimizer,
    criterion,
    device,
    epochs,
    model,
    **kwargs,
):
    """train [main training function of the project]

    [extended_summary]

    Args:
        train_loader ([torch.Dataloader]): [dataloader with the training data]
        optimizer ([torch.optim]): [optimizer for the network]
        criterion ([Loss function]): [PyTorch loss function]
        device ([str]): [device to train on cpu/cuda]
        epochs (int, optional): [epochs to run]. Defaults to 5.
        **kwargs (verbose and validation dataloader)

    Returns:
        [tuple(trained network, train_loss)]
    """
    verbose = kwargs.get("verbose", 1)

    if verbose > 0:
        print("\nTraining with device :", device)
        print("Number of Training Samples : ", len(train_loader.dataset))
        if val_loader is not None:
            print("Number of Validation Samples : ", len(val_loader.dataset))
        print("Number of Epochs : ", epochs)
    if verbose > 1:
        summary(model, input_size=(3, 32, 32))

    lr_sheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        "min",
        factor=0.1,
        patience=int(epochs * 0.05),
        min_lr=1e-7,
        verbose=True,
    )
    validation = False
    if kwargs.get("patience", None) is None:
        print(f"INFO ------ Early Stopping Patience not specified using {int(epochs * 0.1)}")
    patience = kwargs.get("patience", int(epochs * 0.1))
    # early_stopping = EarlyStopping(patience, verbose=True, delta=1e-6)
    max_disc = kwargs.get("max_disc", False)

    for epoch in range(1, epochs + 1):
        if verbose > 0:
            print(f"\nEpoch: {epoch}")
        correct_1 = 0
        correct_2 = 0
        total = 0
        train_loss = 0
        train_acc = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            if len(data) > 1:
                model.train()
                data, target = data.to(device).float(), target.to(device).long()
                optimizer.zero_grad(set_to_none=True)
                if max_disc:
                    out_1, out_2 = model(data)
                    out_1.to(device)
                    out_2.to(device)
                    loss = criterion(out_1, target) + criterion(out_2, target)
                    _, pred_1 = torch.max(out_1.data, 1)
                    _, pred_2 = torch.max(out_2.data, 1)
                    total += target.size(0)
                    correct_1 += (pred_1 == target).sum().item()
                    correct_2 += (pred_2 == target).sum().item()
                else:
                    yhat = model(data).to(device)
                    loss = criterion(yhat, target)
                    train_acc += torch.sum(torch.argmax(yhat, dim=1) == target).item()
                try:
                    train_loss += loss.item()
                except:
                    print("loss item skipped loss")
                loss.backward()
                optimizer.step()
            else:
                pass

        avg_train_loss = train_loss / len(train_loader)
        if max_disc:
            avg_train_acc = ((correct_1 / total) + (correct_2 / total)) / 2
        else:
            avg_train_acc = train_acc / len(train_loader.dataset)

        if epoch % 1 == 0:
            pass
            # if validation:
            #     val_loss = 0
            #     val_acc = 0
            #     model.eval()  # prep model for evaluation
            #     with torch.no_grad():
            #         for vdata, vtarget in val_loader:
            #             vdata, vtarget = (
            #                 vdata.to(device).float(),
            #                 vtarget.to(device).long(),
            #             )
            #             voutput = model(vdata)
            #             vloss = criterion(voutput, vtarget)
            #             val_loss += vloss.item()
            #             val_acc += torch.sum(
            #                 torch.argmax(voutput, dim=1) == vtarget
            #             ).item()
            #     avg_val_loss = val_loss / len(val_loader)
            #     avg_val_acc = val_acc / len(val_loader.dataset)
            #     early_stopping(avg_val_loss, model)
            #     if kwargs.get("lr_sheduler", True):
            #         lr_sheduler.step(avg_val_loss)
            #     verbosity(
            #         f"Val_loss: {avg_val_loss:.4f} Val_acc : {100*avg_val_acc:.2f}",
            #         verbose,
            #         epoch,
            #     )
            #     if early_stopping.early_stop:
            #         print(
            #             f"Early stopping epoch {epoch} , avg train_loss {avg_train_loss}, avg val loss {avg_val_loss}"
            #         )
            #         break
        verbosity(
            f"Train_loss: {avg_train_loss:.4f} Train_acc : {100*avg_train_acc:.2f}",
            verbose,
            epoch,
        )

    return model, avg_train_loss, avg_train_acc
def _calibrate_range(self, x1, x2, min, max, padding=0):
    # Order the pair, apply symmetric padding, then clamp into [min, max].
    # (The `min`/`max` parameters shadow the builtins in the original signature.)
    _x1 = torch.min(x1, x2) - padding
    _x2 = torch.max(x1, x2) + padding
    _x1 = torch.clamp(_x1, min=min)
    _x2 = torch.clamp(_x2, max=max)
    return _x1, _x2
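# Hedged usage sketch (mine): calibrating a possibly-reversed coordinate pair
# into image bounds; the bounds and padding are made up for the example.
import torch

x1 = torch.tensor([120.0, 30.0])
x2 = torch.tensor([80.0, 200.0])
lo = torch.min(x1, x2) - 5        # padding = 5
hi = torch.max(x1, x2) + 5
lo = torch.clamp(lo, min=0)
hi = torch.clamp(hi, max=160)     # e.g. image width 160
print(lo, hi)                     # tensor([75., 25.]) tensor([125., 160.])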
def inference(img, hmtnet_model_file='../main/model/hmt-net-fer.pth'):
    """
    inference with pre-trained HMT-Net
    :param img: an image filepath or image numpy array
    :param hmtnet_model_file:
    :return:
    """
    hmtnet = HMTNet()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        hmtnet = nn.DataParallel(hmtnet)
    hmtnet.load_state_dict(torch.load(hmtnet_model_file))
    hmtnet.eval()

    if type(img) is str:
        image = resize(io.imread(img), (224, 224), mode='constant')
    else:
        img = cv2.resize(img, (224, 224))
        image = img.astype(np.float64)

    # Zero-center each channel.
    image[:, :, 0] -= np.mean(image[:, :, 0])
    image[:, :, 1] -= np.mean(image[:, :, 1])
    image[:, :, 2] -= np.mean(image[:, :, 2])

    image = np.transpose(image, [2, 0, 1])
    input = torch.from_numpy(image).unsqueeze(0).float()

    hmtnet = hmtnet.to(device)
    input = input.to(device)

    tik = time.time()
    e_pred, a_pred, r_pred, g_pred = hmtnet.forward(input)
    tok = time.time()

    _, e_predicted = torch.max(e_pred.data, 1)
    _, a_predicted = torch.max(a_pred.data, 1)
    _, r_predicted = torch.max(r_pred.data, 1)
    _, g_predicted = torch.max(g_pred.data, 1)

    # Map class indices to labels (same mappings as the original if/elif chains).
    gender_labels = ['male', 'female', 'unsure']
    race_labels = ['Caucasian', 'African-American', 'Asian']
    age_labels = ['0-3', '4-19', '20-39', '40-69', '70+']
    emotion_labels = ['Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral']

    g_pred = gender_labels[int(g_predicted.to("cpu"))]
    r_pred = race_labels[int(r_predicted.to("cpu"))]
    a_pred = age_labels[int(a_predicted.to("cpu"))]
    e_pred = emotion_labels[int(e_predicted.to("cpu"))]

    # coord = c_pred.data.to("cpu").view(-1).tolist()
    # landmarks = [[coord[i], coord[i + 5]] for i in range(5)]

    return {'gender': g_pred, 'emotion': e_pred, 'race': r_pred, 'age': a_pred, 'elapse': tok - tik}
def predict(self, x):
    output = self.forward(x)
    _, prediction = torch.max(output, 1)
    return prediction
def get_action(self, inp):
    qvalue = self.forward(inp)
    _, action = torch.max(qvalue, 1)
    return action.item()
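# Hedged extension sketch (mine, hypothetical helper not in the source): greedy
# argmax actions like get_action are typically wrapped in epsilon-greedy
# exploration during training.
import random
import torch

def epsilon_greedy(qvalue, epsilon=0.1):
    if random.random() < epsilon:
        return random.randrange(qvalue.size(1))  # explore: random action
    return torch.max(qvalue, 1)[1].item()        # exploit: same as get_action

print(epsilon_greedy(torch.tensor([[0.1, 0.9, 0.3]])))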
            # Add to y and ^y
            if i == 0:
                total_predicted = y_predicted
                total_labels = labels
            else:
                total_predicted = torch.cat((total_predicted, y_predicted), dim=1)
                total_labels = torch.cat((total_labels, labels), dim=1)
            # end if

            # Total
            total += 1.0
        # end for

        # Output stats
        print(u"Min : {}".format(torch.min(total_predicted)))
        print(u"Max : {}".format(torch.max(total_predicted)))
        print(u"Mean : {}".format(torch.mean(total_predicted)))
        print(u"Std : {}".format(torch.std(total_predicted)))

        # Save result
        xp.add_result(0)
    # end for

    # Delete classifier
    del esn

    # W index
    w_index += 1
# end for samples

# Last space