def get_text(self, text):
    text = text_to_sequence(text, self.char2idx)
    text_norm = torch.IntTensor(text)
    return text_norm
def validation(model, criterion, evaluation_loader, converter, opt):
    """ validation or evaluation """
    n_correct = 0
    norm_ED = 0
    length_of_data = 0
    infer_time = 0
    valid_loss_avg = Averager()

    for i, (image_tensors, labels) in enumerate(evaluation_loader):
        batch_size = image_tensors.size(0)
        length_of_data = length_of_data + batch_size
        image = image_tensors.to(device)
        # For max length prediction
        length_for_pred = torch.IntTensor([opt["batch_max_length"]] *
                                          batch_size).to(device)
        text_for_pred = torch.LongTensor(batch_size,
                                         opt["batch_max_length"] + 1).fill_(0).to(device)

        text_for_loss, length_for_loss = converter.encode(
            labels, batch_max_length=opt["batch_max_length"])

        start_time = time.time()
        if 'CTC' in opt["Prediction"]:
            preds = model(image, text_for_pred)
            forward_time = time.time() - start_time

            # Calculate evaluation loss for the CTC decoder.
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            # permute 'preds' to use the CTCLoss format
            if opt["baiduCTC"]:
                cost = criterion(preds.permute(1, 0, 2), text_for_loss,
                                 preds_size, length_for_loss) / batch_size
            else:
                cost = criterion(
                    preds.log_softmax(2).permute(1, 0, 2), text_for_loss,
                    preds_size, length_for_loss)

            # Select max probability (greedy decoding), then decode index to character
            _, preds_index = preds.max(2)
            if opt["baiduCTC"]:
                preds_index = preds_index.view(-1)
            preds_str = converter.decode(preds_index.data, preds_size.data)
        else:
            preds = model(image, text_for_pred, is_train=False)
            forward_time = time.time() - start_time

            preds = preds[:, :text_for_loss.shape[1] - 1, :]
            target = text_for_loss[:, 1:]  # without [GO] symbol
            cost = criterion(preds.contiguous().view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

            # Select max probability (greedy decoding), then decode index to character
            _, preds_index = preds.max(2)
            preds_str = converter.decode(preds_index, length_for_pred)
            labels = converter.decode(text_for_loss[:, 1:], length_for_loss)

        infer_time += forward_time
        valid_loss_avg.add(cost)

        # calculate accuracy & confidence score
        preds_prob = F.softmax(preds, dim=2)
        preds_max_prob, _ = preds_prob.max(dim=2)
        confidence_score_list = []
        for gt, pred, pred_max_prob in zip(labels, preds_str, preds_max_prob):
            if 'Attn' in opt["Prediction"]:
                gt = gt[:gt.find('[s]')]
                pred_EOS = pred.find('[s]')
                pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                pred_max_prob = pred_max_prob[:pred_EOS]

            # To evaluate a case-sensitive model in an alphanumeric, case-insensitive setting.
            if opt["sensitive"] and opt["data_filtering_off"]:
                pred = pred.lower()
                gt = gt.lower()
                alphanumeric_case_insensitive = '0123456789abcdefghijklmnopqrstuvwxyz'
                out_of_alphanumeric_case_insensitive = f'[^{alphanumeric_case_insensitive}]'
                pred = re.sub(out_of_alphanumeric_case_insensitive, '', pred)
                gt = re.sub(out_of_alphanumeric_case_insensitive, '', gt)

            if pred == gt:
                n_correct += 1

            '''
            (old version) ICDAR2017 DOST Normalized Edit Distance
            https://rrc.cvc.uab.es/?ch=7&com=tasks
            "For each word we calculate the normalized edit distance to the
            length of the ground truth transcription."
            if len(gt) == 0:
                norm_ED += 1
            else:
                norm_ED += edit_distance(pred, gt) / len(gt)
            '''

            # ICDAR2019 Normalized Edit Distance
            if len(gt) == 0 or len(pred) == 0:
                norm_ED += 0
            elif len(gt) > len(pred):
                norm_ED += 1 - edit_distance(pred, gt) / len(gt)
            else:
                norm_ED += 1 - edit_distance(pred, gt) / len(pred)

            # calculate confidence score (= product of pred_max_prob)
            try:
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
            except IndexError:
                confidence_score = 0  # for an empty pred, after pruning at the [s] token
            confidence_score_list.append(confidence_score)
            # print(pred, gt, pred == gt, confidence_score)

    accuracy = n_correct / float(length_of_data) * 100
    norm_ED = norm_ED / float(length_of_data)  # ICDAR2019 Normalized Edit Distance

    return valid_loss_avg.val(), accuracy, norm_ED, preds_str, confidence_score_list, labels, infer_time, length_of_data
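The ICDAR2019 normalized edit distance accumulated above reduces to 1 - d / max(len(gt), len(pred)) per sample. A minimal, self-contained sketch for checking it; the `levenshtein` helper below stands in for the external `edit_distance` (e.g. nltk's), and the assertions are illustrative:

def levenshtein(a, b):
    # classic dynamic-programming edit distance
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                 # deletion
                           cur[j - 1] + 1,              # insertion
                           prev[j - 1] + (ca != cb)))   # substitution
        prev = cur
    return prev[-1]

def norm_ed_icdar2019(pred, gt):
    # similarity contribution of one sample, as accumulated in validation()
    if len(gt) == 0 or len(pred) == 0:
        return 0
    return 1 - levenshtein(pred, gt) / max(len(gt), len(pred))

assert norm_ed_icdar2019("hello", "hello") == 1.0
assert norm_ed_icdar2019("helo", "hello") == 0.8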
def val(net, test_dataset, criterion, max_iter=2):
    print('Start val')

    for p in cnn.parameters():
        p.requires_grad = False
    net.eval()
    net.load_state_dict({k.replace('module.', ''): v
                         for k, v in torch.load(cnn_data).items()})

    val_loader = torch.utils.data.DataLoader(
        test_dataset,
        shuffle=True,
        batch_size=batchSize,
        num_workers=int(workers),
        collate_fn=lmdb_dataset.alignCollate(keep_ratio=True))
    val_iter = iter(val_loader)

    n_correct = 0
    loss_avg = utils.averager()
    image = torch.FloatTensor(batchSize, 1, imgH, imgW)

    max_iter = min(max_iter, len(val_loader))
    for i in range(max_iter):
        data = next(val_iter)
        cpu_images, cpu_texts = data
        # number of images in this batch
        batch_size = cpu_images.size(0)
        # print('cpu images', cpu_images, 'shape', cpu_images.size())
        utils.loadData(image, cpu_images)
        cpu_texts = [clean_txt(tx.encode('utf-8').decode('utf-8')) for tx in cpu_texts]
        # re-encode the labels to match the batch
        t, l = converter.encode(cpu_texts)
        utils.loadData(text, t)    # text indices
        utils.loadData(length, l)  # text lengths
        image = cpu_images * 255
        image = image.cuda()
        preds = cnn(image)
        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
        cost = criterion(preds, text, preds_size, length) / batch_size
        loss_avg.add(cost)

        # max over the class dimension returns (values, indices)
        _, preds = preds.max(2)
        # print('max preds', preds)
        # preds = preds.squeeze(1)
        # make the tensor memory contiguous
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
        print('preds', sim_preds, 'target', cpu_texts)
        for pred, target in zip(sim_preds, cpu_texts):
            if pred.strip() == target.strip():
                n_correct += 1

    accuracy = n_correct / float(max_iter * batchSize)
    testLoss = loss_avg.val()
    # print('Test loss: %f, accuracy: %f' % (testLoss, accuracy))
    return testLoss, accuracy
imgH = 32  # should be 32
nclass = len(alphabet) + 1
nhiddenstate = 256

model = crnn.CRNN(imgH, 1, nclass, nhiddenstate)
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))

converter = utils.strLabelConverter(alphabet)

transformer = dataset.resizeNormalize((200, 32))
image = Image.open(img_path).convert('L')
image = transformer(image)
if torch.cuda.is_available():
    image = image.cuda()
image = image.view(1, *image.size())
image = Variable(image)

model.eval()
preds = model(image)

_, preds = preds.max(2)
preds = preds.transpose(1, 0).contiguous().view(-1)

preds_size = Variable(torch.IntTensor([preds.size(0)]))
raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
print('%-30s => %-30s' % (raw_pred, sim_pred))
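The `converter.decode(..., raw=False)` call above performs standard CTC greedy decoding: collapse consecutive repeats, then drop blanks. A minimal sketch of that rule, assuming blank index 0 as in the `nclass = len(alphabet) + 1` convention:

def ctc_greedy_decode(indices, alphabet, blank=0):
    # collapse consecutive repeats, then remove blanks;
    # index i > 0 maps to alphabet[i - 1], matching nclass = len(alphabet) + 1
    out = []
    prev = None
    for idx in indices:
        if idx != prev and idx != blank:
            out.append(alphabet[idx - 1])
        prev = idx
    return ''.join(out)

# e.g. per-frame argmax [0, 1, 1, 0, 2, 2, 2, 0] over alphabet 'ab' -> 'ab'
assert ctc_greedy_decode([0, 1, 1, 0, 2, 2, 2, 0], 'ab') == 'ab'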
def __init__(self, enabled=False):
    super().__init__()
    self.register_buffer('enabled', torch.IntTensor([0]))
    if enabled:
        self.is_enabled = True
def active_search(ptr_net,
                  point,
                  road=None,
                  iter_time=300,
                  batch_size=300,
                  lr_p=0.01,
                  beta1=0.9,
                  alpha=0.01,
                  alpha_decay=0.9,
                  plot_comp=True,
                  plot_mean=False,
                  print_searching_log=True,
                  log_file_name='search_log.csv',
                  save_net=True,
                  save_name='search_'):
    '''
    searching for the shortest road for a particular distribution of points
    point: (city, coor), tensor
    road: (city), numpy
    '''
    ptr_op = optim.Adam(ptr_net.parameters(), lr=lr_p, betas=(beta1, 0.999))
    point_copy = torch.unsqueeze(point, 0).repeat(batch_size, 1, 1)
    if road is None:
        road = ptr_net.get_road(point_copy)[0]
    city = road.shape[0]
    road = torch.IntTensor(road)
    road_copy = torch.unsqueeze(road, 0).repeat(batch_size, 1)
    road_best = road_copy[0]
    # point_copy: (batch, city, coor), tensor
    # road_copy: (batch, city), tensor
    # road_best: (city)
    length_best = get_length_sum(point_copy, road_copy)[0]
    length_init = get_length_sum(point_copy, road_copy)[0]
    # length_best: number
    baseline = length_best
    mean = []
    if print_searching_log:
        log_file = open(log_file_name, 'a')
        log_file.write('iter,ptr_loss,ptr_grad,best,mean_length\n')
        log_file.close()

    for i in range(iter_time):
        point_input = city_shuffle(point_copy)
        road_output = ptr_net.get_road(point_input)
        length_all = get_length_sum(point_input, road_output)
        # length_all: (batch)
        j = torch.argmin(length_all)
        if length_all[j] < length_best:
            length_best = length_all[j]
            road_shuffle = road_output[j, :]
            point_shuffle = point_input[j]
            # print(get_length_sum_single(point_shuffle, road_shuffle))
            road_best = adjust_road(road_shuffle, point_shuffle, point)

        adv = baseline - length_all
        ptr_loss = torch.dot(ptr_net(point_input, road_output), adv)
        ptr_net.zero_grad()
        ptr_loss.backward(retain_graph=True)
        ptr_grad = torch.nn.utils.clip_grad_norm_(ptr_net.parameters(), 1)
        ptr_op.step()

        if i % 10 == 0:
            print(i)
            if save_net:
                torch.save(ptr_net, save_name + 'ptr.pkl')
        mean.append(float(torch.mean(length_all)))
        if print_searching_log:
            log_file = open(log_file_name, 'a')
            log_file.write(
                str(i) + ',' + str(float(ptr_loss)) + ',' +
                str(float(ptr_grad)) + ',' + str(float(length_all[j])) + ',' +
                str(float(torch.mean(length_all))) + '\n')
            log_file.close()
        baseline = baseline * alpha_decay + (1 - alpha_decay) * torch.mean(length_all)

    fig = plt.figure()
    if plot_comp == True:
        point = point.numpy()
        fig1, ax = plt.subplots(1, 2)
        ax_init = ax[0]
        ax_init.set_title('init:' + str(round(float(length_init), 4)),
                          fontsize=14,
                          fontweight='bold')
        for i in range(city - 1):
            ax_init.plot(point[[road[i], road[i + 1]], 0],
                         point[[road[i], road[i + 1]], 1],
                         color='b')
        ax_init.plot(point[[road[city - 1], road[0]], 0],
                     point[[road[city - 1], road[0]], 1],
                     color='b')
        road_best_copy = torch.unsqueeze(torch.IntTensor(road_best),
                                         0).repeat(batch_size, 1)
        ax_after = ax[1]
        ax_after.set_title(
            'after:' + str(round(float(get_length_sum(point, road_best)), 4)),
            fontsize=14,
            fontweight='bold')
        for i in range(city - 1):
            ax_after.plot(point[[road_best[i], road_best[i + 1]], 0],
                          point[[road_best[i], road_best[i + 1]], 1],
                          color='b')
        ax_after.plot(point[[road_best[city - 1], road_best[0]], 0],
                      point[[road_best[city - 1], road_best[0]], 1],
                      color='b')
    elif plot_mean == True:
        plt.plot(mean)
    fig.show()
    return {
        'ptr_net': ptr_net,
        'mean': mean,
        'road_best': road_best,
        'length_best': length_best
    }
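A hypothetical usage sketch of active_search; the pointer network, its saved weights, and the 20-city instance below are assumptions, not part of the project:

# ptr_net = torch.load('search_ptr.pkl')   # a previously trained/saved net
# points = torch.rand(20, 2)               # (city, coor) instance, made up here
# result = active_search(ptr_net, points, iter_time=100, batch_size=64)
# print(result['length_best'], result['road_best'])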
def response(self, input_message):
    '''
    The agent moves a step forward, upon receiving a message from the user.
    '''
    assert input_message.sender == cfg.USER
    assert input_message.receiver == cfg.AGENT

    # _______ update the agent self _______
    if input_message.message_type == cfg.INFORM_FACET:
        self.update_upon_feature_inform(input_message)

    if input_message.message_type == cfg.REJECT_REC:
        self.rejected_item_list_ += input_message.data['rejected_item_list']
        self.rejected_time += 1
        if self.mini == 1:
            if self.alwaysupdate == 1:
                for i in range(cfg.update_count):
                    self.mini_update_FM()
                self.mini_update_already = True
                self.recent_candidate_list = list(
                    set(self.recent_candidate_list) - set(self.rejected_item_list_))
                self.recent_candidate_list = list(
                    set(self.recent_candidate_list) - set([self.busi_id])) + [self.busi_id]
                self.recent_candidate_list_ranked, self.previous_dict = rank_items(
                    self.known_feature, self.user_id, self.busi_id,
                    self.skip_big_feature, self.FM_model,
                    self.recent_candidate_list, self.write_fp, 1,
                    self.rejected_item_list_, self.previous_dict)

    # _______ Adding into history _______
    if input_message.message_type == cfg.INFORM_FACET:
        if self.turn_count > 0:  # the first turn doesn't count
            if input_message.data['value'] is None:
                self.history_list.append(0)  # ask attribute, fail
            else:
                self.history_list.append(1)  # ask attribute, successful

    if input_message.message_type == cfg.REJECT_REC:
        self.history_list.append(-1)  # try recommendation, user doesn't want it
        self.recent_candidate_list = list(
            set(self.recent_candidate_list) -
            set(self.rejected_item_list_))  # don't consider rejected items

    if cfg.play_by != 'AOO' and cfg.play_by != 'AOO_valid':
        # Add control point here
        if cfg.mod == 'ear':
            state_vector = self.vectorize()
        else:
            state_vector = self.vectorize_crm()

    action = None
    SoftMax = nn.Softmax(dim=-1)
    if cfg.play_by == 'AOO' or cfg.play_by == 'AOO_valid':
        new_message = self.prepare_next_question()

    if cfg.play_by == 'AO':
        # means Ask Only; the recommendation is made with a probability
        new_message = self.prepare_next_question()
        x = len(self.recent_candidate_list)
        p = 10.0 / x
        a = random.uniform(0, 1)
        if a < p:
            new_message = self.prepare_rec_message()

    if cfg.play_by == 'RO':
        # means RecOnly: only make a recommendation at each turn,
        # for Abs-Greedy evaluation
        new_message = self.prepare_rec_message()

    if cfg.play_by == 'policy':
        # means using the Policy Network to determine the action
        s = torch.from_numpy(state_vector).float()
        s = Variable(s, requires_grad=True)
        self.PN_model.eval()
        pred = self.PN_model(s)
        prob = SoftMax(pred)
        c = Categorical(prob)

        # different ways to choose the action
        if cfg.eval == 1:  # for evaluation of the Action stage
            pred_data = pred.data.tolist()
            sorted_index = sorted(range(len(pred_data)),
                                  key=lambda k: pred_data[k],
                                  reverse=True)
            # The following lines avoid asking the same question twice.
            # It is a fair evaluation, because all models have such operation.
            unasked_max = None
            for item in sorted_index:
                if item < self.big_feature_length:
                    if cfg.FACET_POOL[item] not in self.asked_feature:
                        unasked_max = item
                        break
                else:
                    unasked_max = self.big_feature_length
                    break
            action = Variable(torch.IntTensor([unasked_max]))  # make it compatible with torch
            print('action is: {}'.format(action))
        else:  # for training of the Action stage
            i = 0
            action_ = self.big_feature_length
            while i < 10000:
                action_ = c.sample()
                i += 1
                if action_ <= self.big_feature_length:
                    if action_ == self.big_feature_length:
                        break
                    elif cfg.FACET_POOL[action_] not in self.asked_feature:
                        break
            action = action_
            print('action is: {}'.format(action))

        log_prob = c.log_prob(action)
        if self.turn_count != 0:
            self.log_prob_list = torch.cat([self.log_prob_list, log_prob.reshape(1)])
        else:
            self.log_prob_list = log_prob.reshape(1)

        # translate the action into a message
        if action < len(cfg.FACET_POOL):
            data = dict()
            data['facet'] = cfg.FACET_POOL[action]
            new_message = message(cfg.AGENT, cfg.USER, cfg.ASK_FACET, data)
        else:
            new_message = self.prepare_rec_message()

        self.action_tracker.append(action.data.numpy().tolist())
        self.candidate_length_tracker.append(len(self.recent_candidate_list))
    else:
        new_message = self.prepare_rec_message()

    # following are for writing to a numpy array
    action = None
    if new_message.message_type == cfg.ASK_FACET:
        action = cfg.FACET_POOL.index(new_message.data['facet'])
    if new_message.message_type == cfg.MAKE_REC:
        action = len(cfg.FACET_POOL)
    if cfg.purpose == 'pretrain':
        self.numpy_list.append((action, state_vector))
    # end following

    with open(self.write_fp, 'a') as f:
        f.write('Turn count: {}, candidate length: {}\n'.format(
            self.turn_count, len(self.recent_candidate_list)))

    return new_message
def post_processing(logits, image_size, gt_classes, anchors, conf_threshold,
                    nms_threshold):
    num_anchors = len(anchors)
    anchors = torch.Tensor(anchors)
    if isinstance(logits, Variable):
        logits = logits.data

    if logits.dim() == 3:
        logits.unsqueeze_(0)

    batch = logits.size(0)
    h = logits.size(2)
    w = logits.size(3)

    # Compute xc, yc, w, h, box_score on the tensor
    lin_x = torch.linspace(0, w - 1, w).repeat(h, 1).view(h * w)
    lin_y = torch.linspace(0, h - 1, h).repeat(w, 1).t().contiguous().view(h * w)
    anchor_w = anchors[:, 0].contiguous().view(1, num_anchors, 1)
    anchor_h = anchors[:, 1].contiguous().view(1, num_anchors, 1)
    if torch.cuda.is_available():
        lin_x = lin_x.cuda()
        lin_y = lin_y.cuda()
        anchor_w = anchor_w.cuda()
        anchor_h = anchor_h.cuda()

    logits = logits.view(batch, num_anchors, -1, h * w)
    logits[:, :, 0, :].sigmoid_().add_(lin_x).div_(w)
    logits[:, :, 1, :].sigmoid_().add_(lin_y).div_(h)
    logits[:, :, 2, :].exp_().mul_(anchor_w).div_(w)
    logits[:, :, 3, :].exp_().mul_(anchor_h).div_(h)
    logits[:, :, 4, :].sigmoid_()

    with torch.no_grad():
        cls_scores = torch.nn.functional.softmax(logits[:, :, 5:, :], 2)
    cls_max, cls_max_idx = torch.max(cls_scores, 2)
    cls_max_idx = cls_max_idx.float()
    cls_max.mul_(logits[:, :, 4, :])

    score_thresh = cls_max > conf_threshold
    score_thresh_flat = score_thresh.view(-1)

    if score_thresh.sum() == 0:
        predicted_boxes = []
        for i in range(batch):
            predicted_boxes.append(torch.Tensor([]))
    else:
        coords = logits.transpose(2, 3)[..., 0:4]
        coords = coords[score_thresh[..., None].expand_as(coords)].view(-1, 4)
        scores = cls_max[score_thresh]
        idx = cls_max_idx[score_thresh]
        detections = torch.cat([coords, scores[:, None], idx[:, None]], dim=1)

        max_det_per_batch = num_anchors * h * w
        slices = [
            slice(max_det_per_batch * i, max_det_per_batch * (i + 1))
            for i in range(batch)
        ]
        det_per_batch = torch.IntTensor(
            [score_thresh_flat[s].int().sum() for s in slices])
        split_idx = torch.cumsum(det_per_batch, dim=0)

        # Group detections per image of the batch
        predicted_boxes = []
        start = 0
        for end in split_idx:
            predicted_boxes.append(detections[start:end])
            start = end

    selected_boxes = []
    for boxes in predicted_boxes:
        if boxes.numel() == 0:
            # keep the empty entry so batch indices stay aligned
            # (an early `return boxes` here would drop the rest of the batch)
            selected_boxes.append(boxes)
            continue

        a = boxes[:, :2]
        b = boxes[:, 2:4]
        bboxes = torch.cat([a - b / 2, a + b / 2], 1)
        scores = boxes[:, 4]

        # Sort coordinates by descending score
        scores, order = scores.sort(0, descending=True)
        x1, y1, x2, y2 = bboxes[order].split(1, 1)

        # Compute dx and dy between each pair of boxes (these matrices contain every pair twice...)
        dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp(min=0)
        dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp(min=0)

        # Compute IoU
        intersections = dx * dy
        areas = (x2 - x1) * (y2 - y1)
        unions = (areas + areas.t()) - intersections
        ious = intersections / unions

        # Filter based on IoU (and class)
        conflicting = (ious > nms_threshold).triu(1)

        keep = conflicting.sum(0).byte()
        keep = keep.cpu()
        conflicting = conflicting.cpu()

        keep_len = len(keep) - 1
        for i in range(1, keep_len):
            if keep[i] > 0:
                keep -= conflicting[i]
        if torch.cuda.is_available():
            keep = keep.cuda()

        keep = (keep == 0)
        selected_boxes.append(
            boxes[order][keep[:, None].expand_as(boxes)].view(-1, 6).contiguous())

    final_boxes = []
    for boxes in selected_boxes:
        if boxes.numel() == 0:
            final_boxes.append([])
        else:
            boxes[:, 0:3:2] *= image_size
            boxes[:, 0] -= boxes[:, 2] / 2
            boxes[:, 1:4:2] *= image_size
            boxes[:, 1] -= boxes[:, 3] / 2

            final_boxes.append([[
                box[0].item(), box[1].item(), box[2].item(), box[3].item(),
                box[4].item(), gt_classes[int(box[5].item())]
            ] for box in boxes])
    return final_boxes
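The pairwise min/max construction above builds a full IoU matrix in one shot. A standalone greedy-NMS sketch on corner-format boxes using the same trick; this is illustrative only, not this function's exact suppression pass:

import torch

def nms_pairwise(boxes, scores, iou_threshold=0.5):
    # boxes: (N, 4) as x1, y1, x2, y2; greedy NMS via a pairwise IoU matrix
    order = scores.argsort(descending=True)
    x1, y1, x2, y2 = boxes[order].split(1, 1)
    dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp(min=0)
    dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp(min=0)
    inter = dx * dy
    areas = (x2 - x1) * (y2 - y1)
    iou = inter / (areas + areas.t() - inter)
    keep = torch.ones(len(order), dtype=torch.bool)
    for i in range(len(order)):
        if keep[i]:
            # suppress lower-scored boxes that overlap box i too much
            keep &= ~((iou[i] > iou_threshold) & (torch.arange(len(order)) > i))
    return order[keep]

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 10., 10.], [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
print(nms_pairwise(boxes, scores))  # tensor([0, 2]): box 1 overlaps box 0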
def evaluate(query_features, query_labels, query_cams, gallery_features,
             gallery_labels, gallery_cams):
    """Evaluate the CMC and mAP

    Arguments:
        query_features {np.ndarray of size NxC} -- Features of probe images
        query_labels {np.ndarray of query size N} -- Labels of probe images
        query_cams {np.ndarray of query size N} -- Cameras of probe images
        gallery_features {np.ndarray of size N'xC} -- Features of gallery images
        gallery_labels {np.ndarray of gallery size N'} -- Labels of gallery images
        gallery_cams {np.ndarray of gallery size N'} -- Cameras of gallery images

    Returns:
        (torch.IntTensor, float) -- CMC list, mAP
    """
    CMC = torch.IntTensor(len(gallery_labels)).zero_()
    AP = 0

    for i in range(len(query_labels)):
        query_feature = query_features[i]
        query_label = query_labels[i]
        query_cam = query_cams[i]

        # Prediction score
        score = np.dot(gallery_features, query_feature)

        match_query_index = np.argwhere(gallery_labels == query_label)
        same_camera_index = np.argwhere(gallery_cams == query_cam)

        # Positive indices are the matches seen from a different camera,
        # i.e. the desired result
        positive_index = np.setdiff1d(match_query_index,
                                      same_camera_index,
                                      assume_unique=True)
        # Junk indices are entries from the same camera or unlabeled images
        junk_index = np.append(
            np.argwhere(gallery_labels == -1),
            np.intersect1d(match_query_index, same_camera_index))  # .flatten()

        index = np.arange(len(gallery_labels))
        # Remove all the junk indices
        sufficient_index = np.setdiff1d(index, junk_index)

        # compute AP
        y_true = np.in1d(sufficient_index, positive_index)
        y_score = score[sufficient_index]
        AP += average_precision_score(y_true, y_score)

        # Compute CMC
        # Sort the sufficient indices by their scores, from large to small
        lexsort_index = np.argsort(y_score)
        sorted_y_true = y_true[lexsort_index[::-1]]
        match_index = np.argwhere(sorted_y_true == True)

        if match_index.size > 0:
            first_match_index = match_index.flatten()[0]
            CMC[first_match_index:] += 1

    CMC = CMC.float()
    CMC = CMC / len(query_labels) * 100  # average CMC
    mAP = AP / len(query_labels) * 100
    return CMC, mAP
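A small worked check of the AP term, using scikit-learn's average_precision_score as imported by the surrounding module: with the single true match ranked first, AP is 1.0 and the CMC curve saturates from rank 0.

import numpy as np
from sklearn.metrics import average_precision_score

# three gallery entries ranked by score; the only true match scores highest
y_true = np.array([True, False, False])
y_score = np.array([0.9, 0.5, 0.1])
print(average_precision_score(y_true, y_score))  # 1.0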
def train(opt):
    """ dataset preparation """
    if not opt.data_filtering_off:
        print('Filtering the images containing characters which are not in opt.character')
        print('Filtering the images whose label is longer than opt.batch_max_length')
        # see https://github.com/clovaai/deep-text-recognition-benchmark/blob/6593928855fb7abb999a99f428b3e4477d4ae356/dataset.py#L130

    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    log = open(f'./saved_models/{opt.exp_name}/log_dataset.txt', 'a')
    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset, valid_dataset_log = hierarchical_dataset(
        root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with the validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid,
        pin_memory=True)
    log.write(valid_dataset_log)
    print('-' * 80)
    log.write('-' * 80 + '\n')
    log.close()

    """ model configuration """
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            converter = CTCLabelConverterForBaiduWarpctc(opt.character)
        else:
            converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.saved_model != '':
        print(f'loading pretrained model from {opt.saved_model}')
        if opt.FT:
            model.load_state_dict(torch.load(opt.saved_model), strict=False)
        else:
            model.load_state_dict(torch.load(opt.saved_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        if opt.baiduCTC:
            # need to install warpctc. see our guideline.
            from warpctc_pytorch import CTCLoss
            criterion = CTCLoss()
        else:
            criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # filter the parameters that require gradient descent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.exp_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.saved_model != '':
        try:
            start_iter = int(opt.saved_model.split('_')[-1].split('.')[0])
            print(f'continue to train, start_iter: {start_iter}')
        except ValueError:
            pass

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = -1
    iteration = start_iter

    while (True):
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels,
                                        batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size)
            if opt.baiduCTC:
                preds = preds.permute(1, 0, 2)  # to use the CTCLoss format
                cost = criterion(preds, text, preds_size, length) / batch_size
            else:
                preds = preds.log_softmax(2).permute(1, 0, 2)
                cost = criterion(preds, text, preds_size, length)
        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), opt.grad_clip)  # gradient clipping with 5 (default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if (iteration + 1) % opt.valInterval == 0 or iteration == 0:
            # To see training progress, we also conduct validation when 'iteration == 0'
            elapsed_time = time.time() - start_time
            # for log
            with open(f'./saved_models/{opt.exp_name}/log_train.txt', 'a') as log:
                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, confidence_score, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                # training loss and validation loss
                loss_log = f'[{iteration+1}/{opt.num_iter}] Train loss: {loss_avg.val():0.5f}, Valid loss: {valid_loss:0.5f}, Elapsed_time: {elapsed_time:0.5f}'
                loss_avg.reset()

                current_model_log = f'{"Current_accuracy":17s}: {current_accuracy:0.3f}, {"Current_norm_ED":17s}: {current_norm_ED:0.2f}'

                # keep the best accuracy model (on the valid dataset)
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(),
                               f'./saved_models/{opt.exp_name}/best_accuracy.pth')
                if current_norm_ED > best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(),
                               f'./saved_models/{opt.exp_name}/best_norm_ED.pth')
                best_model_log = f'{"Best_accuracy":17s}: {best_accuracy:0.3f}, {"Best_norm_ED":17s}: {best_norm_ED:0.2f}'

                loss_model_log = f'{loss_log}\n{current_model_log}\n{best_model_log}'
                print(loss_model_log)
                log.write(loss_model_log + '\n')

                # show some predicted results
                dashed_line = '-' * 80
                head = f'{"Ground Truth":25s} | {"Prediction":25s} | Confidence Score & T/F'
                predicted_result_log = f'{dashed_line}\n{head}\n{dashed_line}\n'
                for gt, pred, confidence in zip(labels[:5], preds[:5],
                                                confidence_score[:5]):
                    if 'Attn' in opt.Prediction:
                        gt = gt[:gt.find('[s]')]
                        pred = pred[:pred.find('[s]')]

                    predicted_result_log += f'{gt:25s} | {pred:25s} | {confidence:0.4f}\t{str(pred == gt)}\n'
                predicted_result_log += f'{dashed_line}'
                print(predicted_result_log)
                log.write(predicted_result_log + '\n')

        # save the model every 1e+5 iterations
        if (iteration + 1) % 1e+5 == 0:
            torch.save(model.state_dict(),
                       f'./saved_models/{opt.exp_name}/iter_{iteration+1}.pth')

        if (iteration + 1) == opt.num_iter:
            print('end the training')
            sys.exit()
        iteration += 1
def get_text(self, audiopath_and_text):
    text = audiopath_and_text[1]
    text_norm = torch.IntTensor(text_to_sequence(text, self.text_cleaners))
    return text_norm
def train(opt):
    """ dataset preparation """
    opt.select_data = opt.select_data.split('-')
    opt.batch_ratio = opt.batch_ratio.split('-')
    train_dataset = Batch_Balanced_Dataset(opt)

    AlignCollate_valid = AlignCollate(imgH=opt.imgH,
                                      imgW=opt.imgW,
                                      keep_ratio_with_pad=opt.PAD)
    valid_dataset = hierarchical_dataset(root=opt.valid_data, opt=opt)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=opt.batch_size,
        shuffle=True,  # 'True' to check training progress with the validation function.
        num_workers=int(opt.workers),
        collate_fn=AlignCollate_valid,
        pin_memory=True)
    print('-' * 80)

    """ model configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)

    # weight initialization
    for name, param in model.named_parameters():
        if 'localization_fc2' in name:
            print(f'Skip {name} as it is already initialized')
            continue
        try:
            if 'bias' in name:
                init.constant_(param, 0.0)
            elif 'weight' in name:
                init.kaiming_normal_(param)
        except Exception:  # for batchnorm.
            if 'weight' in name:
                param.data.fill_(1)
            continue

    # data parallel for multi-GPU
    model = torch.nn.DataParallel(model).to(device)
    model.train()
    if opt.continue_model != '':
        print(f'loading pretrained model from {opt.continue_model}')
        model.load_state_dict(torch.load(opt.continue_model))
    print("Model:")
    print(model)

    """ setup loss """
    if 'CTC' in opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    # loss averager
    loss_avg = Averager()

    # filter the parameters that require gradient descent
    filtered_parameters = []
    params_num = []
    for p in filter(lambda p: p.requires_grad, model.parameters()):
        filtered_parameters.append(p)
        params_num.append(np.prod(p.size()))
    print('Trainable params num : ', sum(params_num))
    # [print(name, p.numel()) for name, p in filter(lambda p: p[1].requires_grad, model.named_parameters())]

    # setup optimizer
    if opt.adam:
        optimizer = optim.Adam(filtered_parameters,
                               lr=opt.lr,
                               betas=(opt.beta1, 0.999))
    else:
        optimizer = optim.Adadelta(filtered_parameters,
                                   lr=opt.lr,
                                   rho=opt.rho,
                                   eps=opt.eps)
    print("Optimizer:")
    print(optimizer)

    """ final options """
    # print(opt)
    with open(f'./saved_models/{opt.experiment_name}/opt.txt', 'a') as opt_file:
        opt_log = '------------ Options -------------\n'
        args = vars(opt)
        for k, v in args.items():
            opt_log += f'{str(k)}: {str(v)}\n'
        opt_log += '---------------------------------------\n'
        print(opt_log)
        opt_file.write(opt_log)

    """ start training """
    start_iter = 0
    if opt.continue_model != '':
        start_iter = int(opt.continue_model.split('_')[-1].split('.')[0])
        print(f'continue to train, start_iter: {start_iter}')

    start_time = time.time()
    best_accuracy = -1
    best_norm_ED = 1e+6
    i = start_iter

    while (True):
        # train part
        image_tensors, labels = train_dataset.get_batch()
        image = image_tensors.to(device)
        text, length = converter.encode(labels,
                                        batch_max_length=opt.batch_max_length)
        batch_size = image.size(0)

        if 'CTC' in opt.Prediction:
            preds = model(image, text).log_softmax(2)
            preds_size = torch.IntTensor([preds.size(1)] * batch_size).to(device)
            preds = preds.permute(1, 0, 2)  # to use the CTCLoss format

            # To avoid a ctc_loss issue, disable cudnn for the computation of the ctc_loss
            # https://github.com/jpuigcerver/PyLaia/issues/16
            torch.backends.cudnn.enabled = False
            cost = criterion(preds, text, preds_size, length)
            torch.backends.cudnn.enabled = True
        else:
            preds = model(image, text[:, :-1])  # align with Attention.forward
            target = text[:, 1:]  # without [GO] symbol
            cost = criterion(preds.view(-1, preds.shape[-1]),
                             target.contiguous().view(-1))

        model.zero_grad()
        cost.backward()
        torch.nn.utils.clip_grad_norm_(
            model.parameters(), opt.grad_clip)  # gradient clipping with 5 (default)
        optimizer.step()

        loss_avg.add(cost)

        # validation part
        if i % opt.valInterval == 0:
            elapsed_time = time.time() - start_time
            print(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}')
            # for log
            with open(f'./saved_models/{opt.experiment_name}/log_train.txt', 'a') as log:
                log.write(f'[{i}/{opt.num_iter}] Loss: {loss_avg.val():0.5f} elapsed_time: {elapsed_time:0.5f}\n')
                loss_avg.reset()

                model.eval()
                with torch.no_grad():
                    valid_loss, current_accuracy, current_norm_ED, preds, labels, infer_time, length_of_data = validation(
                        model, criterion, valid_loader, converter, opt)
                model.train()

                for pred, gt in zip(preds[:5], labels[:5]):
                    if 'Attn' in opt.Prediction:
                        pred = pred[:pred.find('[s]')]
                        gt = gt[:gt.find('[s]')]
                    print(f'{pred:20s}, gt: {gt:20s}, {str(pred == gt)}')
                    log.write(f'{pred:20s}, gt: {gt:20s}, {str(pred == gt)}\n')

                valid_log = f'[{i}/{opt.num_iter}] valid loss: {valid_loss:0.5f}'
                valid_log += f' accuracy: {current_accuracy:0.3f}, norm_ED: {current_norm_ED:0.2f}'
                print(valid_log)
                log.write(valid_log + '\n')

                # keep the best accuracy model
                if current_accuracy > best_accuracy:
                    best_accuracy = current_accuracy
                    torch.save(model.state_dict(),
                               f'./saved_models/{opt.experiment_name}/best_accuracy.pth')
                if current_norm_ED < best_norm_ED:
                    best_norm_ED = current_norm_ED
                    torch.save(model.state_dict(),
                               f'./saved_models/{opt.experiment_name}/best_norm_ED.pth')
                best_model_log = f'best_accuracy: {best_accuracy:0.3f}, best_norm_ED: {best_norm_ED:0.2f}'
                print(best_model_log)
                log.write(best_model_log + '\n')

        # save the model every 1e+5 iterations
        if (i + 1) % 1e+5 == 0:
            torch.save(model.state_dict(),
                       f'./saved_models/{opt.experiment_name}/iter_{i+1}.pth')

        if i == opt.num_iter:
            print('end the training')
            sys.exit()
        i += 1
# Creating tensors (from numpy)
a = np.array([2, 3.3])
torch.from_numpy(a)
a = np.ones([2, 3])
torch.from_numpy(a)

# torch.tensor() (lowercase) takes concrete data;
# torch.Tensor() / torch.FloatTensor() take a shape
torch.tensor([2, 3.2])
torch.FloatTensor(2, 3)

# allocates memory without initializing it, so the values are garbage
torch.empty(1)
torch.FloatTensor(2, 2, 2)
# likewise takes a shape; pass a list if you want concrete data
torch.IntTensor(3, 3, 3, 3)

# Tensor() defaults to float, but double is often used in practice
torch.set_default_tensor_type(torch.DoubleTensor)
torch.tensor([1.2, 3]).type()

# Random initialization:
# rand samples uniformly from [0, 1); randn samples from the standard normal
# N(0, 1); normal has a more flexible API
a = torch.rand(3, 3)  # 3 rows, 3 columns
torch.rand_like(a)
torch.normal(mean=torch.full([10], 0.), std=torch.arange(1, 0, -0.1))

# arange generates an arithmetic sequence over [start, end), with step 1 by default
# linspace/logspace split [start, end] into `steps` evenly (or log-evenly) spaced values
torch.linspace(0, 10, steps=4)
torch.logspace(0, 10, steps=4)
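A quick check of the data-vs-shape distinction described above (run in a fresh session, before changing the default tensor type):

import torch

t1 = torch.tensor([2, 3])  # data -> tensor([2, 3]); dtype inferred as int64
t2 = torch.Tensor(2, 3)    # shape -> uninitialized 2x3 float tensor
print(t1.dtype, t1.shape)  # torch.int64 torch.Size([2])
print(t2.dtype, t2.shape)  # torch.float32 torch.Size([2, 3])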
def dataGenerator(file_name='../data/trainYelp.txt',
                  train_split=0.8,
                  max_length=1014,
                  indexing_choice=0,
                  nb_merge=50):
    if indexing_choice == 0:
        index = indexing
    if indexing_choice == 1:
        index = bigIndexing
    if indexing_choice == 2:
        index = altIndexing

    list_string = []
    with open(file_name) as infile:
        while True:
            text = infile.readline()
            if len(text) == 0:
                break
            info = json.loads(text)
            data = info["review"]
            list_string.append(data)

    list_subword = create_vocab(list_string, nb_merge)
    print("end training BPE")
    list_subword_witout_end = create_list_sobword_without_end(list_subword)

    start_index = max(index.values())
    for i, sub in enumerate(list_subword_witout_end):
        index[sub] = start_index + i + 1
    print('index', index)

    dataset = []
    print("start encoding training")
    with open(file_name) as infile:
        while True:
            text = infile.readline()
            if len(text) == 0:
                break
            info = json.loads(text)
            rating = info["rating"]
            data = info["review"]
            review = torch.zeros(max_length).long()
            tokenizer = RegexpTokenizer(r'\w+')
            list_words = tokenizer.tokenize(data)
            list_word_subwords = [
                transform_BPE_word(i, list_subword_witout_end)
                for i in list_words
            ]
            list_subwords = []
            for word in list_word_subwords:
                list_subwords += word
                list_subwords += ' '
            for i in range(min(max_length, len(list_subwords))):
                unit = list_subwords[i].lower()
                if unit in index:
                    review[i] = index[unit]
                else:
                    review[i] = index['UNK']
            dataset.append({
                'review': review,
                'rating': torch.IntTensor([rating])
            })
    print("end encoding training")

    # random split 0.8 / 0.2
    dataset_train, dataset_val = train_test_split(dataset,
                                                  test_size=1 - train_split)
    alphabet_size = max(index.values()) + 1
    return dataset_train, dataset_val, list_subword_witout_end, alphabet_size
def test_private_compare(workers):
    """
    Test private compare which returns: β′ = β ⊕ (x > r).
    """
    alice, bob, james = workers["alice"], workers["bob"], workers["james"]

    L = 2**64

    x_bit_sh = (decompose(torch.LongTensor([13]), L).share(
        alice, bob, crypto_provider=james, field=67, dtype="custom").child)
    r = torch.LongTensor([12]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p

    # Big values
    x_bit_sh = (decompose(torch.LongTensor([2**60]), L).share(
        alice, bob, crypto_provider=james, field=67, dtype="custom").child)
    r = torch.LongTensor([2**61]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    # Multidimensional tensors
    x_bit_sh = (decompose(torch.LongTensor([[13, 44], [1, 28]]), L).share(
        alice, bob, crypto_provider=james, field=67, dtype="custom").child)
    r = torch.LongTensor([[12, 44], [12, 33]]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert (beta_p == torch.tensor([[0, 1], [1, 1]])).all()

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert (beta_p == torch.tensor([[1, 0], [0, 0]])).all()

    # Negative values
    x_val = -105
    r_val = -52 % 2**63  # The protocol works only for values in Zq
    x_bit_sh = (decompose(torch.LongTensor([x_val]), L).share(
        alice, bob, crypto_provider=james, field=67, dtype="custom").child)
    r = torch.LongTensor([r_val]).send(alice, bob).child

    beta = torch.LongTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p

    beta = torch.LongTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    # With dtype int
    L = 2**32
    x_bit_sh = (decompose(torch.IntTensor([13]), L).share(
        alice, bob, crypto_provider=james, field=67, dtype="custom").child)
    r = torch.IntTensor([12]).send(alice, bob).child

    beta = torch.IntTensor([1]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert not beta_p

    beta = torch.IntTensor([0]).send(alice, bob).child
    beta_p = private_compare(x_bit_sh, r, beta, L)
    assert beta_p
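The decompose call above turns a value into its bit representation so the shares can be compared bitwise. A minimal standalone sketch of such a decomposition under a little-endian convention; this is an assumption for illustration, and PySyft's actual decompose also accounts for the ring size L:

import torch

def decompose_bits(tensor, n_bits=64):
    # little-endian bit decomposition: out[..., i] = (x >> i) & 1
    bits = [(tensor >> i) & 1 for i in range(n_bits)]
    return torch.stack(bits, dim=-1)

x = torch.LongTensor([13])
print(decompose_bits(x, 8)[0].tolist())  # [1, 0, 1, 1, 0, 0, 0, 0] = 13, LSB first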
def get_text(self, text):
    text_encoded = torch.IntTensor(self.tp.encode_text(text))
    return text_encoded
def test_forward(args):
    args = make_args(**args)

    batch_size = 4
    xmaxs = [40, 45] if args['chunk_size_left'] == -1 else [400, 455]
    device = "cpu"

    module = importlib.import_module('neural_sp.models.seq2seq.encoders.rnn')
    enc = module.RNNEncoder(**args)
    enc = enc.to(device)

    for xmax in xmaxs:
        xs = np.random.randn(batch_size, xmax, args['input_dim']).astype(np.float32)
        xlens = torch.IntTensor(
            [len(x) - i * enc.subsampling_factor for i, x in enumerate(xs)])
        xs = pad_list([np2tensor(x, device).float() for x in xs], 0.)

        enc_out_dict = enc(xs, xlens, task='all')

        assert enc_out_dict['ys']['xs'].size(0) == batch_size
        assert enc_out_dict['ys']['xs'].size(1) == enc_out_dict['ys']['xlens'].max()
        for b in range(batch_size):
            if 'conv' in args['rnn_type'] or args['subsample_type'] in ['max_pool', '1dconv']:
                assert enc_out_dict['ys']['xlens'][b].item() == math.ceil(
                    xlens[b].item() / enc.subsampling_factor)
            else:
                assert enc_out_dict['ys']['xlens'][b].item() == math.floor(
                    xlens[b].item() / enc.subsampling_factor)

        if args['n_layers_sub1'] > 0:
            # all outputs
            assert enc_out_dict['ys_sub1']['xs'].size(0) == batch_size
            assert enc_out_dict['ys_sub1']['xs'].size(1) == enc_out_dict['ys_sub1']['xlens'].max()
            for b in range(batch_size):
                if 'conv' in args['rnn_type'] or args['subsample_type'] in ['max_pool', '1dconv']:
                    assert enc_out_dict['ys_sub1']['xlens'][b].item() == math.ceil(
                        xlens[b].item() / enc.subsampling_factor)
                else:
                    assert enc_out_dict['ys_sub1']['xlens'][b].item() == math.floor(
                        xlens[b].item() / enc.subsampling_factor)
            # single output
            enc_out_dict_sub1 = enc(xs, xlens, task='ys_sub1')
            assert enc_out_dict_sub1['ys_sub1']['xs'].size(0) == batch_size
            assert enc_out_dict_sub1['ys_sub1']['xs'].size(1) == enc_out_dict['ys_sub1']['xlens'].max()

        if args['n_layers_sub2'] > 0:
            # all outputs
            assert enc_out_dict['ys_sub2']['xs'].size(0) == batch_size
            assert enc_out_dict['ys_sub2']['xs'].size(1) == enc_out_dict['ys_sub2']['xlens'].max()
            for b in range(batch_size):
                if 'conv' in args['rnn_type'] or args['subsample_type'] in ['max_pool', '1dconv']:
                    assert enc_out_dict['ys_sub2']['xlens'][b].item() == math.ceil(
                        xlens[b].item() / enc.subsampling_factor)
                else:
                    assert enc_out_dict['ys_sub2']['xlens'][b].item() == math.floor(
                        xlens[b].item() / enc.subsampling_factor)
            # single output
            enc_out_dict_sub12 = enc(xs, xlens, task='ys_sub2')
            assert enc_out_dict_sub12['ys_sub2']['xs'].size(0) == batch_size
            assert enc_out_dict_sub12['ys_sub2']['xs'].size(1) == enc_out_dict_sub12['ys_sub2']['xlens'].max()
def predict(self, image_list):
    if len(image_list) <= 0:
        return [""]

    demo_data = RawDataset(image_list, opt=self.opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=self.opt.batch_size,
        shuffle=False,
        num_workers=int(self.opt.workers),
        collate_fn=self.AlignCollate_demo,
        pin_memory=True)

    # predict
    ret = []
    self.model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor([self.opt.batch_max_length] *
                                              batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, self.opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in self.opt.Prediction:
                preds = self.model(image, text_for_pred)

                # Select max probability (greedy decoding), then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                preds_index = preds_index.view(-1)
                preds_str = self.converter.decode(preds_index.data, preds_size.data)
            else:
                preds = self.model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode index to character
                _, preds_index = preds.max(2)
                preds_str = self.converter.decode(preds_index, length_for_pred)

            # log = open(f'./log_demo_result.txt', 'a')
            # dashed_line = '-' * 80
            # head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'
            # print(f'{dashed_line}\n{head}\n{dashed_line}')
            # log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str, preds_max_prob):
                if 'Attn' in self.opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]
                if confidence_score <= 0.4:
                    pred = "None"
                ret.append(pred)
                # ret.append(preds_str)
                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
    return ret
def preprocess(self, image):
    image = load_image(image)
    target_size = torch.IntTensor([[image.height, image.width]])
    inputs = self.feature_extractor(images=[image], return_tensors="pt")
    inputs["target_size"] = target_size
    return inputs
import kay
import pykay  # the script uses both bindings; pykay was missing from the imports
import torch
import random
import math
from tqdm import tqdm

pic_res = 256
objs = []
bunny = pykay.OBJ("./models/01/", "bunny.obj")
objs.append(bunny)

rt = kay.Rtcore()
vertex = []
index = []
for i in range(len(objs)):
    vertex.append(torch.Tensor(objs[i].vertices))
    index.append(torch.IntTensor(objs[i].faces))
    print(objs[i].vcount, objs[i].fcount)
    rt.addGeo(kay.float_ptr(vertex[i].data_ptr()),
              kay.unsigned_int_ptr(index[i].data_ptr()), objs[i].vcount,
              objs[i].fcount)
rt.RTsetup()

# camera info init
pos = torch.Tensor([0.0, 0.0, 0.2])
look = torch.Tensor([0, 0, 0])
up = torch.Tensor([0, 1, 0])
c = pykay.Camera(pos, look, up, 1.0, 1.0)

center = c.pos + c.f_dist * c.look_dir
temp = c.f_dist * c._fov  # half height
right = torch.cross(c.look_dir, c.up)
left_up = center + temp * c.up - temp * right
def collate(
    samples,
    pad_idx,
    chunk_width,
    chunk_left_context,
    chunk_right_context,
    label_delay,
    seed,
    epoch,
    pad_to_length=None,
    pad_to_multiple=1,
    src_bucketed=False,
    random_chunking=True,
):
    if len(samples) == 0:
        return {}

    def merge(key, pad_to_length=None):
        if key == "source":
            return speech_utils.collate_frames(
                [s[key] for s in samples],
                0.0,
                pad_to_length=pad_to_length,
                pad_to_multiple=pad_to_multiple,
            )
        elif key == "target":
            return data_utils.collate_tokens(
                [s[key] for s in samples],
                pad_idx=pad_idx,
                eos_idx=None,
                left_pad=False,
                move_eos_to_beginning=False,
                pad_to_length=pad_to_length,
                pad_to_multiple=pad_to_multiple,
            )
        else:
            raise ValueError("Invalid key.")

    def chunking(src_item, tgt_item, tgt_start):
        # make a src chunk in the range [begin_src, end_src)
        begin_src = max(0, tgt_start + label_delay - chunk_left_context)
        # ok if end_src is past the end of the utterance
        end_src = tgt_start + label_delay + chunk_width + chunk_right_context

        # replication pad if necessary
        left_pad = max(0, chunk_left_context - tgt_start - label_delay)
        right_pad = max(0, end_src - src_item.size(0))
        src_item = src_item[begin_src:end_src]
        if left_pad > 0 or right_pad > 0:
            src_item = F.pad(
                src_item.t().unsqueeze(0),
                (left_pad, right_pad),
                mode="replicate",
            ).squeeze(0).t()

        if tgt_item is not None:
            # make a tgt chunk in the range [begin_tgt, end_tgt)
            begin_tgt = tgt_start
            end_tgt = tgt_start + chunk_width  # ok if past the end of the utterance
            # replication pad if necessary
            right_pad = max(0, end_tgt - tgt_item.size(0))
            tgt_item = tgt_item[begin_tgt:end_tgt]
            if right_pad > 0:
                tgt_item = torch.cat(
                    (tgt_item, tgt_item.new_full((right_pad, ), pad_idx)), 0)
        return src_item, tgt_item

    if chunk_width is None or random_chunking:
        if chunk_width is not None:  # usually for chunk-wise train data
            # no need to sort, as all chunks have exactly the same length
            for s in samples:
                with data_utils.numpy_seed(seed, epoch, s["id"]):
                    # generate a chunk by sampling the index of its first label
                    f = np.random.randint(s["source"].size(0) - chunk_width + 1)
                s["source"], s["target"] = chunking(s["source"], s["target"], f)
        elif label_delay != 0:  # shift the source according to label_delay
            if label_delay > 0:
                left_pad, right_pad = 0, label_delay
            else:
                left_pad, right_pad = -label_delay, 0
            for s in samples:
                src_item = s["source"]
                src_item = F.pad(
                    src_item.t().unsqueeze(0),
                    (left_pad, right_pad),
                    mode="replicate",
                ).squeeze(0).t()
                if label_delay > 0:
                    s["source"] = src_item[label_delay:]
                else:
                    s["source"] = src_item[:label_delay]

        if pad_to_length is not None or src_bucketed:
            src_lengths = torch.IntTensor(
                [s["source"].ne(0.0).any(dim=1).int().sum() for s in samples])
        else:
            src_lengths = torch.IntTensor([s["source"].size(0) for s in samples])
        id = torch.LongTensor([s["id"] for s in samples])
        utt_id = [s["utt_id"] for s in samples]
        src_frames = merge(
            "source",
            pad_to_length=pad_to_length["source"]
            if pad_to_length is not None else None,
        )

        target = None
        if samples[0].get("target", None) is not None:
            target = merge(
                "target",
                pad_to_length=pad_to_length["target"]
                if pad_to_length is not None else None,
            )
            ntokens = sum(s["target"].ne(pad_idx).int().sum().item() for s in samples)
        else:
            ntokens = src_lengths.sum().item()

        text = None
        if samples[0].get("text", None) is not None:
            text = [s["text"] for s in samples]

        if chunk_width is None:  # for whole utterances (i.e., no chunking)
            # sort by descending source length
            src_lengths, sort_order = src_lengths.sort(descending=True)
            id = id.index_select(0, sort_order)
            utt_id = [utt_id[i] for i in sort_order.numpy()]
            src_frames = src_frames.index_select(0, sort_order)
            if target is not None:
                target = target.index_select(0, sort_order)
            if text is not None:
                text = [text[i] for i in sort_order.numpy()]

        batch = {
            "id": id,
            "utt_id": utt_id,
            "nsentences": len(samples),
            "ntokens": ntokens,
            "net_input": {
                "src_tokens": src_frames,
                "src_lengths": src_lengths
            },
            "target": target,
            "text": text,
        }
        return batch
    else:  # sequential chunking, usually for chunk-wise test data
        if pad_to_length is not None or src_bucketed:
            src_lengths = torch.IntTensor(
                [s["source"].ne(0.0).any(dim=1).int().sum() for s in samples])
        else:
            src_lengths = torch.IntTensor([s["source"].size(0) for s in samples])
        id = torch.LongTensor([s["id"] for s in samples])
        utt_id = [s["utt_id"] for s in samples]
        ori_source = [s["source"] for s in samples]
        ori_target = [s["target"] for s in samples]
        text = None
        if samples[0].get("text", None) is not None:
            text = [s["text"] for s in samples]
        max_length = max(src.size(0) for src in ori_source)
        num_chunks = (max_length + chunk_width - 1) // chunk_width
        batches = []
        for k in range(num_chunks):
            f = k * chunk_width
            for i, s in enumerate(samples):
                if f < src_lengths[i].item():
                    s["source"], s["target"] = chunking(ori_source[i], ori_target[i], f)
                else:
                    s["source"] = ori_source[i].new_zeros(
                        chunk_width + chunk_left_context + chunk_right_context,
                        ori_source[i].size(1))
                    s["target"] = (ori_target[i].new_full((chunk_width, ), pad_idx)
                                   if ori_target[i] is not None else None)
            src_frames = merge(
                "source",
                pad_to_length=pad_to_length["source"]
                if pad_to_length is not None else None,
            )
            src_chunk_lengths = torch.IntTensor([s["source"].size(0) for s in samples])

            target = None
            if samples[0].get("target", None) is not None:
                target = merge(
                    "target",
                    pad_to_length=pad_to_length["target"]
                    if pad_to_length is not None else None,
                )
                ntokens = sum(s["target"].ne(pad_idx).int().sum().item() for s in samples)
            else:
                ntokens = src_lengths.sum().item()

            batch = {
                "id": id,
                "utt_id": utt_id,
                "nsentences": len(samples) if k == 0 else 0,
                "ntokens": ntokens,
                "net_input": {
                    "src_tokens": src_frames,
                    "src_lengths": src_chunk_lengths
                },
                "target": target,
                "text": text,
            }
            batches.append(batch)
        return batches
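The chunking helper above relies on replication padding of the (T, C) feature matrix via F.pad; a small check of that transpose-pad-transpose pattern:

import torch
import torch.nn.functional as F

src = torch.arange(4.).view(4, 1)  # (T=4, C=1)
# pad 2 frames on the left and 1 on the right, repeating the edge frames,
# exactly as chunking() does for out-of-range context
padded = F.pad(src.t().unsqueeze(0), (2, 1), mode="replicate").squeeze(0).t()
print(padded.view(-1).tolist())  # [0.0, 0.0, 0.0, 1.0, 2.0, 3.0, 3.0]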
input = torch.randn((128, 1, 24, 24, 24)).cuda()
print('input', input.shape)

out = Conv3d_1(input.shape[1], 256, 5).cuda()(input)
print('After conv1', out.shape)

out = PrimaryCapsules(input_shape=(256, 16, 16, 16),
                      capsule_dim=8,
                      out_channels=32,
                      kernel_size=9,
                      stride=2).cuda()(out)
print('After PrimaryCapsules', out.shape)

out = Routing().cuda()(out, 2)
print('After Routing', out.shape)

score = Norm()(out)
print('After Norm', score.shape)

decoder = Decoder(16, int(np.prod((1, 24, 24, 24))), (1, 24, 24, 24)).cuda()
y = torch.IntTensor(np.array([np.random.randint(0, 10) for i in range(128)]))
reconstruction = decoder(out, y).view((-1, ) + (1, 24, 24, 24))
print('After reconstruction', reconstruction.shape)

model = PointCapsNet((1, 24, 24, 24), 3).cuda()
y_pred, x_reconstruction = model(input, y)
print('x shape', input.shape)
print('y shape', y.shape)
print(y_pred.shape)
print(x_reconstruction.shape)

# Draw network structure
# from torchviz import make_dot
# draw = make_dot((y_pred, x_reconstruction), params=dict(model.named_parameters()))
# draw.view()
def test_multi_loss_factory():
    from mmpose.models import build_loss

    # test heatmap loss
    loss_cfg = dict(type='HeatmapLoss')
    loss = build_loss(loss_cfg)

    with pytest.raises(AssertionError):
        fake_pred = torch.zeros((2, 3, 64, 64))
        fake_label = torch.zeros((1, 3, 64, 64))
        fake_mask = torch.zeros((1, 64, 64))
        loss(fake_pred, fake_label, fake_mask)

    fake_pred = torch.zeros((1, 3, 64, 64))
    fake_label = torch.zeros((1, 3, 64, 64))
    fake_mask = torch.zeros((1, 64, 64))
    assert torch.allclose(loss(fake_pred, fake_label, fake_mask), torch.tensor(0.))

    fake_pred = torch.ones((1, 3, 64, 64))
    fake_label = torch.zeros((1, 3, 64, 64))
    fake_mask = torch.zeros((1, 64, 64))
    assert torch.allclose(loss(fake_pred, fake_label, fake_mask), torch.tensor(0.))

    fake_pred = torch.ones((1, 3, 64, 64))
    fake_label = torch.zeros((1, 3, 64, 64))
    fake_mask = torch.ones((1, 64, 64))
    assert torch.allclose(loss(fake_pred, fake_label, fake_mask), torch.tensor(1.))

    # test AE loss
    fake_tags = torch.zeros((1, 18, 1))
    fake_joints = torch.zeros((1, 3, 2, 2), dtype=torch.int)

    loss_cfg = dict(type='AELoss', loss_type='exp')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
    assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.))

    fake_tags[0, 0, 0] = 1.
    fake_tags[0, 10, 0] = 0.
    fake_joints[0, 0, 0, :] = torch.IntTensor((0, 1))
    fake_joints[0, 0, 1, :] = torch.IntTensor((10, 1))
    loss_cfg = dict(type='AELoss', loss_type='exp')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))
    assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.25))

    fake_tags[0, 0, 0] = 0
    fake_tags[0, 7, 0] = 1.
    fake_tags[0, 17, 0] = 1.
    fake_joints[0, 1, 0, :] = torch.IntTensor((7, 1))
    fake_joints[0, 1, 1, :] = torch.IntTensor((17, 1))
    loss_cfg = dict(type='AELoss', loss_type='exp')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[1], torch.tensor(0.))

    loss_cfg = dict(type='AELoss', loss_type='max')
    loss = build_loss(loss_cfg)
    assert torch.allclose(loss(fake_tags, fake_joints)[0], torch.tensor(0.))

    with pytest.raises(ValueError):
        loss_cfg = dict(type='AELoss', loss_type='min')
        loss = build_loss(loss_cfg)
        loss(fake_tags, fake_joints)

    # test MultiLossFactory
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=True,
                        push_loss_factor=[0.001],
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=0.001,
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=[0.001],
                        pull_loss_factor=0.001,
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=[0.001],
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=True,
                        heatmaps_loss_factor=[1.0])
        loss = build_loss(loss_cfg)
    with pytest.raises(AssertionError):
        loss_cfg = dict(type='MultiLossFactory',
                        num_joints=2,
                        num_stages=1,
                        ae_loss_type='exp',
                        with_ae_loss=[True],
                        push_loss_factor=[0.001],
                        pull_loss_factor=[0.001],
                        with_heatmaps_loss=[True],
                        heatmaps_loss_factor=1.0)
        loss = build_loss(loss_cfg)

    loss_cfg = dict(type='MultiLossFactory',
                    num_joints=17,
                    num_stages=1,
                    ae_loss_type='exp',
                    with_ae_loss=[False],
                    push_loss_factor=[0.001],
                    pull_loss_factor=[0.001],
                    with_heatmaps_loss=[False],
                    heatmaps_loss_factor=[1.0])
    loss = build_loss(loss_cfg)
    fake_outputs = [torch.zeros((1, 34, 64, 64))]
    fake_heatmaps = [torch.zeros((1, 17, 64, 64))]
    fake_masks = [torch.ones((1, 64, 64))]
    fake_joints = [torch.zeros((1, 30, 17, 2))]
    heatmaps_losses, push_losses, pull_losses = \
        loss(fake_outputs, fake_heatmaps, fake_masks, fake_joints)
    assert heatmaps_losses == [None]
    assert pull_losses == [None]
    assert push_losses == [None]

    loss_cfg = dict(type='MultiLossFactory',
                    num_joints=17,
                    num_stages=1,
                    ae_loss_type='exp',
                    with_ae_loss=[True],
                    push_loss_factor=[0.001],
                    pull_loss_factor=[0.001],
                    with_heatmaps_loss=[True],
                    heatmaps_loss_factor=[1.0])
    loss = build_loss(loss_cfg)
    heatmaps_losses, push_losses, pull_losses = \
        loss(fake_outputs, fake_heatmaps, fake_masks, fake_joints)
    assert len(heatmaps_losses) == 1
def forward(self, inputs: dict) -> Union[dict, tuple]: """ inputs is a dict containing the below keys. The format of the tensors are indicated as e.g. `BTC`, `BMC` (etc), which can be interpreted as following. B: batch size, T: size of the rolling window over health history (i.e. number of time-stamps), C: number of generic channels, M: number of encounters, Elements with pre-determined shapes are indicated as such. For example: - B(14) indicates a tensor of shape (B, 14), - BM1 indicates a tensor of shape (B, M, 1) - B(T=14)C indicates a tensor of shape (B, 14, C) where 14 is the currently set size of the rolling window. Parameters ---------- inputs : dict A python dict with the following keys: -> `health_history`: a B(T=14)C tensor of the 14-day health history (symptoms + test results + day) of the individual. -> `health_profile`: a BC tensor of the health profile containing (age + health + preexisting_conditions) of the individual. -> `history_days`: a B(T=14)1 tensor of the day corresponding to the T dimension in `health_history`. -> `encounter_health`: a BMC tensor of health during an encounter indexed by M. -> `encounter_message`: a BMC tensor of the received message from the encounter partner. -> `encounter_day`: a BM1 tensor of the encounter day. -> `encounter_duration`: a BM1 tensor of the encounter duration. This is not the actual duration, but a proxy (for the number of encounters) -> `encounter_partner_id`: a binary BMC tensor specifying the ID of the encounter partner. -> `mask`: a BM mask tensor distinguishing the valid entries (1) from padding (0) in the set valued inputs. -> `valid_history_mask`: a B(14) mask tensor distinguising valid points in history (1) from padding (0). Returns ------- dict A dict containing the keys "encounter_variables" and "latent_variable". 
""" # -------- Shape Wrangling -------- batch_size = inputs["health_history"].shape[0] num_history_days = inputs["health_history"].shape[1] num_encounters = inputs["encounter_health"].shape[1] if not isinstance(num_encounters, torch.Tensor): # for tracing # noinspection PyArgumentList num_encounters = torch.IntTensor([num_encounters])[0] # -------- Embeddings -------- embeddings = self.embed(inputs) # -------- Self Attention -------- # Prepare the entities -- one set for the encounters and the other for self health # Before we start, expand health profile from BC to BMC and append to entities expanded_health_profile_per_encounter = embeddings[ "embedded_health_profile"][:, None, :].expand( batch_size, num_encounters, embeddings["embedded_health_profile"].shape[-1]) encounter_entities = torch.cat( [ embeddings["embedded_encounter_day"], embeddings["embedded_encounter_partner_ids"], embeddings["embedded_encounter_duration"], embeddings["embedded_encounter_health"], embeddings["embedded_encounter_messages"], expanded_health_profile_per_encounter, ], dim=-1, ) # Expand the messages and placeholders from C to BTC expanded_message_placeholder = self.message_placeholder[ None, None].expand( batch_size, num_history_days, embeddings["embedded_encounter_messages"].shape[-1], ) expanded_pid_placeholder = self.partner_id_placeholder[ None, None].expand( batch_size, num_history_days, embeddings["embedded_encounter_partner_ids"].shape[-1], ) expanded_duration_placeholder = self.duration_placeholder[ None, None].expand( batch_size, num_history_days, embeddings["embedded_encounter_duration"].shape[-1], ) # Expand the health profile from C to BTC expanded_health_profile_per_day = embeddings[ "embedded_health_profile"][:, None, :].expand( batch_size, num_history_days, embeddings["embedded_health_profile"].shape[-1], ) self_entities = torch.cat( [ embeddings["embedded_history_days"], expanded_pid_placeholder, expanded_duration_placeholder, embeddings["embedded_health_history"], expanded_message_placeholder, expanded_health_profile_per_day, ], dim=-1, ) # Concatenate encounter and self entities in to one big set (before passing to # the self attention blocks). In addition, expand inputs.mask to account for # masking the entire set of entities. entities = torch.cat([encounter_entities, self_entities], dim=1) expanded_mask = torch.cat( [inputs["mask"], inputs["valid_history_mask"]], dim=1) entities = self.entity_masker(entities, expanded_mask) # Grab a copy of the "meta-data", which we will be appending to entities at # every step. These meta-data are the time-stamps and partner_ids meta_data = self._get_embedding_meta_data( entities, embeddings["embedded_history_days"], embeddings["embedded_encounter_partner_ids"], embeddings["embedded_encounter_duration"], ) # Make a mask for the attention mech. This mask prevents attention between # two entities if either one of them is a padding entity. 
        attention_mask = expanded_mask[:, :, None] * expanded_mask[:, None, :]
        entities = self._attention_loop(entities, meta_data, attention_mask,
                                        expanded_mask)
        # -------- Latent Variables --------
        pre_latent_variable = self._get_pre_latent_variable(
            entities, num_encounters)
        # Push through the latent variable MLP to get the latent variables
        # latent_variable.shape = BTC
        if not isinstance(self.latent_variable_mlp, nn.ModuleDict):
            latent_variable_mlps = {
                "latent_variable": self.latent_variable_mlp
            }
        else:
            latent_variable_mlps = self.latent_variable_mlp
        latent_variables = {
            key: mlp(pre_latent_variable)
            for key, mlp in latent_variable_mlps.items()
        }
        # -------- Generate Output Variables --------
        # Process encounters to their variables
        pre_encounter_variables = self._get_pre_encounter_variables(
            entities,
            embeddings["embedded_history_days"],
            embeddings["embedded_encounter_partner_ids"],
            embeddings["embedded_encounter_duration"],
            num_encounters,
        )
        encounter_variables = self.encounter_mlp(pre_encounter_variables)
        # Done: pack to a dict and return
        assert (not self._diagnose or not self._output_as_tuple
                ), "cannot produce tuple (for tracing) while diagnosing"
        # If legacy code expects a tuple somewhere, we only give out the
        # first latent variable.
        if self._output_as_tuple:
            return encounter_variables, latent_variables["latent_variable"]
        results = dict()
        results["encounter_variables"] = encounter_variables
        # This is still compatible with legacy code that expects a
        # "latent_variable" entry.
        results.update(latent_variables)
        if self._diagnose:
            _locals = dict(locals())
            _locals.pop("results")
            _locals.pop("self")
            _locals.pop("encounter_variables")
            # note: the local is named `latent_variables` (plural); popping
            # the singular name would raise a KeyError
            _locals.pop("latent_variables")
            results.update(_locals)
        return results
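
# Hedged smoke-test sketch for the forward pass above: only the keys and the
# B/T/M axis layout follow the docstring; every channel width (C_*) below is
# an illustrative assumption and must match the embedding configuration of
# the actual model instance.
def make_fake_inputs(B=2, T=14, M=5,
                     C_health=13, C_profile=9, C_msg=1, C_pid=16):
    return {
        "health_history": torch.randn(B, T, C_health),
        "health_profile": torch.randn(B, C_profile),
        "history_days": torch.arange(T, 0, -1, dtype=torch.float32)
                             .view(1, T, 1).expand(B, T, 1),
        "encounter_health": torch.randn(B, M, C_health),
        "encounter_message": torch.randn(B, M, C_msg),
        "encounter_day": torch.zeros(B, M, 1),
        "encounter_duration": torch.ones(B, M, 1),
        "encounter_partner_id": torch.zeros(B, M, C_pid),
        "mask": torch.ones(B, M),
        "valid_history_mask": torch.ones(B, T),
    }

# outputs = model(make_fake_inputs())
# -> {"encounter_variables": ..., "latent_variable": ...}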
    def __init__(self, cfgFile):
        super(DarknetTorch, self).__init__()
        self.sections = parse_Darknet_cfg(cfgFile)  # list of dictionaries
        self.moduleList = module_define_torch(self.sections)
        self.header = torch.IntTensor([0, 0, 0, 0])
        self.netparams = self.sections[0]  # dictionary defining net params
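
# Hedged sketch of the forward pattern this constructor sets up: walk the
# parsed sections (skipping the leading [net] section stored in netparams)
# alongside moduleList and dispatch on the layer type. The section keys are
# assumptions based on standard darknet cfg files; route/shortcut/yolo layers
# need cached per-layer outputs and are omitted here.
def _forward_sketch(self, x):
    for section, module in zip(self.sections[1:], self.moduleList):
        if section['type'] in ('convolutional', 'upsample', 'maxpool'):
            x = module(x)
    return x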
def test_dtype(workers):
    # unpack so each name matches its worker key (the original assignment
    # swapped alice and bob)
    alice, bob, james, me = (
        workers["alice"],
        workers["bob"],
        workers["james"],
        workers["me"],
    )

    # Without fix_prec
    x = torch.tensor([1, 2, 3]).share(alice, bob, james, dtype="long")
    assert (
        x.child.dtype == "long"
        and x.child.field == 2 ** 64
        and isinstance(
            x.child.child["alice"].location.object_store.get_obj(
                x.child.child["alice"].id_at_location
            ),
            torch.LongTensor,
        )
        and (x.get() == torch.LongTensor([1, 2, 3])).all()
    )

    x = torch.tensor([4, 5, 6]).share(alice, bob, james, dtype="int")
    assert (
        x.child.dtype == "int"
        and x.child.field == 2 ** 32
        and isinstance(
            x.child.child["alice"].location.object_store.get_obj(
                x.child.child["alice"].id_at_location
            ),
            torch.IntTensor,
        )
        and (x.get() == torch.IntTensor([4, 5, 6])).all()
    )

    # With custom dtype
    x = torch.tensor([1, 2, 3]).share(alice, bob, james, dtype="custom", field=67)
    assert (
        x.child.dtype == "custom"
        and x.child.field == 67
        and isinstance(
            x.child.child["alice"].location.object_store.get_obj(
                x.child.child["alice"].id_at_location
            ),
            torch.IntTensor,
        )
        and (x.get() == torch.IntTensor([1, 2, 3])).all()
    )

    # With fix_prec
    x = torch.tensor([1.1, 2.2, 3.3]).fix_prec().share(alice, bob, james)
    assert (
        x.child.child.dtype == "long"
        and x.child.child.field == 2 ** 64
        and isinstance(
            x.child.child.child["alice"].location.object_store.get_obj(
                x.child.child.child["alice"].id_at_location
            ),
            torch.LongTensor,
        )
        and (x.get().float_prec() == torch.tensor([1.1, 2.2, 3.3])).all()
    )

    x = torch.tensor([4.1, 5.2, 6.3]).fix_prec(dtype="int").share(alice, bob, james)
    assert (
        x.child.child.dtype == "int"
        and x.child.child.field == 2 ** 32
        and isinstance(
            x.child.child.child["alice"].location.object_store.get_obj(
                x.child.child.child["alice"].id_at_location
            ),
            torch.IntTensor,
        )
        and (x.get().float_prec() == torch.tensor([4.1, 5.2, 6.3])).all()
    )
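
# Hedged follow-on sketch using the same .share(...) API exercised above:
# additive secret sharing supports arithmetic directly on the shares, and
# .get() reconstructs the plaintext result from the three workers.
def shared_addition_sketch(alice, bob, james):
    x = torch.tensor([1, 2, 3]).share(alice, bob, james, dtype="int")
    y = torch.tensor([4, 5, 6]).share(alice, bob, james, dtype="int")
    assert ((x + y).get() == torch.IntTensor([5, 7, 9])).all()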
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# create the network model
cnn = chsNet(1, len(alphabet) + 1)
cnn.apply(weights_init)
if cnn_data != '':
    print('loading pretrained model from %s' % cnn_data)
    cnn.load_state_dict({k.replace('module.', ''): v
                         for k, v in torch.load(cnn_data).items()})

image = torch.FloatTensor(batchSize, 1, imgH, imgW)  # 1 channel (grayscale); would be 3 for RGB
text = torch.IntTensor(batchSize * 5)
length = torch.IntTensor(batchSize)

if torch.cuda.is_available():
    cnn = cnn.cuda()
    image = image.cuda()
    criterion = criterion.cuda()

image = Variable(image)
text = Variable(text)
length = Variable(length)

# loss averager
loss_avg = utils.averager()

optimizer = optim.RMSprop(cnn.parameters(), lr=lr)
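
# Hedged sketch of one training step with the buffers above. `converter` (a
# CTC label converter with an encode() method) and the exact criterion call
# signature are assumptions based on typical CRNN/CTC training code; with
# torch.nn.CTCLoss the predictions must be log-probabilities.
def train_batch(cpu_images, cpu_texts, converter):
    t, l = converter.encode(cpu_texts)  # flattened targets + per-sample lengths
    image.resize_(cpu_images.size()).copy_(cpu_images)
    text.resize_(t.size()).copy_(t)
    length.resize_(l.size()).copy_(l)

    preds = cnn(image)  # (seq_len, batch, num_classes)
    preds_size = torch.IntTensor([preds.size(0)] * cpu_images.size(0))
    cost = criterion(preds, text, preds_size, length) / cpu_images.size(0)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    loss_avg.add(cost)
    return cost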
for filename in findFiles('data/names/*.txt'):
    category = filename.split('/')[-1].split('.')[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

######## LSTM Configuration
num_classes = 18
input_size = 57
hidden_size = 57
num_layers = 1
mini_batch = 1
seq_length = 20

hidden_size_tensor = torch.autograd.Variable(
    torch.IntTensor([hidden_size]), requires_grad=False)
mini_batch_tensor = torch.autograd.Variable(
    torch.IntTensor([mini_batch]), requires_grad=False)
seq_length_tensor = torch.autograd.Variable(
    torch.IntTensor([seq_length]), requires_grad=False)
num_layer_tensor = torch.autograd.Variable(
    torch.IntTensor([num_layers]), requires_grad=False)

rnn = LSTM().cuda()
net_dict = rnn.state_dict()
# NOTE: `dict` here must be the checkpoint state dict loaded earlier in the
# script (it shadows the Python builtin); renaming it would be cleaner.
pretrained_dict = {k: v for k, v in dict.items() if k in net_dict}
net_dict.update(pretrained_dict)
# load the merged state dict, not just the (possibly partial) pretrained subset
rnn.load_state_dict(net_dict)

############ Test
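
# Hedged shape-check sketch: the custom LSTM class used above is not shown in
# this fragment, so torch.nn.LSTM with the same hyperparameters is used here
# purely to illustrate the intended tensor shapes (batch 1, sequence 20).
_lstm = torch.nn.LSTM(input_size, hidden_size, num_layers)
_x = torch.randn(seq_length, mini_batch, input_size)
_h0 = torch.zeros(num_layers, mini_batch, hidden_size)
_c0 = torch.zeros(num_layers, mini_batch, hidden_size)
_out, (_hn, _cn) = _lstm(_x, (_h0, _c0))
assert _out.shape == (seq_length, mini_batch, hidden_size)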
def demo(opt):
    """ Model Configuration """
    if 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)

    if opt.rgb:
        opt.input_channel = 3
    model = Model(opt)
    print('model input parameters', opt.imgH, opt.imgW, opt.num_fiducial,
          opt.input_channel, opt.output_channel, opt.hidden_size,
          opt.num_class, opt.batch_max_length, opt.Transformation,
          opt.FeatureExtraction, opt.SequenceModeling, opt.Prediction)
    model = torch.nn.DataParallel(model).to(device)

    # load model
    print('loading pretrained model from %s' % opt.saved_model)
    model.load_state_dict(torch.load(opt.saved_model, map_location=device))

    AlignCollate_demo = AlignCollate(imgH=opt.imgH, imgW=opt.imgW,
                                     keep_ratio_with_pad=opt.PAD)
    demo_data = RawDataset(root=opt.image_folder, opt=opt)  # use RawDataset
    demo_loader = torch.utils.data.DataLoader(
        demo_data,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=0,  # on Linux use int(opt.workers); on Windows use 0
        collate_fn=AlignCollate_demo,
        pin_memory=True)

    # predict
    model.eval()
    with torch.no_grad():
        for image_tensors, image_path_list in demo_loader:
            batch_size = image_tensors.size(0)
            image = image_tensors.to(device)
            # For max length prediction
            length_for_pred = torch.IntTensor(
                [opt.batch_max_length] * batch_size).to(device)
            text_for_pred = torch.LongTensor(
                batch_size, opt.batch_max_length + 1).fill_(0).to(device)

            if 'CTC' in opt.Prediction:
                preds = model(image, text_for_pred)

                # Select max probability (greedy decoding), then decode index to character
                preds_size = torch.IntTensor([preds.size(1)] * batch_size)
                _, preds_index = preds.max(2)
                # preds_index = preds_index.view(-1)
                preds_str = converter.decode(preds_index, preds_size)
            else:
                preds = model(image, text_for_pred, is_train=False)

                # select max probability (greedy decoding), then decode index to character
                _, preds_index = preds.max(2)
                preds_str = converter.decode(preds_index, length_for_pred)

            log = open('./log_demo_result.txt', 'a')
            dashed_line = '-' * 80
            head = f'{"image_path":25s}\t{"predicted_labels":25s}\tconfidence score'

            print(f'{dashed_line}\n{head}\n{dashed_line}')
            log.write(f'{dashed_line}\n{head}\n{dashed_line}\n')

            preds_prob = F.softmax(preds, dim=2)
            preds_max_prob, _ = preds_prob.max(dim=2)
            for img_name, pred, pred_max_prob in zip(image_path_list,
                                                     preds_str,
                                                     preds_max_prob):
                if 'Attn' in opt.Prediction:
                    pred_EOS = pred.find('[s]')
                    pred = pred[:pred_EOS]  # prune after "end of sentence" token ([s])
                    pred_max_prob = pred_max_prob[:pred_EOS]

                # calculate confidence score (= product of pred_max_prob)
                confidence_score = pred_max_prob.cumprod(dim=0)[-1]

                print(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}')
                log.write(f'{img_name:25s}\t{pred:25s}\t{confidence_score:0.4f}\n')

            log.close()
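
# Example invocation (flag names inferred from the opt attributes read above;
# the authoritative argparse definitions live elsewhere in this script):
#   python demo.py --image_folder demo_image/ --saved_model best_accuracy.pth \
#       --Transformation TPS --FeatureExtraction ResNet \
#       --SequenceModeling BiLSTM --Prediction Attn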
    def __init__(self, cfgfile):
        super(Darknet, self).__init__()
        self.blocks = parse_cfg(cfgfile)
        self.net_info, self.module_list = create_modules(self.blocks)
        self.header = torch.IntTensor([0, 0, 0, 0])
        self.seen = 0
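
# Hedged sketch of how self.header / self.seen are typically populated when
# reading a darknet .weights file: the file starts with int32 metadata
# (version numbers plus the number of images seen during training). The count
# and layout below follow the common YOLOv3-loading pattern and are an
# assumption; the real load_weights is not shown in this fragment.
import numpy as np

def _load_header_sketch(self, weightfile):
    with open(weightfile, 'rb') as fp:
        header = np.fromfile(fp, dtype=np.int32, count=5)
    self.header = torch.from_numpy(header)
    self.seen = int(self.header[3])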