def test(model): """ Test the weak-supervised model :param model: Pre-trained model on SynthText :return: F-score, loss """ dataloader = DataLoader(DataLoaderEvalICDAR2013('test'), batch_size=config.batch_size['train'], num_workers=8, shuffle=False) with torch.no_grad(): model.eval() iterator = tqdm(dataloader) all_accuracy = [] ground_truth = dataloader.dataset.gt for no, (image, image_name, original_dim, item) in enumerate(iterator): annots = [] for i in item: annot = ground_truth['annots'][dataloader.dataset.imnames[i]] annots.append(annot) if config.use_cuda: image = image.cuda() output = model(image) if type(output) == list: output = torch.cat(output, dim=0) output = output.data.cpu().numpy() original_dim = original_dim.cpu().numpy() f_score = [] for i in range(output.shape[0]): # --------- Resizing it back to the original image size and saving it ----------- # f_score.append( calculate_fscore( resize_bbox(original_dim[i], output[i], config)['word_bbox'][:, :, 0, :], np.array(annots[i]['bbox']), text_target=annots[i]['text'], )) # --------------- PostProcessing for creating the targets for the next iteration ---------------- # all_accuracy.append(np.mean(f_score)) iterator.set_description('F-score: ' + str(np.mean(all_accuracy))) torch.cuda.empty_cache() return np.mean(all_accuracy)
def train(model, optimizer, iteration):

    """
    Train the weak-supervised model iteratively
    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer, all_loss, all_f_score
    """

    optimizer = change_lr(optimizer, config.lr[iteration])

    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=config.num_workers['train'],
        shuffle=True,
        worker_init_fn=_init_fn,
    )
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    all_loss = []
    all_precision = []
    all_f_score = []
    all_recall = []
    all_count = []

    for no, (
            image,
            character_map, affinity_map,
            character_weight, affinity_weight,
            dataset_name, text_target, item, original_dim) in enumerate(iterator):

        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

        # Step-decay the learning rate by a factor of 0.8 every config.change_lr iterations
        if (no + 1) % config.change_lr == 0:
            optimizer = change_lr(optimizer, config.lr[iteration] * (0.8 ** ((no + 1) // config.change_lr - 1)))

        output = model(image)
        loss = loss_criterian(
            output,
            character_map, affinity_map,
            character_weight, affinity_weight).mean() / config.optimizer_iterations

        all_loss.append(loss.item() * config.optimizer_iterations)
        loss.backward()

        # Gradient accumulation: step the optimizer once every config.optimizer_iterations batches
        if (no + 1) % config.optimizer_iterations == 0:
            optimizer.step()
            optimizer.zero_grad()

        # ---------- Calculating the F-score ---------- #

        if (no + 1) % config.check_iterations == 0:

            if type(output) == list:
                output = torch.cat(output, dim=0)

            output[output < 0] = 0
            output[output > 1] = 1

            save(no, dataset_name, output, image, character_map, affinity_map, character_weight, affinity_weight)

        if (no + 1) % config.calc_f_score == 0:

            if type(output) == list:
                output = torch.cat(output, dim=0)

            target_ic13 = []
            predicted_ic13 = []
            target_text = []
            current_count = 0

            output = output.data.cpu().numpy()
            output[output > 1] = 1
            output[output < 0] = 0
            original_dim = original_dim.numpy()

            # Compute the F-score only on real (non-synthetic) images in the mixed batch
            for __, _ in enumerate(dataset_name):
                if _ != 'SYNTH':
                    predicted_ic13.append(resize_bbox(original_dim[__], output[__], config)['word_bbox'])
                    target_ic13.append(
                        np.array(dataloader.dataset.gt[item[__]][1]['word_bbox'].copy(), dtype=np.int32))
                    target_text.append(text_target[__].split('#@#@#@'))
                    current_count += 1

            if len(predicted_ic13) != 0:
                f_score, precision, recall = calculate_batch_fscore(
                    predicted_ic13, target_ic13,
                    text_target=target_text,
                    threshold=config.threshold_fscore)

                all_f_score.append(f_score * current_count)
                all_precision.append(precision * current_count)
                all_recall.append(recall * current_count)
                all_count.append(current_count)

        # ------------- Setting Description ------------- #

        if np.array(all_count)[-min(100, len(all_count)):].sum() != 0:
            count = np.array(all_count)[-min(100, len(all_count)):].sum()
            f_score = int(np.array(all_f_score)[-min(100, len(all_f_score)):].sum() * 10000 / count) / 10000
            precision = int(np.array(all_precision)[-min(100, len(all_precision)):].sum() * 10000 / count) / 10000
            recall = int(np.array(all_recall)[-min(100, len(all_recall)):].sum() * 10000 / count) / 10000
        else:
            f_score = 0
            precision = 0
            recall = 0

        iterator.set_description(
            'Loss:' + str(int(loss.item() * config.optimizer_iterations * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' + str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score) +
            '| Average Recall: ' + str(recall) +
            '| Average Precision: ' + str(precision)
        )

        if (no + 1) % config.test_now == 0:
            # Free training tensors before running the test pass
            del image, loss, affinity_weight, character_weight, affinity_map, character_map, output
            print('\nF-score of testing: ', test(model, iteration), '\n')
            model.train()

    # Flush any gradients left over from an incomplete accumulation window
    if len(iterator) % config.optimizer_iterations != 0:
        optimizer.step()
        optimizer.zero_grad()

    torch.cuda.empty_cache()

    return model, optimizer, all_loss, all_f_score
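# A sketch of the outer weak-supervision driver implied by this train/test pair
# (an assumption: the real entry point is not shown in this section). It assumes
# config.lr maps each weak-supervision iteration to a learning rate; the loop
# re-trains on the current pseudo-labels and then logs the test metrics.
def example_weak_supervision_loop(model, optimizer):
    for iteration in range(len(config.lr)):  # assumed iteration count
        model, optimizer, all_loss, all_f_score = train(model, optimizer, iteration)
        f_score, precision, recall = test(model, iteration)
        print('Iteration', iteration,
              '| F-score:', f_score, '| Precision:', precision, '| Recall:', recall)
    return model, optimizer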
def test(model, iteration): """ Test the weak-supervised model :param model: Pre-trained model on SynthText :param iteration: Iteration Number :return: F-score, loss """ os.makedirs(config.save_path + '/Test_'+str(iteration), exist_ok=True) dataloader = DataLoader( DataLoaderEvalOther('test'), batch_size=config.batch_size['test'], num_workers=config.num_workers['test'], shuffle=False, worker_init_fn=_init_fn ) true_positive = 0 false_positive = 0 num_positive = 0 with torch.no_grad(): model.eval() iterator = tqdm(dataloader) all_accuracy = [] ground_truth = dataloader.dataset.gt for no, (image, image_name, original_dim, item) in enumerate(iterator): annots = [] for i in item: annot = ground_truth['annots'][dataloader.dataset.imnames[i]] annots.append(annot) if config.use_cuda: image = image.cuda() output = model(image) if type(output) == list: output = torch.cat(output, dim=0) output = output.data.cpu().numpy() output[output > 1] = 1 output[output < 0] = 0 original_dim = original_dim.cpu().numpy() f_score = [] for i in range(output.shape[0]): # --------- Resizing it back to the original image size and saving it ----------- # cur_image = denormalize_mean_variance(image[i].data.cpu().numpy().transpose(1, 2, 0)) max_dim = original_dim[i].max() resizing_factor = 768 / max_dim before_pad_dim = [int(original_dim[i][0] * resizing_factor), int(original_dim[i][1] * resizing_factor)] height_pad = (768 - before_pad_dim[0]) // 2 width_pad = (768 - before_pad_dim[1]) // 2 cur_image = cv2.resize( cur_image[height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]], (original_dim[i][1], original_dim[i][0])) cv2.drawContours(cur_image, resize_bbox(original_dim[i], output[i], config)['word_bbox'], -1, (0, 255, 0), 2) cv2.drawContours(cur_image, np.array(annots[i]['bbox']), -1, (0, 0, 255), 2) plt.imsave( config.save_path + '/Test_' + str(iteration) + '/' + image_name[i], cur_image.astype(np.uint8)) score_calc = calculate_fscore( resize_bbox(original_dim[i], output[i], config)['word_bbox'][:, :, 0, :], np.array(annots[i]['bbox']), text_target=annots[i]['text'], ) f_score.append( score_calc['f_score'] ) true_positive += score_calc['true_positive'] false_positive += score_calc['false_positive'] num_positive += score_calc['num_positive'] # --------------- PostProcessing for creating the targets for the next iteration ---------------- # all_accuracy.append(np.mean(f_score)) precision = true_positive / (true_positive + false_positive) recall = true_positive / num_positive iterator.set_description( 'F-score: ' + str(np.mean(all_accuracy)) + '| Cumulative F-score: ' + str(2*precision*recall/(precision + recall))) torch.cuda.empty_cache() return 2*precision*recall/(precision + recall), precision, recall
def train(model, optimizer, iteration):

    """
    Train the weak-supervised model iteratively
    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer, all_loss, all_accuracy
    """

    def change_lr():
        # Change learning rate while training
        for param_group in optimizer.param_groups:
            param_group['lr'] = config.lr[iteration]
        print('Learning Rate Changed to ', config.lr[iteration])

    change_lr()

    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=0,
        shuffle=True)
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    all_loss = []
    all_accuracy = []
    all_count = []

    for no, (
            image,
            character_map, affinity_map,
            character_weight, affinity_weight,
            dataset_name, text_target, item, original_dim) in enumerate(iterator):

        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

        output = model(image)
        loss = loss_criterian(
            output,
            character_map, affinity_map,
            character_weight, affinity_weight).mean() / config.optimizer_iterations

        all_loss.append(loss.item() * config.optimizer_iterations)
        loss.backward()

        # Gradient accumulation: step the optimizer once every config.optimizer_iterations batches
        if (no + 1) % config.optimizer_iterations == 0:
            optimizer.step()
            optimizer.zero_grad()

        # ---------- Calculating the F-score ---------- #

        if type(output) == list:
            # DataParallel returns one chunk per GPU; merge them back into one batch
            output = torch.cat(output, dim=0)

        output[output < 0] = 0
        output[output > 1] = 1

        target_ic13 = []
        predicted_ic13 = []
        target_text = []
        current_count = 0

        if no % config.check_iterations == 0:
            save(no, dataset_name, output, image, character_map, affinity_map, character_weight, affinity_weight)

        output = output.data.cpu().numpy()
        original_dim = original_dim.numpy()

        # Compute the F-score only on real (non-synthetic) images in the mixed batch
        for __, _ in enumerate(dataset_name):
            if _ != 'SYNTH':
                predicted_ic13.append(resize_bbox(original_dim[__], output[__], config)['word_bbox'])
                target_ic13.append(
                    np.array(dataloader.dataset.gt[item[__]][1]['word_bbox'].copy(), dtype=np.int32))
                target_text.append(text_target[__].split('~'))
                current_count += 1

        if len(predicted_ic13) != 0:
            all_accuracy.append(
                calculate_batch_fscore(
                    predicted_ic13, target_ic13,
                    text_target=target_text,
                    threshold=config.threshold_fscore) * current_count)
            all_count.append(current_count)

        # ------------- Setting Description ------------- #

        if np.array(all_count)[-min(1000, len(all_count)):].sum() != 0:
            f_score = int(
                np.array(all_accuracy)[-min(1000, len(all_accuracy)):].sum() * 100000000 /
                np.array(all_count)[-min(1000, len(all_count)):].sum()) / 100000000
        else:
            f_score = 0

        iterator.set_description(
            'Loss:' + str(int(loss.item() * config.optimizer_iterations * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' + str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score))

    # Flush any gradients left over from an incomplete accumulation window
    if len(iterator) % config.optimizer_iterations != 0:
        optimizer.step()
        optimizer.zero_grad()

    torch.cuda.empty_cache()

    return model, optimizer, all_loss, all_accuracy
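# The gradient-accumulation pattern used in train() above, reduced to a minimal
# self-contained sketch (illustration only; the MSE loss and the `batches`
# sequence of (x, y) pairs are hypothetical stand-ins). Dividing each loss by n
# and stepping every n batches makes the update equivalent to one step on an
# n-times-larger batch.
def example_gradient_accumulation(model, optimizer, batches, n=4):
    optimizer.zero_grad()
    for no, (x, y) in enumerate(batches):
        loss = torch.nn.functional.mse_loss(model(x), y) / n  # hypothetical loss
        loss.backward()
        if (no + 1) % n == 0:
            optimizer.step()
            optimizer.zero_grad()
    # Flush leftover gradients, mirroring the check at the end of train();
    # assumes `batches` is a sized sequence
    if len(batches) % n != 0:
        optimizer.step()
        optimizer.zero_grad()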
def test(model): """ Test the weak-supervised model :param model: Pre-trained model on SynthText :return: F-score, loss """ dataloader = DataLoader( DataLoaderEvalOther('test'), batch_size=config.batch_size['test'], num_workers=config.num_workers['test'], shuffle=False ) true_positive = 0 false_positive = 0 num_positive = 0 with torch.no_grad(): model.eval() iterator = tqdm(dataloader) all_accuracy = [] ground_truth = dataloader.dataset.gt for no, (image, image_name, original_dim, item) in enumerate(iterator): annots = [] for i in item: annot = ground_truth['annots'][dataloader.dataset.imnames[i]] annots.append(annot) if config.use_cuda: image = image.cuda() output = model(image) if type(output) == list: output = torch.cat(output, dim=0) output = output.data.cpu().numpy() original_dim = original_dim.cpu().numpy() f_score = [] for i in range(output.shape[0]): # ToDo - Visualise the test results # ToDo - Why is F-score of testing always less than F-score of training at iteration 0? # --------- Resizing it back to the original image size and saving it ----------- # # cur_image = denormalize_mean_variance(image[i].data.cpu().numpy().transpose(1, 2, 0)) # # max_dim = original_dim[i].max() # resizing_factor = 768 / max_dim # before_pad_dim = [int(original_dim[i][0] * resizing_factor), int(original_dim[i][1] * resizing_factor)] # # height_pad = (768 - before_pad_dim[0]) // 2 # width_pad = (768 - before_pad_dim[1]) // 2 # # cur_image_backup = cv2.resize( # cur_image[height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]], # (original_dim[i][1], original_dim[i][0])) # # cur_image = cur_image_backup.copy() # # cv2.drawContours(cur_image, resize_bbox(original_dim[i], output[i], config)['word_bbox'], -1, (0, 255, 0), 2) # plt.imsave(str(i)+'_predicted.png', cur_image.astype(np.uint8)) # # cur_image = cur_image_backup.copy() # cv2.drawContours(cur_image, np.array(annots[i]['bbox']), -1, (0, 255, 0), 2) # plt.imsave(str(i) + '_target.png', cur_image.astype(np.uint8)) score_calc = calculate_fscore( resize_bbox(original_dim[i], output[i], config)['word_bbox'][:, :, 0, :], np.array(annots[i]['bbox']), text_target=annots[i]['text'], ) f_score.append( score_calc['f_score'] ) true_positive += score_calc['true_positive'] false_positive += score_calc['false_positive'] num_positive += score_calc['num_positive'] # --------------- PostProcessing for creating the targets for the next iteration ---------------- # # exit(0) all_accuracy.append(np.mean(f_score)) precision = true_positive / (true_positive + false_positive) recall = true_positive / num_positive iterator.set_description( 'F-score: ' + str(np.mean(all_accuracy)) + '| Cumulative F-score: ' + str(2*precision*recall/(precision + recall))) torch.cuda.empty_cache() return np.mean(all_accuracy)