def test(dataloader, loss_criterian, model):
    """
    Function to test
    :param dataloader: PyTorch dataloader
    :param loss_criterian: Loss function with OHNM using MSE Loss
    :param model: PyTorch model of UNet-ResNet
    :return: all iteration loss values
    """

    with torch.no_grad():  # No gradient calculation during testing

        model.eval()
        iterator = tqdm(dataloader)
        all_loss = []
        all_accuracy = []

        for no, (image, weight, weight_affinity) in enumerate(iterator):

            if config.use_cuda:
                image, weight, weight_affinity = image.cuda(), weight.cuda(), weight_affinity.cuda()

            output = model(image)
            loss = loss_criterian(output, weight, weight_affinity).mean()

            all_loss.append(loss.item())

            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            # Word bounding boxes generated from the predicted character and affinity heat-maps
            predicted_bbox = generate_word_bbox_batch(
                output[:, 0, :, :].data.cpu().numpy(),
                output[:, 1, :, :].data.cpu().numpy(),
                character_threshold=config.threshold_character,
                affinity_threshold=config.threshold_affinity)

            # Word bounding boxes generated from the ground-truth heat-maps
            target_bbox = generate_word_bbox_batch(
                weight.data.cpu().numpy(),
                weight_affinity.data.cpu().numpy(),
                character_threshold=config.threshold_character,
                affinity_threshold=config.threshold_affinity)

            all_accuracy.append(
                calculate_batch_fscore(predicted_bbox, target_bbox, threshold=config.threshold_fscore))

            iterator.set_description(
                'Loss:' + str(int(loss.item() * 100000000) / 100000000) +
                ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
                '] Average Loss:' +
                str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000000) / 100000000) +
                '| Average F-Score: ' +
                str(int(np.array(all_accuracy)[-min(1000, len(all_accuracy)):].mean() * 100000000) / 100000000))

            # Periodically save the predictions for visual inspection
            if no % config.periodic_output == 0 and no != 0:
                save(image, output, weight, weight_affinity, no)

        return all_loss

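# The loss_criterian used above is only referenced, not defined, in this excerpt. As a rough
# illustration of what "OHNM (online hard negative mining) with MSE loss" can look like, the
# sketch below keeps every positive pixel and only the hardest negative pixels (here 3x the
# number of positives, an assumed ratio). The class name, the 0.1 positive threshold and the
# ratio are illustrative assumptions; the repository's actual Criterian class may differ.
class HardNegativeMSELoss(torch.nn.Module):  # hypothetical name, for illustration only

    def __init__(self, negative_ratio=3):
        super().__init__()
        self.negative_ratio = negative_ratio

    def forward(self, output, character_map, affinity_map):
        # output: [batch, 2, H, W]; channel 0 = character heat-map, channel 1 = affinity heat-map
        target = torch.stack([character_map, affinity_map], dim=1)
        pixel_loss = torch.nn.functional.mse_loss(output, target, reduction='none')

        positive_mask = target > 0.1          # assumed threshold separating text from background
        positive_loss = pixel_loss[positive_mask]
        negative_loss = pixel_loss[~positive_mask]

        # Keep only the hardest negatives so that easy background pixels do not dominate the loss
        num_negatives = min(negative_loss.numel(),
                            max(1, self.negative_ratio * positive_loss.numel()))
        hard_negatives, _ = torch.topk(negative_loss, num_negatives)

        return (positive_loss.sum() + hard_negatives.sum()) / (positive_loss.numel() + num_negatives)
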
def train(dataloader, loss_criterian, model, optimizer, starting_no, all_loss, all_accuracy):
    """
    Function to train
    :param dataloader: PyTorch dataloader
    :param loss_criterian: Loss function with OHNM using MSE Loss
    :param model: PyTorch model of UNet-ResNet
    :param optimizer: Adam Optimizer
    :param starting_no: how many items to skip in the dataloader
    :param all_loss: list of all loss values
    :param all_accuracy: list of all f-scores
    :return: all iteration loss values
    """

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    def change_lr(no_i):
        # Change the learning rate at the iteration numbers specified in config.lr
        for i in config.lr:
            if i == no_i:
                print('Learning Rate Changed to ', config.lr[i])
                for param_group in optimizer.param_groups:
                    param_group['lr'] = config.lr[i]

    for no, (image, weight, weight_affinity) in enumerate(iterator):

        change_lr(no)

        if config.pretrained:
            # Skip the items already seen by the pre-trained checkpoint
            if no == starting_no:
                dataloader.start = True
                continue
            elif no < starting_no:
                continue

        if config.use_cuda:
            image, weight, weight_affinity = image.cuda(), weight.cuda(), weight_affinity.cuda()

        output = model(image)
        # Scale the loss so that gradients accumulated over config.optimizer_iteration batches average out
        loss = loss_criterian(output, weight, weight_affinity).mean() / config.optimizer_iteration

        all_loss.append(loss.item() * config.optimizer_iteration)
        loss.backward()

        if (no + 1) % config.optimizer_iteration == 0:
            optimizer.step()
            optimizer.zero_grad()

        # Calculating the f-score only periodically because initially there are a lot of stray contours
        if no % config.periodic_fscore == 0:

            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            predicted_bbox = generate_word_bbox_batch(
                output[:, 0, :, :].data.cpu().numpy(),
                output[:, 1, :, :].data.cpu().numpy(),
                character_threshold=config.threshold_character,
                affinity_threshold=config.threshold_affinity,
                word_threshold=config.threshold_word,
            )

            target_bbox = generate_word_bbox_batch(
                weight.data.cpu().numpy(),
                weight_affinity.data.cpu().numpy(),
                character_threshold=config.threshold_character,
                affinity_threshold=config.threshold_affinity,
                word_threshold=config.threshold_word,
            )

            all_accuracy.append(
                calculate_batch_fscore(
                    predicted_bbox,
                    target_bbox,
                    threshold=config.threshold_fscore,
                    text_target=None
                )
            )

        if len(all_accuracy) == 0:
            iterator.set_description(
                'Loss:' + str(int(loss.item() * config.optimizer_iteration * 100000000) / 100000000) +
                ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
                '] Average Loss:' +
                str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000000) / 100000000))
        else:
            iterator.set_description(
                'Loss:' + str(int(loss.item() * config.optimizer_iteration * 100000000) / 100000000) +
                ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
                '] Average Loss:' +
                str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000000) / 100000000) +
                '| Average F-Score: ' +
                str(int(np.array(all_accuracy)[-min(1000, len(all_accuracy)):].mean() * 100000000) / 100000000))

        # Periodically save model predictions for visual inspection
        if no % config.periodic_output == 0:

            if isinstance(output, list):
                output = torch.cat(output, dim=0)
            save(image, output, weight, weight_affinity, no)

        # Periodically checkpoint the model, optimizer and the training-loss curve
        if no % config.periodic_save == 0:

            torch.save(
                {
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, config.save_path + '/' + str(no) + '_model.pkl')

            np.save(config.save_path + '/loss_plot_training.npy', all_loss)
            plt.plot(all_loss)
            plt.savefig(config.save_path + '/loss_plot_training.png')
            plt.clf()

    return all_loss

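# The checkpoints written above store the model and optimizer state dicts under the keys
# 'state_dict' and 'optimizer'. A minimal resume sketch for those checkpoints; the helper name
# and the iteration number 10000 in the usage comment are placeholders, not values from the repo:
def resume_from_checkpoint(model, optimizer, checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer

# model, optimizer = resume_from_checkpoint(model, optimizer, config.save_path + '/10000_model.pkl')
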
def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively
    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer
    """

    optimizer = change_lr(optimizer, config.lr[iteration])

    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=config.num_workers['train'],
        shuffle=True,
        worker_init_fn=_init_fn,
    )
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    all_loss = []
    all_precision = []
    all_f_score = []
    all_recall = []
    all_count = []

    for no, (
            image, character_map, affinity_map, character_weight, affinity_weight,
            dataset_name, text_target, item, original_dim) in enumerate(iterator):

        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

        # Decay the learning rate by a factor of 0.8 every config.change_lr iterations
        if (no + 1) % config.change_lr == 0:
            optimizer = change_lr(optimizer, config.lr[iteration] * (0.8 ** ((no + 1) // config.change_lr - 1)))

        output = model(image)
        loss = loss_criterian(
            output, character_map, affinity_map, character_weight, affinity_weight
        ).mean() / config.optimizer_iterations

        all_loss.append(loss.item() * config.optimizer_iterations)
        loss.backward()

        if (no + 1) % config.optimizer_iterations == 0:
            optimizer.step()
            optimizer.zero_grad()

        # ---------- Calculating the F-score ------------ #

        if (no + 1) % config.check_iterations == 0:

            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output[output < 0] = 0
            output[output > 1] = 1

            save(no, dataset_name, output, image, character_map, affinity_map, character_weight, affinity_weight)

        if (no + 1) % config.calc_f_score == 0:

            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            target_ic13 = []
            predicted_ic13 = []
            target_text = []
            current_count = 0

            output = output.data.cpu().numpy()
            output[output > 1] = 1
            output[output < 0] = 0
            original_dim = original_dim.numpy()

            # Only the real (non-synthetic) images contribute to the F-score
            for __, _ in enumerate(dataset_name):

                if _ != 'SYNTH':
                    predicted_ic13.append(resize_bbox(original_dim[__], output[__], config)['word_bbox'])
                    target_ic13.append(
                        np.array(dataloader.dataset.gt[item[__]][1]['word_bbox'].copy(), dtype=np.int32))
                    target_text.append(text_target[__].split('#@#@#@'))
                    current_count += 1

            if len(predicted_ic13) != 0:
                f_score, precision, recall = calculate_batch_fscore(
                    predicted_ic13, target_ic13,
                    text_target=target_text,
                    threshold=config.threshold_fscore)

                all_f_score.append(f_score * current_count)
                all_precision.append(precision * current_count)
                all_recall.append(recall * current_count)
                all_count.append(current_count)

        # ------------- Setting Description ---------------- #

        if np.array(all_count)[-min(100, len(all_count)):].sum() != 0:
            count = np.array(all_count)[-min(100, len(all_count)):].sum()
            f_score = int(np.array(all_f_score)[-min(100, len(all_f_score)):].sum() * 10000 / count) / 10000
            precision = int(np.array(all_precision)[-min(100, len(all_precision)):].sum() * 10000 / count) / 10000
            recall = int(np.array(all_recall)[-min(100, len(all_recall)):].sum() * 10000 / count) / 10000
        else:
            f_score = 0
            precision = 0
            recall = 0

        iterator.set_description(
            'Loss:' + str(int(loss.item() * config.optimizer_iterations * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' + str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score) +
            '| Average Recall: ' + str(recall) +
            '| Average Precision: ' + str(precision))

        # Periodically evaluate on the test set and put the model back in train mode afterwards
        if (no + 1) % config.test_now == 0:
            del image, loss, affinity_weight, character_weight, affinity_map, character_map, output
            print('\nF-score of testing: ', test(model, iteration), '\n')
            model.train()

    # Flush any leftover accumulated gradients
    if len(iterator) % config.optimizer_iterations != 0:
        optimizer.step()
        optimizer.zero_grad()

    torch.cuda.empty_cache()

    return model, optimizer, all_loss, all_f_score

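# The loop above decays the learning rate by a factor of 0.8 every config.change_lr iterations,
# starting from config.lr[iteration]. A standalone sketch of that schedule; the helper name and
# the example numbers are illustrative, not taken from the repository's config:
def weak_supervision_lr(base_lr, batch_no, change_every, decay=0.8):
    # batch_no is 0-indexed; the learning rate first drops after 2 * change_every batches,
    # matching base_lr * 0.8 ** ((no + 1) // change_every - 1) applied when (no + 1) % change_every == 0
    steps = (batch_no + 1) // change_every
    return base_lr if steps == 0 else base_lr * decay ** (steps - 1)

# e.g. with base_lr=1e-4 and change_every=5000 (illustrative values):
# weak_supervision_lr(1e-4, 0, 5000)    -> 1e-4
# weak_supervision_lr(1e-4, 4999, 5000) -> 1e-4   (0.8 ** 0)
# weak_supervision_lr(1e-4, 9999, 5000) -> 8e-05  (0.8 ** 1)
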
def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively
    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer
    """

    def change_lr():
        # Change learning rate while training
        for param_group in optimizer.param_groups:
            param_group['lr'] = config.lr[iteration]
        print('Learning Rate Changed to ', config.lr[iteration])

    change_lr()

    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=0,
        shuffle=True)
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    all_loss = []
    all_accuracy = []
    all_count = []

    for no, (
            image, character_map, affinity_map, character_weight, affinity_weight,
            dataset_name, text_target, item, original_dim) in enumerate(iterator):

        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

        output = model(image)
        loss = loss_criterian(
            output, character_map, affinity_map, character_weight, affinity_weight
        ).mean() / config.optimizer_iterations

        all_loss.append(loss.item() * config.optimizer_iterations)
        loss.backward()

        if (no + 1) % config.optimizer_iterations == 0:
            optimizer.step()
            optimizer.zero_grad()

        # ---------- Calculating the F-score ------------ #

        if isinstance(output, list):
            output = torch.cat(output, dim=0)

        output[output < 0] = 0
        output[output > 1] = 1

        target_ic13 = []
        predicted_ic13 = []
        target_text = []
        current_count = 0

        if no % config.check_iterations == 0:
            save(no, dataset_name, output, image, character_map, affinity_map, character_weight, affinity_weight)

        output = output.data.cpu().numpy()
        original_dim = original_dim.numpy()

        for __, _ in enumerate(dataset_name):

            if _ != 'SYNTH':
                predicted_ic13.append(resize_bbox(original_dim[__], output[__], config)['word_bbox'])
                target_ic13.append(
                    np.array(dataloader.dataset.gt[item[__]][1]['word_bbox'].copy(), dtype=np.int32))
                target_text.append(text_target[__].split('~'))
                current_count += 1

        if len(predicted_ic13) != 0:
            all_accuracy.append(
                calculate_batch_fscore(
                    predicted_ic13, target_ic13,
                    text_target=target_text,
                    threshold=config.threshold_fscore) * current_count)
            all_count.append(current_count)

        # ------------- Setting Description ---------------- #

        if np.array(all_count)[-min(1000, len(all_count)):].sum() != 0:
            f_score = int(
                np.array(all_accuracy)[-min(1000, len(all_accuracy)):].sum() * 100000000 /
                np.array(all_count)[-min(1000, len(all_count)):].sum()) / 100000000
        else:
            f_score = 0

        iterator.set_description(
            'Loss:' + str(int(loss.item() * config.optimizer_iterations * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' +
            str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score))

    if len(iterator) % config.optimizer_iterations != 0:
        optimizer.step()
        optimizer.zero_grad()

    torch.cuda.empty_cache()

    return model, optimizer, all_loss, all_accuracy

def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively
    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer
    """

    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=8)
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    """
    Currently not changing the learning rate while weak supervision

    def change_lr(no):
        # Change learning rate while training
        for i in config.lr:
            if i == no:
                print('Learning Rate Changed to ', config.lr[i])
                for param_group in optimizer.param_groups:
                    param_group['lr'] = config.lr[i]

    change_lr(1)
    """

    all_loss = []
    all_accuracy = []

    for no, (image, character_map, affinity_map, character_weight, affinity_weight) in enumerate(iterator):

        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

        output = model(image)
        loss = loss_criterian(output, character_map, affinity_map, character_weight, affinity_weight).mean()

        all_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if len(all_accuracy) == 0:
            iterator.set_description(
                'Loss:' + str(int(loss.item() * 100000000) / 100000000) +
                ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
                '] Average Loss:' +
                str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000000) / 100000000))
        else:
            iterator.set_description(
                'Loss:' + str(int(loss.item() * 100000000) / 100000000) +
                ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
                '] Average Loss:' +
                str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000000) / 100000000) +
                '| Average F-Score: ' +
                str(int(np.array(all_accuracy)[-min(1000, len(all_accuracy)):].mean() * 100000000) / 100000000))

        # ---------- Calculating the F-score ------------ #

        if isinstance(output, list):
            output = torch.cat(output, dim=0)

        predicted_bbox = generate_word_bbox_batch(
            output[:, 0, :, :].data.cpu().numpy(),
            output[:, 1, :, :].data.cpu().numpy(),
            character_threshold=config.threshold_character,
            affinity_threshold=config.threshold_affinity)

        target_bbox = generate_word_bbox_batch(
            character_map.data.cpu().numpy(),
            affinity_map.data.cpu().numpy(),
            character_threshold=config.threshold_character,
            affinity_threshold=config.threshold_affinity)

        all_accuracy.append(
            calculate_batch_fscore(predicted_bbox, target_bbox, threshold=config.threshold_fscore))

    torch.cuda.empty_cache()

    return model, optimizer

def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively
    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer
    """

    def change_lr():
        # Change learning rate while training
        for param_group in optimizer.param_groups:
            param_group['lr'] = config.lr[iteration]
        print('Learning Rate Changed to ', config.lr[iteration])

    change_lr()

    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=8,
        shuffle=True)
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)

    all_loss = []
    all_accuracy = []
    all_count = []

    ground_truth = iterator.iterable.dataset.gt

    for no, (image, character_map, affinity_map, character_weight, affinity_weight, word_bbox, original_dim) in \
            enumerate(iterator):

        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

        output = model(image)
        # Gradients are accumulated over 4 batches, so the loss is scaled down by 4
        loss = loss_criterian(output, character_map, affinity_map, character_weight, affinity_weight).mean() / 4

        all_loss.append(loss.item() * 4)
        loss.backward()

        if (no + 1) % 4 == 0:
            optimizer.step()
            optimizer.zero_grad()

        # ---------- Calculating the F-score ------------ #

        if isinstance(output, list):
            output = torch.cat(output, dim=0)

        output = output.data.cpu().numpy()
        # image = image.data.cpu().numpy()
        original_dim = original_dim.cpu().numpy()

        target_bbox = []
        predicted_ic13 = []
        current_count = 0

        word_bbox = word_bbox.numpy()

        for __, _ in enumerate(word_bbox):

            if _[1] == 1:

                # ToDo - Understand why model.train() gives poor results but
                #        model.eval() with torch.no_grad() gives better results

                # Undo the resize-and-pad pre-processing to map the predicted
                # heat-maps back to the original image dimensions
                max_dim = original_dim[__].max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [
                    int(original_dim[__][0] * resizing_factor),
                    int(original_dim[__][1] * resizing_factor)]

                output[__, :, :, :] = np.uint8(output[__, :, :, :] * 255)

                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2

                character_bbox = cv2.resize(
                    output[__, 0, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[__][1], original_dim[__][0])) / 255

                affinity_bbox = cv2.resize(
                    output[__, 1, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[__][1], original_dim[__][0])) / 255

                predicted_bbox = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word)['word_bbox']

                predicted_ic13.append(predicted_bbox)
                target_bbox.append(np.array(ground_truth[_[0] % len(ground_truth)][1]['word_bbox'], dtype=np.int64))

                current_count += 1

        all_accuracy.append(
            calculate_batch_fscore(predicted_ic13, target_bbox, threshold=config.threshold_fscore) * current_count)
        all_count.append(current_count)

        # ------------- Setting Description ---------------- #

        if np.array(all_count)[-min(1000, len(all_count)):].sum() != 0:
            f_score = int(
                np.array(all_accuracy)[-min(1000, len(all_accuracy)):].sum() * 100000000 /
                np.array(all_count)[-min(1000, len(all_count)):].sum()) / 100000000
        else:
            f_score = 0

        iterator.set_description(
            'Loss:' + str(int(loss.item() * 4 * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' +
            str(int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score))

    # Flush any leftover accumulated gradients
    if len(iterator) % 4 != 0:
        optimizer.step()
        optimizer.zero_grad()

    torch.cuda.empty_cache()

    return model, optimizer, all_loss, all_accuracy

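# All of the training loops above share the same gradient-accumulation pattern: the loss is
# divided by the number of accumulation steps, optimizer.step() runs only every N batches, and a
# final step flushes leftover gradients when the dataset size is not a multiple of N. A minimal
# generic sketch of that pattern; model, dataloader and compute_loss are placeholders:
def train_with_accumulation(model, dataloader, optimizer, compute_loss, accumulation_steps=4):
    model.train()
    optimizer.zero_grad()

    for no, batch in enumerate(dataloader):
        # Scale the loss so the accumulated gradients average out over accumulation_steps batches
        loss = compute_loss(model, batch) / accumulation_steps
        loss.backward()

        if (no + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()

    # Flush the remaining gradients when len(dataloader) is not a multiple of accumulation_steps
    if len(dataloader) % accumulation_steps != 0:
        optimizer.step()
        optimizer.zero_grad()
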