def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively (Japanese-dataset variant).

    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer
    """

    def change_lr():
        # Reset the learning rate of every param group to the value
        # configured for this weak-supervision iteration.
        for param_group in optimizer.param_groups:
            param_group['lr'] = config.lr[iteration]
        print('Learning Rate Changed to ', config.lr[iteration])

    change_lr()

    # Mixed real/synthetic Japanese training set for this iteration.
    dataloader = DataLoader(
        DataLoaderMIX_JPN('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=config.num_workers['train'],
        shuffle=True)
    loss_criterian = DataParallelCriterion(Criterian())

    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)
    # NOTE(review): the function ends here — no training loop and no
    # `return model, optimizer` despite the docstring, and `loss_criterian`
    # and `iterator` are never used. This duplicate definition is shadowed
    # by later `def train(...)` definitions in this file. TODO: confirm
    # whether this body was truncated, then complete or delete it.
def main():
    """
    Entry point: build the model, optionally resume from a checkpoint,
    train on the SynthText dataloader, then save the final model and the
    training-loss plot under config.save_path.
    """
    # Fix RNG seeds for reproducibility (seed() is defined elsewhere).
    seed()
    # Keep a copy of the config used for this run next to its outputs.
    copyfile('train_synth/config.py', config.save_path + '/config.py')
    # Select the backbone architecture from config.
    if config.model_architecture == 'UNET_ResNet':
        from src.UNET_ResNet import UNetWithResnet50Encoder
        model = UNetWithResnet50Encoder()
    else:
        from src.craft_model import CRAFT
        model = CRAFT()
    # Count trainable parameters (filter must run before model is wrapped).
    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Total number of trainable parameters: ', params)
    model = DataParallelModel(model)
    loss_criterian = DataParallelCriterion(Criterian())
    if config.use_cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr[1])
    if config.pretrained:
        # Resume: restore weights, optimizer state, iteration counter
        # (encoded in the checkpoint filename) and the loss history.
        saved_model = torch.load(config.pretrained_path)
        model.load_state_dict(saved_model['state_dict'])
        optimizer.load_state_dict(saved_model['optimizer'])
        starting_no = int(config.pretrained_path.split('/')[-1].split('_')[0])
        all_loss = np.load(config.pretrained_loss_plot_training).tolist()
        print('Loaded the model')
    else:
        starting_no = 0
        all_loss = []
    # NOTE(review): accuracy history is reset even when resuming —
    # presumably intentional; confirm.
    all_accuracy = []
    print('Loading the dataloader')
    train_dataloader = DataLoaderSYNTH('train')
    train_dataloader = DataLoader(
        train_dataloader,
        batch_size=config.batch_size['train'],
        shuffle=True,
        num_workers=config.num_workers['train'])
    print('Loaded the dataloader')
    # NOTE(review): this call signature (dataloader, criterion, model,
    # optimizer, starting_no=..., all_loss=..., all_accuracy=...) does NOT
    # match any `train(model, optimizer, iteration)` defined in this file —
    # presumably it targets a SynthText train() from another module, and the
    # duplicate train() definitions here would shadow it. Verify which
    # train() this is meant to resolve to; also note the returned value is
    # bound to `all_loss` and then saved/plotted below.
    all_loss = train(
        train_dataloader, loss_criterian, model, optimizer,
        starting_no=starting_no, all_loss=all_loss, all_accuracy=all_accuracy)
    # Persist the final weights + optimizer state, and the loss curve.
    torch.save(
        {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }, config.save_path + '/final_model.pkl')
    np.save(config.save_path + '/loss_plot_training.npy', all_loss)
    plt.plot(all_loss)
    plt.savefig(config.save_path + '/loss_plot_training.png')
    plt.clf()
    print("Saved Final Model")
def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively.

    Runs one pass over the mixed (real + synthetic) dataset for this
    weak-supervision iteration, accumulating gradients over
    config.optimizer_iterations batches, periodically decaying the learning
    rate, saving visualisations, computing F-score/precision/recall on the
    non-synthetic samples, and running the test() hook.

    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer, all_loss, all_f_score
    """
    # Set the base learning rate for this weak-supervision iteration
    # (change_lr here is a module-level helper taking (optimizer, lr)).
    optimizer = change_lr(optimizer, config.lr[iteration])
    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=config.num_workers['train'],
        shuffle=True,
        worker_init_fn=_init_fn,
    )
    loss_criterian = DataParallelCriterion(Criterian())
    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)
    # Running histories; *_count weights per-batch scores by how many
    # non-synthetic samples contributed to them.
    all_loss = []
    all_precision = []
    all_f_score = []
    all_recall = []
    all_count = []
    for no, (
            image,
            character_map,
            affinity_map,
            character_weight,
            affinity_weight,
            dataset_name,
            text_target,
            item,
            original_dim) in enumerate(iterator):
        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()
        # Step-decay: multiply the iteration lr by 0.8 every
        # config.change_lr batches.
        if (no + 1) % config.change_lr == 0:
            optimizer = change_lr(optimizer, config.lr[iteration]*(0.8**((no + 1)//config.change_lr - 1)))
        output = model(image)
        # Divide by optimizer_iterations so accumulated gradients average
        # out to one effective step; multiply back when logging.
        loss = loss_criterian(
            output, character_map, affinity_map, character_weight, affinity_weight
        ).mean()/config.optimizer_iterations
        all_loss.append(loss.item()*config.optimizer_iterations)
        loss.backward()
        # Gradient accumulation: step only every optimizer_iterations batches.
        if (no + 1) % config.optimizer_iterations == 0:
            optimizer.step()
            optimizer.zero_grad()
        # ---------- Calculating the F-score ------------ #
        if (no + 1) % config.check_iterations == 0:
            # DataParallel may return a list of per-GPU outputs.
            if type(output) == list:
                output = torch.cat(output, dim=0)
            # Clamp predictions to [0, 1] in place before visualising.
            output[output < 0] = 0
            output[output > 1] = 1
            save(no, dataset_name, output, image, character_map, affinity_map, character_weight, affinity_weight)
        if (no + 1) % config.calc_f_score == 0:
            if type(output) == list:
                output = torch.cat(output, dim=0)
            target_ic13 = []
            predicted_ic13 = []
            target_text = []
            current_count = 0
            output = output.data.cpu().numpy()
            output[output > 1] = 1
            output[output < 0] = 0
            original_dim = original_dim.numpy()
            # Score only non-synthetic samples against their ground truth.
            for __, _ in enumerate(dataset_name):
                if _ != 'SYNTH':
                    predicted_ic13.append(resize_bbox(original_dim[__], output[__], config)['word_bbox'])
                    target_ic13.append(np.array(dataloader.dataset.gt[item[__]][1]['word_bbox'].copy(), dtype=np.int32))
                    # '#@#@#@' is the separator the dataset uses to join
                    # the per-word target strings.
                    target_text.append(text_target[__].split('#@#@#@'))
                    current_count += 1
            if len(predicted_ic13) != 0:
                f_score, precision, recall = calculate_batch_fscore(
                    predicted_ic13, target_ic13, text_target=target_text,
                    threshold=config.threshold_fscore)
                # Weight by sample count so the running averages below are
                # per-sample, not per-batch.
                all_f_score.append(f_score*current_count)
                all_precision.append(precision*current_count)
                all_recall.append(recall*current_count)
                all_count.append(current_count)
        # ------------- Setting Description ---------------- #
        # Windowed (last 100 score entries) per-sample averages, truncated
        # to 4 decimal places via int-scaling.
        if np.array(all_count)[-min(100, len(all_count)):].sum() != 0:
            count = np.array(all_count)[-min(100, len(all_count)):].sum()
            f_score = int(np.array(all_f_score)[-min(100, len(all_f_score)):].sum() * 10000 / count) / 10000
            precision = int(np.array(all_precision)[-min(100, len(all_precision)):].sum() * 10000 / count) / 10000
            recall = int(np.array(all_recall)[-min(100, len(all_recall)):].sum() * 10000 / count) / 10000
        else:
            f_score = 0
            precision = 0
            recall = 0
        #
        iterator.set_description(
            'Loss:' + str(int(loss.item() * config.optimizer_iterations * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' + str(
                int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score) +
            '| Average Recall: ' + str(recall) +
            '| Average Precision: ' + str(precision)
        )
        # Periodic evaluation; free the batch tensors first to reclaim GPU
        # memory, then restore train mode after test().
        if (no + 1) % config.test_now == 0:
            del image, loss, affinity_weight, character_weight, affinity_map, character_map, output
            print('\nF-score of testing: ', test(model, iteration), '\n')
            model.train()
    # Flush any partially-accumulated gradients left by the last batches.
    if len(iterator) % config.optimizer_iterations != 0:
        optimizer.step()
        optimizer.zero_grad()
    torch.cuda.empty_cache()
    return model, optimizer, all_loss, all_f_score
def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively.

    Single pass over the mixed dataset for this weak-supervision iteration
    with gradient accumulation; F-score is computed every batch on the
    non-synthetic samples.

    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer, all_loss, all_accuracy
    """

    def change_lr():
        # Change learning rate while training
        for param_group in optimizer.param_groups:
            param_group['lr'] = config.lr[iteration]
        print('Learning Rate Changed to ', config.lr[iteration])

    change_lr()
    # num_workers=0: data loading on the main process (other variants of
    # this function use config.num_workers — presumably a debug setting).
    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=0,
        shuffle=True)
    loss_criterian = DataParallelCriterion(Criterian())
    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)
    all_loss = []
    all_accuracy = []
    all_count = []
    for no, (image, character_map, affinity_map, character_weight,
             affinity_weight, dataset_name, text_target, item,
             original_dim) in enumerate(iterator):
        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(
            ), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(
            ), affinity_weight.cuda()
        output = model(image)
        # Scale down so gradients accumulated over optimizer_iterations
        # batches average to one effective step; scale back up for logging.
        loss = loss_criterian(
            output, character_map, affinity_map, character_weight,
            affinity_weight).mean() / config.optimizer_iterations
        all_loss.append(loss.item() * config.optimizer_iterations)
        loss.backward()
        if (no + 1) % config.optimizer_iterations == 0:
            optimizer.step()
            optimizer.zero_grad()
        # ---------- Calculating the F-score ------------ #
        # DataParallel may return a list of per-GPU outputs.
        if type(output) == list:
            output = torch.cat(output, dim=0)
        # Clamp predictions to [0, 1] in place.
        output[output < 0] = 0
        output[output > 1] = 1
        target_ic13 = []
        predicted_ic13 = []
        target_text = []
        current_count = 0
        if no % config.check_iterations == 0:
            save(no, dataset_name, output, image, character_map,
                 affinity_map, character_weight, affinity_weight)
        output = output.data.cpu().numpy()
        original_dim = original_dim.numpy()
        # Score only the non-synthetic samples against their ground truth.
        for __, _ in enumerate(dataset_name):
            if _ != 'SYNTH':
                predicted_ic13.append(
                    resize_bbox(original_dim[__], output[__], config)['word_bbox'])
                target_ic13.append(
                    np.array(
                        dataloader.dataset.gt[item[__]][1]['word_bbox'].copy(),
                        dtype=np.int32))
                # '~' is the separator joining per-word target strings here
                # (other variants use '#@#@#@').
                target_text.append(text_target[__].split('~'))
                current_count += 1
        if len(predicted_ic13) != 0:
            # Weight the batch f-score by sample count for per-sample
            # averaging below.
            all_accuracy.append(
                calculate_batch_fscore(
                    predicted_ic13,
                    target_ic13,
                    text_target=target_text,
                    threshold=config.threshold_fscore) * current_count)
            all_count.append(current_count)
        # ------------- Setting Description ---------------- #
        # Windowed (last 1000 entries) per-sample average f-score,
        # truncated to 8 decimal places via int-scaling.
        if np.array(all_count)[-min(1000, len(all_count)):].sum() != 0:
            f_score = int(
                np.array(all_accuracy)[-min(1000, len(all_accuracy)):].sum() * 100000000 / np.array(
                    all_count)[-min(1000, len(all_count)):].sum()) / 100000000
        else:
            f_score = 0
        iterator.set_description(
            'Loss:' + str(
                int(loss.item() * config.optimizer_iterations * 100000) / 100000) +
            ' Iterations:[' + str(no) + '/' + str(len(iterator)) +
            '] Average Loss:' + str(
                int(
                    np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score))
    # Flush any partially-accumulated gradients left by the last batches.
    if len(iterator) % config.optimizer_iterations != 0:
        optimizer.step()
        optimizer.zero_grad()
    torch.cuda.empty_cache()
    return model, optimizer, all_loss, all_accuracy
def train(model, optimizer, iteration):
    """
    Train the weak-supervised model iteratively.

    Variant that receives a per-sample `word_bbox` tensor from the loader,
    un-pads/resizes the predicted character and affinity maps back to the
    original image size, and generates word boxes with generate_word_bbox()
    before scoring.

    :param model: Pre-trained model on SynthText
    :param optimizer: Pre-trained model's optimizer
    :param iteration: current iteration of weak-supervision
    :return: model, optimizer, all_loss, all_accuracy
    """

    def change_lr():
        # Change learning rate while training
        for param_group in optimizer.param_groups:
            param_group['lr'] = config.lr[iteration]
        print('Learning Rate Changed to ', config.lr[iteration])

    change_lr()
    # NOTE(review): num_workers and the gradient-accumulation factor (4)
    # are hard-coded here; other variants read them from config — confirm
    # which is current.
    dataloader = DataLoader(
        DataLoaderMIX('train', iteration),
        batch_size=config.batch_size['train'],
        num_workers=8,
        shuffle=True)
    loss_criterian = DataParallelCriterion(Criterian())
    model.train()
    optimizer.zero_grad()
    iterator = tqdm(dataloader)
    all_loss = []
    all_accuracy = []
    all_count = []
    # Ground-truth annotations held by the dataset backing the dataloader.
    ground_truth = iterator.iterable.dataset.gt
    for no, (image, character_map, affinity_map, character_weight, affinity_weight, word_bbox, original_dim) in \
            enumerate(iterator):
        if config.use_cuda:
            image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
            character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()
        output = model(image)
        # Loss divided by 4 for gradient accumulation over 4 batches;
        # multiplied back when logging.
        loss = loss_criterian(output, character_map, affinity_map, character_weight, affinity_weight).mean()/4
        all_loss.append(loss.item()*4)
        loss.backward()
        if (no + 1) % 4 == 0:
            optimizer.step()
            optimizer.zero_grad()
        # ---------- Calculating the F-score ------------ #
        # DataParallel may return a list of per-GPU outputs.
        if type(output) == list:
            output = torch.cat(output, dim=0)
        output = output.data.cpu().numpy()
        # image = image.data.cpu().numpy()
        original_dim = original_dim.cpu().numpy()
        target_bbox = []
        predicted_ic13 = []
        current_count = 0
        word_bbox = word_bbox.numpy()
        # Each row of word_bbox appears to be (dataset index, flag);
        # flag == 1 presumably marks samples with real annotations — verify
        # against the DataLoaderMIX implementation.
        for __, _ in enumerate(word_bbox):
            if _[1] == 1:
                # ToDo - Understand why model.train() gives poor results but model.eval() with torch.no_grad() gives better results
                # Invert the loader's resize-to-768-and-center-pad transform
                # to map predictions back onto the original image.
                max_dim = original_dim[__].max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [int(original_dim[__][0] * resizing_factor), int(original_dim[__][1] * resizing_factor)]
                output[__, :, :, :] = np.uint8(output[__, :, :, :] * 255)
                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2
                # Channel 0: character heat-map; channel 1: affinity map.
                character_bbox = cv2.resize(
                    output[__, 0, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[__][1], original_dim[__][0])) / 255
                affinity_bbox = cv2.resize(
                    output[__, 1, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[__][1], original_dim[__][0])) / 255
                predicted_bbox = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word)['word_bbox']
                predicted_ic13.append(predicted_bbox)
                # Modulo wrap-around into the ground-truth list — presumably
                # because synthetic samples extend the index range; confirm.
                target_bbox.append(np.array(ground_truth[_[0] % len(ground_truth)][1]['word_bbox'], dtype=np.int64))
                current_count += 1
        # Batch f-score weighted by contributing sample count.
        all_accuracy.append(
            calculate_batch_fscore(
                predicted_ic13, target_bbox, threshold=config.threshold_fscore)*current_count
        )
        all_count.append(current_count)
        # ------------- Setting Description ---------------- #
        # Windowed (last 1000 entries) per-sample average f-score,
        # truncated to 8 decimal places via int-scaling.
        if np.array(all_count)[-min(1000, len(all_count)):].sum() != 0:
            f_score = int(
                np.array(all_accuracy)[-min(1000, len(all_accuracy)):].sum() * 100000000 /
                np.array(all_count)[-min(1000, len(all_count)):].sum()) / 100000000
        else:
            f_score = 0
        iterator.set_description(
            'Loss:' + str(int(loss.item() * 4 * 100000) / 100000) + ' Iterations:[' + str(no) + '/' + str(
                len(iterator)) + '] Average Loss:' + str(
                int(np.array(all_loss)[-min(1000, len(all_loss)):].mean() * 100000) / 100000) +
            '| Average F-Score: ' + str(f_score)
        )
    # Flush any partially-accumulated gradients left by the last batches.
    if len(iterator) % 4 != 0:
        optimizer.step()
        optimizer.zero_grad()
    torch.cuda.empty_cache()
    return model, optimizer, all_loss, all_accuracy