def synthesize_with_score(dataloader, model, base_target_path):
    """
    Run a pre-trained model over a dataset and write the targets for the next iteration.

    For every image in every batch the two output channels (index 0: character
    heatmap, index 1: affinity heatmap) are cropped to the centred region of
    size ``before_pad_dim``, resized to the original image size and passed to
    ``generate_word_bbox``. The result is post-processed with
    ``get_weighted_character_target`` against the dataset annotations and
    written to ``<base_target_path>/<image stem>.json``.

    When ``config.visualize_generated`` is truthy, thresholded heatmaps, the
    word boxes drawn on the image, and the regenerated targets/weight maps are
    also saved as PNG files under ``<base_target_path>_predicted/...`` and
    ``<base_target_path>_next_target/...``.

    Images for which ``generate_word_bbox`` reports an ``'error_message'`` are
    logged with ``print`` and skipped (no json is written for them).

    NOTE(review): this file defines ``synthesize_with_score`` a second time
    further down; in Python the later definition replaces this one at import
    time. Confirm which version is meant to be live.

    :param dataloader: dataloader for icdar 2013 dataset; must yield
        ``(image, image_name, original_dim, item)`` batches and expose
        ``dataset.gt['annots']``, ``dataset.imnames`` and ``dataset.unknown``
    :param model: pre-trained model
    :param base_target_path: path where to store the predictions
    :return: None
    """

    # Side length of the square the heatmaps are cropped from.
    # presumably the network input is letter-boxed to 768x768 - TODO confirm
    input_size = 768

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotations of the images in this batch, in batch order
            annots = [
                dataloader.dataset.gt['annots'][dataloader.dataset.imnames[idx]]
                for idx in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some models return a list of tensors; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                max_dim = original_dim[i].max()
                resizing_factor = input_size / max_dim
                before_pad_dim = [
                    int(original_dim[i][0] * resizing_factor),
                    int(original_dim[i][1] * resizing_factor),
                ]

                # Quantise to 256 levels; the array keeps its own dtype
                output[i, :, :, :] = np.uint8(output[i, :, :, :] * 255)

                height_pad = (input_size - before_pad_dim[0]) // 2
                width_pad = (input_size - before_pad_dim[1]) // 2

                # Centred window that is cropped out of every 2-D map
                crop = (
                    slice(height_pad, height_pad + before_pad_dim[0]),
                    slice(width_pad, width_pad + before_pad_dim[1]),
                )
                # cv2.resize expects dsize as (width, height)
                original_size = (original_dim[i][1], original_dim[i][0])

                character_bbox = cv2.resize(output[i, 0][crop], original_size) / 255
                affinity_bbox = cv2.resize(output[i, 1][crop], original_size) / 255

                # Generating word-bbox given character and affinity heatmap
                generated_targets = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity)

                if 'error_message' in generated_targets:
                    print('There was an error while generating the target of ', image_name[i])
                    print('Error:', generated_targets['error_message'])
                    continue

                # File name without its last extension, shared by every output file
                name_stem = '.'.join(image_name[i].split('.')[:-1])
                visualize = config.visualize_generated

                if visualize:

                    # Clean copy of the input image at the original size. It is
                    # built once and copied before every drawing so the two
                    # visualisations do not contaminate each other.
                    clean_image = (image[i].data.cpu().numpy() * 255).astype(
                        np.uint8).transpose(1, 2, 0)
                    clean_image = cv2.resize(clean_image[crop], original_size)

                    # Saving affinity heat map
                    plt.imsave(
                        base_target_path + '_predicted/affinity/' + name_stem + '.png',
                        np.float32(affinity_bbox > config.threshold_affinity),
                        cmap='gray')

                    # Saving character heat map
                    plt.imsave(
                        base_target_path + '_predicted/character/' + name_stem + '.png',
                        np.float32(character_bbox > config.threshold_character),
                        cmap='gray')

                    image_i = clean_image.copy()
                    cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)

                    # Saving word bbox drawn on the original image
                    plt.imsave(
                        base_target_path + '_predicted/word_bbox/' + name_stem + '.png',
                        image_i)

                # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

                generated_targets = get_weighted_character_target(
                    generated_targets,
                    {'bbox': annots[i]['bbox'], 'text': annots[i]['text']},
                    dataloader.dataset.unknown,
                    config.threshold_fscore)

                if visualize:

                    image_i = clean_image.copy()

                    # Generated word_bbox after postprocessing
                    cv2.drawContours(image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)

                    # Saving word bbox after postprocessing
                    plt.imsave(
                        base_target_path + '_next_target/word_bbox/' + name_stem + '.png',
                        image_i)

                    target_shape = (image_i.shape[0], image_i.shape[1])

                    # Generate affinity heatmap after postprocessing
                    affinity_target, affinity_weight_map = generate_target_others(
                        target_shape,
                        generated_targets['affinity'].copy(),
                        generated_targets['weights'].copy())

                    # Generate character heatmap after postprocessing
                    character_target, characters_weight_map = generate_target_others(
                        target_shape,
                        generated_targets['characters'].copy(),
                        generated_targets['weights'].copy())

                    # Saving the affinity heatmap
                    plt.imsave(
                        base_target_path + '_next_target/affinity/' + name_stem + '.png',
                        affinity_target,
                        cmap='gray')

                    # Saving the character heatmap
                    plt.imsave(
                        base_target_path + '_next_target/character/' + name_stem + '.png',
                        character_target,
                        cmap='gray')

                    # Saving the affinity weight map
                    plt.imsave(
                        base_target_path + '_next_target/affinity_weight/' + name_stem + '.png',
                        affinity_weight_map,
                        cmap='gray')

                    # Saving the character weight map
                    plt.imsave(
                        base_target_path + '_next_target/character_weight/' + name_stem + '.png',
                        characters_weight_map,
                        cmap='gray')

                # Saving the target for next iteration in json format
                # (arrays are converted to nested lists so json can encode them)
                generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
                generated_targets['characters'] = [
                    word_i.tolist() for word_i in generated_targets['characters']
                ]
                generated_targets['affinity'] = [
                    word_i.tolist() for word_i in generated_targets['affinity']
                ]

                with open(base_target_path + '/' + name_stem + '.json', 'w') as f:
                    json.dump(generated_targets, f)
def synthesize_with_score(dataloader, model, base_target_path):
    """
    Run a pre-trained model over a dataset and save heatmaps, word boxes and json targets.

    For every image in every batch:

    * the thresholded raw affinity (channel 1) and character (channel 0)
      outputs are saved to ``<base_target_path>_affinity/`` and
      ``<base_target_path>_character/``;
    * both channels and the input image are cropped to the centred region of
      size ``before_pad_dim`` and resized to the original image size;
    * word boxes are produced by ``generate_word_bbox`` and post-processed by
      ``get_weighted_character_target`` against the dataset annotations;
    * the word boxes are drawn on the image and saved to
      ``<base_target_path>_word_bbox/``; the targets are dumped to
      ``<base_target_path>/<image stem>.json``.

    Images for which ``generate_word_bbox`` reports an ``'error_message'`` are
    logged with ``print`` and skipped after the raw heatmaps have been saved.

    NOTE(review): an earlier definition of ``synthesize_with_score`` exists
    above in this file; being the later one, this definition is the one that
    stays bound to the name.

    :param dataloader: dataloader for icdar 2013 dataset; must yield
        ``(image, image_name, original_dim, item)`` batches and expose
        ``dataset.gt['annots']``, ``dataset.imnames`` and ``dataset.unknown``
    :param model: pre-trained model
    :param base_target_path: path where to store the predictions
    :return: None
    """

    # Side length of the square the heatmaps are cropped from.
    # presumably the network input is letter-boxed to 768x768 - TODO confirm
    input_size = 768

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotations of the images in this batch, in batch order
            annots = [
                dataloader.dataset.gt['annots'][dataloader.dataset.imnames[idx]]
                for idx in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some models return a list of tensors; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                # File name without its last extension, shared by every output file
                name_stem = '.'.join(image_name[i].split('.')[:-1])

                image_i = (image[i].data.cpu().numpy() * 255).astype(np.uint8).transpose(1, 2, 0)

                max_dim = original_dim[i].max()
                resizing_factor = input_size / max_dim
                before_pad_dim = [
                    int(original_dim[i][0] * resizing_factor),
                    int(original_dim[i][1] * resizing_factor),
                ]

                # The raw heatmaps must be saved before the in-place
                # quantisation below, which rescales them to 0..255.
                plt.imsave(
                    base_target_path + '_affinity/' + name_stem + '.png',
                    np.float32(output[i, 1, :, :] > config.threshold_affinity),
                    cmap='gray')

                plt.imsave(
                    base_target_path + '_character/' + name_stem + '.png',
                    np.float32(output[i, 0, :, :] > config.threshold_character),
                    cmap='gray')

                # Quantise to 256 levels; the array keeps its own dtype
                output[i, :, :, :] = np.uint8(output[i, :, :, :] * 255)

                height_pad = (input_size - before_pad_dim[0]) // 2
                width_pad = (input_size - before_pad_dim[1]) // 2

                # Centred window that is cropped out of every 2-D map
                crop = (
                    slice(height_pad, height_pad + before_pad_dim[0]),
                    slice(width_pad, width_pad + before_pad_dim[1]),
                )
                # cv2.resize expects dsize as (width, height)
                original_size = (original_dim[i][1], original_dim[i][0])

                image_i = cv2.resize(image_i[crop], original_size)
                character_bbox = cv2.resize(output[i, 0][crop], original_size) / 255
                affinity_bbox = cv2.resize(output[i, 1][crop], original_size) / 255

                generated_targets = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity)

                if 'error_message' in generated_targets:
                    print('There was an error while generating the target of ', image_name[i])
                    print('Error:', generated_targets['error_message'])
                    continue

                generated_targets = get_weighted_character_target(
                    generated_targets,
                    {'bbox': annots[i]['bbox'], 'text': annots[i]['text']},
                    dataloader.dataset.unknown,
                    config.threshold_fscore)

                # Each word box is converted to an array before drawing
                cv2.drawContours(
                    image_i,
                    [np.array(word_bbox) for word_bbox in generated_targets['word_bbox']],
                    -1, (0, 255, 0), 2)

                plt.imsave(base_target_path + '_word_bbox/' + name_stem + '.png', image_i)

                with open(base_target_path + '/' + name_stem + '.json', 'w') as f:
                    json.dump(generated_targets, f)
def generate_next_targets(original_dim, output, image, base_target_path, image_name, annots, dataloader, no):
    """
    Build and save the next-iteration targets for a single image and score them.

    The two heatmap channels of ``output`` (index 0: character, index 1:
    affinity) are cropped to the centred region of size ``before_pad_dim``,
    resized to half of the original image size and turned into word boxes with
    ``generate_word_bbox``; the resulting coordinates are multiplied by 2. The
    boxes are post-processed with ``get_weighted_character_target`` and
    written to ``<base_target_path>/<image_name>.json``.

    When ``config.visualize_generated`` is truthy, PNG visualisations are
    written under ``<base_target_path>_predicted/...`` and
    ``<base_target_path>_next_target/...``.

    :param original_dim: indexable with ``[0]`` (height) and ``[1]`` (width),
        supporting ``.max()``  # assumes a numpy array - TODO confirm
    :param output: model output for one image, indexed ``[channel, row, col]``
    :param image: image tensor for one image (only read when visualising)
    :param base_target_path: path prefix where results are stored
    :param image_name: name of the image; when ``'datapile'`` is in
        ``config.dataset_name`` only the part after the last ``'/'`` is used
    :param annots: mapping with ``'bbox'`` and ``'text'`` entries
    :param dataloader: provides ``dataset.unknown`` and ``dataset.gt['unknown']``
    :param no: not used by the current body
    :return: the ``'f_score'`` entry of ``calculate_fscore`` comparing the word
        boxes before and after post-processing
    """

    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]

    visualize = config.visualize_generated

    def png_path(sub_dir):
        # Output path of a visualisation: prefix + sub directory + name without last extension
        return base_target_path + sub_dir + '.'.join(image_name.split('.')[:-1]) + '.png'

    full_height, full_width = original_dim[0], original_dim[1]

    resizing_factor = 768 / original_dim.max()
    before_pad_dim = [int(full_height * resizing_factor), int(full_width * resizing_factor)]

    output = np.uint8(output * 255)

    height_pad = (768 - before_pad_dim[0]) // 2
    width_pad = (768 - before_pad_dim[1]) // 2

    # Centred window cropped out of every 2-D map
    crop = (
        slice(height_pad, height_pad + before_pad_dim[0]),
        slice(width_pad, width_pad + before_pad_dim[1]),
    )

    # cv2.resize takes (width, height)
    full_size = (full_width, full_height)
    half_size = (full_width // 2, full_height // 2)

    def restored_image():
        # De-normalised input image, cropped and resized to the original size
        pixels = denormalize_mean_variance(image.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(pixels[crop], full_size)

    character_bbox = cv2.resize(output[0][crop], half_size) / 255
    affinity_bbox = cv2.resize(output[1][crop], half_size) / 255

    # Generating word-bbox given character and affinity heatmap
    generated_targets = generate_word_bbox(
        character_bbox,
        affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)

    # Boxes were found on half-size maps; scale coordinates back up
    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    generated_targets['characters'] = [word * 2 for word in generated_targets['characters']]
    generated_targets['affinity'] = [word * 2 for word in generated_targets['affinity']]

    if visualize:

        character_full = cv2.resize((character_bbox * 255).astype(np.uint8), full_size) / 255
        affinity_full = cv2.resize((affinity_bbox * 255).astype(np.uint8), full_size) / 255

        canvas = restored_image()

        # Saving affinity heat map
        plt.imsave(
            png_path('_predicted/affinity/'),
            np.float32(affinity_full > config.threshold_affinity_upper),
            cmap='gray')

        # Saving character heat map
        plt.imsave(
            png_path('_predicted/character/'),
            np.float32(character_full > config.threshold_character_upper),
            cmap='gray')

        cv2.drawContours(canvas, generated_targets['word_bbox'], -1, (0, 255, 0), 2)

        # Saving word bbox drawn on the original image
        plt.imsave(png_path('_predicted/word_bbox/'), canvas)

    predicted_word_bbox = generated_targets['word_bbox'].copy()

    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

    ground_truth = {'bbox': annots['bbox'], 'text': annots['text']}
    generated_targets = get_weighted_character_target(
        generated_targets,
        ground_truth,
        dataloader.dataset.unknown,
        config.threshold_fscore,
        config.weight_threshold)

    target_word_bbox = generated_targets['word_bbox'].copy()

    score = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])
    f_score = score['f_score']

    if visualize:

        canvas = restored_image()

        # Generated word_bbox after postprocessing
        cv2.drawContours(canvas, generated_targets['word_bbox'], -1, (0, 255, 0), 2)

        # Saving word bbox after postprocessing
        plt.imsave(png_path('_next_target/word_bbox/'), canvas)

        target_shape = (canvas.shape[0], canvas.shape[1])

        # Generate affinity heatmap after postprocessing (second weight column)
        affinity_target, affinity_weight_map = generate_target_others(
            target_shape,
            generated_targets['affinity'].copy(),
            np.array(generated_targets['weights'])[:, 1])

        # Generate character heatmap after postprocessing (first weight column)
        character_target, characters_weight_map = generate_target_others(
            target_shape,
            generated_targets['characters'].copy(),
            np.array(generated_targets['weights'])[:, 0])

        # Saving the affinity heatmap
        plt.imsave(png_path('_next_target/affinity/'), affinity_target, cmap='gray')

        # Saving the character heatmap
        plt.imsave(png_path('_next_target/character/'), character_target, cmap='gray')

        # Saving the affinity weight map
        plt.imsave(png_path('_next_target/affinity_weight/'), affinity_weight_map, cmap='gray')

        # Saving the character weight map
        plt.imsave(png_path('_next_target/character_weight/'), characters_weight_map, cmap='gray')

    # Saving the target for next iteration in json format
    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    for key in ('characters', 'affinity'):
        generated_targets[key] = [word_i.tolist() for word_i in generated_targets[key]]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score