Example #1
def synthesize_with_score(dataloader, model, base_target_path):
    """
    Given a dataloader over a set of images (ICDAR 2013 dataset) and a pre-trained model, generate the character
    heatmap and affinity heatmap, and a JSON file with all the annotations.
    :param dataloader: dataloader for the ICDAR 2013 dataset
    :param model: pre-trained model
    :param base_target_path: path where the predictions are stored
    :return: None
	"""

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)

        for no, (image, image_name, original_dim, item) in enumerate(iterator):

            annots = []

            for i in item:
                annot = dataloader.dataset.gt['annots'][
                    dataloader.dataset.imnames[i]]
                annots.append(annot)

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                max_dim = original_dim[i].max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [
                    int(original_dim[i][0] * resizing_factor),
                    int(original_dim[i][1] * resizing_factor)
                ]

                output[i, :, :, :] = np.uint8(output[i, :, :, :] * 255)

                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2

                character_bbox = cv2.resize(
                    output[i, 0, height_pad:height_pad + before_pad_dim[0],
                           width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[i][1], original_dim[i][0])) / 255

                affinity_bbox = cv2.resize(
                    output[i, 1, height_pad:height_pad + before_pad_dim[0],
                           width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[i][1], original_dim[i][0])) / 255

                image_i = (image[i].data.cpu().numpy() * 255).astype(
                    np.uint8).transpose(1, 2, 0)
                image_i = cv2.resize(
                    image_i[height_pad:height_pad + before_pad_dim[0],
                            width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[i][1], original_dim[i][0]))
                image_i_backup = image_i.copy()

                # Generating word-bbox given character and affinity heatmap

                generated_targets = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity)

                if 'error_message' in generated_targets.keys():
                    print('There was an error while generating the target of ',
                          image_name[i])
                    print('Error:', generated_targets['error_message'])
                    continue

                if config.visualize_generated:

                    # Saving affinity heat map
                    plt.imsave(
                        base_target_path + '_predicted/affinity/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.png',
                        np.float32(affinity_bbox > config.threshold_affinity),
                        cmap='gray')

                    # Saving character heat map
                    plt.imsave(
                        base_target_path + '_predicted/character/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.png',
                        np.float32(
                            character_bbox > config.threshold_character),
                        cmap='gray')

                    cv2.drawContours(image_i, generated_targets['word_bbox'],
                                     -1, (0, 255, 0), 2)

                    # Saving word bbox drawn on the original image
                    plt.imsave(
                        base_target_path + '_predicted/word_bbox/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.png',
                        image_i)

                # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

                generated_targets = get_weighted_character_target(
                    generated_targets, {
                        'bbox': annots[i]['bbox'],
                        'text': annots[i]['text']
                    }, dataloader.dataset.unknown, config.threshold_fscore)

                if config.visualize_generated:

                    image_i = (image[i].data.cpu().numpy() * 255).astype(
                        np.uint8).transpose(1, 2, 0)
                    image_i = cv2.resize(
                        image_i[height_pad:height_pad + before_pad_dim[0],
                                width_pad:width_pad + before_pad_dim[1]],
                        (original_dim[i][1], original_dim[i][0]))

                    # Generated word_bbox after postprocessing
                    cv2.drawContours(image_i, generated_targets['word_bbox'],
                                     -1, (0, 255, 0), 2)

                    # Saving word bbox after postprocessing
                    plt.imsave(
                        base_target_path + '_next_target/word_bbox/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.png',
                        image_i)

                    # Generate affinity heatmap after postprocessing
                    affinity_target, affinity_weight_map = generate_target_others(
                        (image_i.shape[0], image_i.shape[1]),
                        generated_targets['affinity'].copy(),
                        generated_targets['weights'].copy())

                    # Generate character heatmap after postprocessing
                    character_target, characters_weight_map = generate_target_others(
                        (image_i.shape[0], image_i.shape[1]),
                        generated_targets['characters'].copy(),
                        generated_targets['weights'].copy())

                    # Saving the affinity heatmap
                    plt.imsave(base_target_path + '_next_target/affinity/' +
                               '.'.join(image_name[i].split('.')[:-1]) +
                               '.png',
                               affinity_target,
                               cmap='gray')

                    # Saving the character heatmap
                    plt.imsave(base_target_path + '_next_target/character/' +
                               '.'.join(image_name[i].split('.')[:-1]) +
                               '.png',
                               character_target,
                               cmap='gray')

                    # Saving the affinity weight map
                    plt.imsave(
                        base_target_path + '_next_target/affinity_weight/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.png',
                        affinity_weight_map,
                        cmap='gray')

                    # Saving the character weight map
                    plt.imsave(
                        base_target_path + '_next_target/character_weight/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.png',
                        characters_weight_map,
                        cmap='gray')

                # Saving the target for next iteration in json format

                generated_targets['word_bbox'] = generated_targets[
                    'word_bbox'].tolist()
                generated_targets['characters'] = [
                    word_i.tolist()
                    for word_i in generated_targets['characters']
                ]
                generated_targets['affinity'] = [
                    word_i.tolist() for word_i in generated_targets['affinity']
                ]

                with open(
                        base_target_path + '/' +
                        '.'.join(image_name[i].split('.')[:-1]) + '.json',
                        'w') as f:
                    json.dump(generated_targets, f)
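
A minimal preparation sketch for calling the function above: it writes into a fixed set of sub-directories derived from base_target_path (taken from the plt.imsave and json.dump calls in the code), so a caller typically creates them up front. The output root below is a hypothetical example; note that the '_predicted' and '_next_target' suffixes are appended to the root path itself rather than nested inside it.

import os

base_target_path = 'targets/iteration_0'  # hypothetical output root

# Sub-directories used by synthesize_with_score; the empty suffix is where the
# per-image JSON targets are written.
for suffix in ['',
               '_predicted/affinity', '_predicted/character', '_predicted/word_bbox',
               '_next_target/affinity', '_next_target/character', '_next_target/word_bbox',
               '_next_target/affinity_weight', '_next_target/character_weight']:
    os.makedirs(base_target_path + suffix, exist_ok=True)

# synthesize_with_score(dataloader, model, base_target_path)  # with a prepared dataloader and model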
Example #2
def synthesize_with_score(dataloader, model, base_target_path):

	"""
	Given a dataloader over a set of images (ICDAR 2013 dataset) and a pre-trained model, generate the character
	heatmap and affinity heatmap, and a JSON file with all the annotations.
	:param dataloader: dataloader for the ICDAR 2013 dataset
	:param model: pre-trained model
	:param base_target_path: path where the predictions are stored
	:return: None
	"""

	with torch.no_grad():

		model.eval()
		iterator = tqdm(dataloader)

		for no, (image, image_name, original_dim, item) in enumerate(iterator):

			annots = []

			for i in item:
				annot = dataloader.dataset.gt['annots'][dataloader.dataset.imnames[i]]
				annots.append(annot)

			if config.use_cuda:
				image = image.cuda()

			output = model(image)

			if type(output) == list:
				output = torch.cat(output, dim=0)

			output = output.data.cpu().numpy()
			original_dim = original_dim.cpu().numpy()

			for i in range(output.shape[0]):

				# --------- Resizing it back to the original image size and saving it ----------- #

				image_i = (image[i].data.cpu().numpy() * 255).astype(np.uint8).transpose(1, 2, 0)

				max_dim = original_dim[i].max()
				resizing_factor = 768/max_dim
				before_pad_dim = [int(original_dim[i][0]*resizing_factor), int(original_dim[i][1]*resizing_factor)]

				plt.imsave(
					base_target_path + '_affinity/'+'.'.join(image_name[i].split('.')[:-1])+'.png',
					np.float32(output[i, 1, :, :] > config.threshold_affinity),
					cmap='gray')
				plt.imsave(
					base_target_path + '_character/'+'.'.join(image_name[i].split('.')[:-1])+'.png',
					np.float32(output[i, 0, :, :] > config.threshold_character), cmap='gray')

				output[i, :, :, :] = np.uint8(output[i, :, :, :]*255)

				height_pad = (768 - before_pad_dim[0]) // 2
				width_pad = (768 - before_pad_dim[1]) // 2

				image_i = cv2.resize(
					image_i[height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
					(original_dim[i][1], original_dim[i][0])
				)

				character_bbox = cv2.resize(
					output[i, 0, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
					(original_dim[i][1], original_dim[i][0]))/255

				affinity_bbox = cv2.resize(
					output[i, 1, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
					(original_dim[i][1], original_dim[i][0]))/255

				generated_targets = generate_word_bbox(
					character_bbox, affinity_bbox,
					character_threshold=config.threshold_character,
					affinity_threshold=config.threshold_affinity)

				if 'error_message' in generated_targets.keys():
					print('There was an error while generating the target of ', image_name[i])
					print('Error:', generated_targets['error_message'])
					continue

				generated_targets = get_weighted_character_target(
					generated_targets, {'bbox': annots[i]['bbox'], 'text': annots[i]['text']},
					dataloader.dataset.unknown,
					config.threshold_fscore)

				cv2.drawContours(image_i, [np.array(word_bbox) for word_bbox in generated_targets['word_bbox']], -1, (0, 255, 0), 2)

				plt.imsave(base_target_path + '_word_bbox/'+'.'.join(image_name[i].split('.')[:-1])+'.png', image_i)

				with open(base_target_path + '/' + '.'.join(image_name[i].split('.')[:-1]) + '.json', 'w') as f:
					json.dump(generated_targets, f)
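
Both examples share the same resize-and-unpad arithmetic: the input image was scaled so that its longer side becomes 768, padded symmetrically to 768 x 768, and the network output therefore has to be cropped and scaled back before the heatmaps are usable at the original resolution. A short worked example of that arithmetic with a hypothetical original size of 720 x 1280 (height x width):

original_dim = (720, 1280)          # (height, width) of the original image
max_dim = max(original_dim)         # 1280
resizing_factor = 768 / max_dim     # 0.6

before_pad_dim = [int(original_dim[0] * resizing_factor),   # 432
                  int(original_dim[1] * resizing_factor)]   # 768

height_pad = (768 - before_pad_dim[0]) // 2   # 168
width_pad = (768 - before_pad_dim[1]) // 2    # 0

# The 768 x 768 output is cropped to [168:168 + 432, 0:0 + 768] and then resized
# back with cv2.resize(crop, (original_dim[1], original_dim[0])), i.e. (width, height).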
Example #3
def generate_next_targets(original_dim, output, image, base_target_path,
                          image_name, annots, dataloader, no):
    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]
    # visualize = config.visualize_generated and no % config.visualize_freq == 0 and no != 0
    visualize = config.visualize_generated  # Just for debugging
    max_dim = original_dim.max()
    resizing_factor = 768 / max_dim
    before_pad_dim = [
        int(original_dim[0] * resizing_factor),
        int(original_dim[1] * resizing_factor)
    ]

    output = np.uint8(output * 255)

    height_pad = (768 - before_pad_dim[0]) // 2
    width_pad = (768 - before_pad_dim[1]) // 2

    character_bbox = cv2.resize(
        output[0, height_pad:height_pad + before_pad_dim[0],
               width_pad:width_pad + before_pad_dim[1]],
        (original_dim[1] // 2, original_dim[0] // 2)) / 255

    affinity_bbox = cv2.resize(
        output[1, height_pad:height_pad + before_pad_dim[0],
               width_pad:width_pad + before_pad_dim[1]],
        (original_dim[1] // 2, original_dim[0] // 2)) / 255

    # Generating word-bbox given character and affinity heatmap

    generated_targets = generate_word_bbox(
        character_bbox,
        affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)

    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    generated_targets['characters'] = [
        i * 2 for i in generated_targets['characters']
    ]
    generated_targets['affinity'] = [
        i * 2 for i in generated_targets['affinity']
    ]

    if visualize:

        character_bbox = cv2.resize((character_bbox * 255).astype(np.uint8),
                                    (original_dim[1], original_dim[0])) / 255

        affinity_bbox = cv2.resize((affinity_bbox * 255).astype(np.uint8),
                                   (original_dim[1], original_dim[0])) / 255

        image_i = denormalize_mean_variance(image.data.cpu().numpy().transpose(
            1, 2, 0))

        image_i = cv2.resize(
            image_i[height_pad:height_pad + before_pad_dim[0],
                    width_pad:width_pad + before_pad_dim[1]],
            (original_dim[1], original_dim[0]))

        # Saving affinity heat map
        plt.imsave(base_target_path + '_predicted/affinity/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   np.float32(affinity_bbox > config.threshold_affinity_upper),
                   cmap='gray')

        # Saving character heat map
        plt.imsave(
            base_target_path + '_predicted/character/' +
            '.'.join(image_name.split('.')[:-1]) + '.png',
            np.float32(character_bbox > config.threshold_character_upper),
            cmap='gray')

        cv2.drawContours(image_i, generated_targets['word_bbox'], -1,
                         (0, 255, 0), 2)

        # Saving word bbox drawn on the original image
        plt.imsave(
            base_target_path + '_predicted/word_bbox/' +
            '.'.join(image_name.split('.')[:-1]) + '.png', image_i)

    predicted_word_bbox = generated_targets['word_bbox'].copy()
    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #
    generated_targets = get_weighted_character_target(
        generated_targets, {
            'bbox': annots['bbox'],
            'text': annots['text']
        }, dataloader.dataset.unknown, config.threshold_fscore,
        config.weight_threshold)
    target_word_bbox = generated_targets['word_bbox'].copy()

    f_score = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])['f_score']

    if visualize:
        image_i = denormalize_mean_variance(image.data.cpu().numpy().transpose(
            1, 2, 0))
        image_i = cv2.resize(
            image_i[height_pad:height_pad + before_pad_dim[0],
                    width_pad:width_pad + before_pad_dim[1]],
            (original_dim[1], original_dim[0]))

        # Generated word_bbox after postprocessing
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1,
                         (0, 255, 0), 2)

        # Saving word bbox after postprocessing
        plt.imsave(
            base_target_path + '_next_target/word_bbox/' +
            '.'.join(image_name.split('.')[:-1]) + '.png', image_i)

        # Generate affinity heatmap after postprocessing
        affinity_target, affinity_weight_map = generate_target_others(
            (image_i.shape[0], image_i.shape[1]),
            generated_targets['affinity'].copy(),
            np.array(generated_targets['weights'])[:, 1])

        # Generate character heatmap after postprocessing
        character_target, characters_weight_map = generate_target_others(
            (image_i.shape[0], image_i.shape[1]),
            generated_targets['characters'].copy(),
            np.array(generated_targets['weights'])[:, 0])

        # Saving the affinity heatmap
        plt.imsave(base_target_path + '_next_target/affinity/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   affinity_target,
                   cmap='gray')

        # Saving the character heatmap
        plt.imsave(base_target_path + '_next_target/character/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   character_target,
                   cmap='gray')

        # Saving the affinity weight map
        plt.imsave(base_target_path + '_next_target/affinity_weight/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   affinity_weight_map,
                   cmap='gray')

        # Saving the character weight map
        plt.imsave(base_target_path + '_next_target/character_weight/' +
                   '.'.join(image_name.split('.')[:-1]) + '.png',
                   characters_weight_map,
                   cmap='gray')

    # Saving the target for next iteration in json format

    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    generated_targets['characters'] = [
        word_i.tolist() for word_i in generated_targets['characters']
    ]
    generated_targets['affinity'] = [
        word_i.tolist() for word_i in generated_targets['affinity']
    ]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score
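
Unlike the first two examples, generate_next_targets processes a single image and returns the f-score between the predicted word boxes and the weighted targets. A hypothetical driver loop, modelled on the batch loop of Examples 1 and 2 (the dataset attributes gt and imnames and the tuple returned by the dataloader are taken from those examples; the wrapper name and the averaging of the f-scores are assumptions):

import numpy as np
import torch
from tqdm import tqdm

def synthesize_next_targets(dataloader, model, base_target_path):
    # Hypothetical wrapper: run the model over the dataloader and call
    # generate_next_targets once per image, collecting the returned f-scores.
    f_scores = []
    with torch.no_grad():
        model.eval()
        for no, (image, image_name, original_dim, item) in enumerate(tqdm(dataloader)):
            if config.use_cuda:
                image = image.cuda()
            output = model(image)
            if isinstance(output, list):
                output = torch.cat(output, dim=0)
            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()
            for i in range(output.shape[0]):
                annots = dataloader.dataset.gt['annots'][
                    dataloader.dataset.imnames[int(item[i])]]
                f_scores.append(
                    generate_next_targets(
                        original_dim[i], output[i], image[i], base_target_path,
                        image_name[i], annots, dataloader, no))
    return float(np.mean(f_scores)) if f_scores else 0.0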