コード例 #1
0
def save(no, dataset_name, output, image, character_map, affinity_map, character_weight, affinity_weight):
    """
    Write debugging images for every sample of a batch to 'Temporary/<no>/<sample index>/'.

    Per sample the following PNG files are produced: the de-normalised input image with the predicted
    word contours drawn on it, the predicted character and affinity maps, binarised versions of both
    maps at the lower and upper thresholds read from config, and the target and weight maps.

    :param no: identifier of the batch; str(no) is used as the folder name
    :param dataset_name: per-sample sequence, only enumerated to obtain the sample indices
    :param output: model prediction, read as output[sample, 0] (character) and output[sample, 1] (affinity)
    :param image: batch of network inputs; image[sample] is transposed with (1, 2, 0) before de-normalising
    :param character_map: target character maps, indexed by sample
    :param affinity_map: target affinity maps, indexed by sample
    :param character_weight: character weight maps, indexed by sample
    :param affinity_weight: affinity weight maps, indexed by sample
    :return: None
    """

    def to_numpy(tensor):
        # Same conversion chain the batch tensors go through everywhere in this function
        return tensor.data.cpu().numpy()

    def as_uint8(array):
        # Scale by 255 and cast to uint8 so that cv2.imwrite can store the map
        return np.uint8(array * 255)

    batch_dir = 'Temporary/' + str(no)
    os.makedirs(batch_dir, exist_ok=True)

    for sample_idx, _unused_name in enumerate(dataset_name):

        sample_dir = batch_dir + '/' + str(sample_idx)
        os.makedirs(sample_dir, exist_ok=True)

        # The heatmaps handed to generate_word_bbox are converted separately from the ones saved below
        generated = generate_word_bbox(
            to_numpy(output[sample_idx, 0]),
            to_numpy(output[sample_idx, 1]),
            config.threshold_character,
            config.threshold_affinity,
            config.threshold_word,
            config.threshold_character_upper,
            config.threshold_affinity_upper,
            config.scale_character,
            config.scale_affinity,
        )

        # Input image with the predicted word boxes drawn in green
        output_image = denormalize_mean_variance(to_numpy(image[sample_idx]).transpose(1, 2, 0))
        cv2.drawContours(output_image, generated['word_bbox'], -1, (0, 255, 0), 2)
        plt.imsave(sample_dir + '/image_.png', output_image)

        character_pred = to_numpy(output[sample_idx, 0])
        affinity_pred = to_numpy(output[sample_idx, 1])

        # Raw predictions
        cv2.imwrite(sample_dir + '/char_map.png', as_uint8(character_pred))
        cv2.imwrite(sample_dir + '/aff_map.png', as_uint8(affinity_pred))

        # Predictions binarised at the upper and lower thresholds
        binarised_maps = [
            ('/char_map_threshold_upper.png', character_pred, config.threshold_character_upper),
            ('/aff_map_threshold_upper.png', affinity_pred, config.threshold_affinity_upper),
            ('/char_map_threshold_lower.png', character_pred, config.threshold_character),
            ('/aff_map_threshold_lower.png', affinity_pred, config.threshold_affinity),
        ]
        for file_name, heatmap, threshold in binarised_maps:
            cv2.imwrite(sample_dir + file_name, as_uint8(np.float32(heatmap > threshold)))

        # Targets and their weight maps
        reference_maps = [
            ('/target_char_map.png', character_map),
            ('/target_affinity_map.png', affinity_map),
            ('/weight_char_map.png', character_weight),
            ('/weight_affinity_map.png', affinity_weight),
        ]
        for file_name, batch in reference_maps:
            cv2.imwrite(sample_dir + file_name, as_uint8(to_numpy(batch[sample_idx])))
コード例 #2
0
def synthesize(dataloader, model, base_path_affinity, base_path_character, base_path_bbox):

	"""
	Run the model over every batch of the dataloader and save the predictions as PNG images.

	For each image three files are written, named after the image with its extension replaced by
	'.png': the image with the predicted word contours drawn on it, the character heatmap binarised
	at config.threshold_character and the affinity heatmap binarised at config.threshold_affinity.
	The output folders are not created here.

	:param dataloader: A Pytorch dataloader yielding (image, image_name, original_dim) batches
	:param model: A pre-trained model
	:param base_path_affinity: Path where to store the predicted affinity heatmap
	:param base_path_character: Path where to store the predicted character heatmap
	:param base_path_bbox: Path where to store the word_bbox overlapped on images
	:return: None
	"""

	# Side length of the square network input. Assumes the dataloader scales the longest image side to
	# this value and pads the shorter side symmetrically -- TODO confirm against the dataloader
	padded_size = 768

	with torch.no_grad():

		model.eval()

		for image, image_name, original_dim in tqdm(dataloader):

			if config.use_cuda:
				image = image.cuda()

			output = model(image)

			if isinstance(output, list):

				# If using custom DataParallelModel this is necessary to convert the list to tensor
				output = torch.cat(output, dim=0)

			output = output.data.cpu().numpy()
			original_dim = original_dim.cpu().numpy()

			for i in range(output.shape[0]):

				# --------- Resizing it back to the original image size and saving it ----------- #

				image_i = (image[i].data.cpu().numpy() * 255).astype(np.uint8).transpose(1, 2, 0)

				resizing_factor = padded_size/original_dim[i].max()
				before_pad_dim = [int(original_dim[i][0]*resizing_factor), int(original_dim[i][1]*resizing_factor)]

				height_pad = (padded_size - before_pad_dim[0])//2
				width_pad = (padded_size - before_pad_dim[1])//2

				# Region of the padded input that holds actual image content
				rows = slice(height_pad, height_pad + before_pad_dim[0])
				cols = slice(width_pad, width_pad + before_pad_dim[1])

				# cv2.resize expects dsize as (width, height); plain Python ints are passed on purpose
				original_size = (int(original_dim[i][1]), int(original_dim[i][0]))

				# Quantise the heatmaps to 256 levels. The values are clipped to [0, 1] first because the
				# uint8 cast of anything outside that range does not saturate and would corrupt the map
				output[i, :, :, :] = np.uint8(np.clip(output[i, :, :, :], 0, 1)*255)

				image_i = cv2.resize(image_i[rows, cols], original_size)
				character_bbox = cv2.resize(output[i, 0, rows, cols], original_size)/255
				affinity_bbox = cv2.resize(output[i, 1, rows, cols], original_size)/255

				predicted_bbox = generate_word_bbox(
					character_bbox,
					affinity_bbox,
					character_threshold=config.threshold_character,
					affinity_threshold=config.threshold_affinity)['word_bbox']

				predicted_bbox = [np.array(predicted_bbox_i) for predicted_bbox_i in predicted_bbox]

				cv2.drawContours(image_i, predicted_bbox, -1, (0, 255, 0), 2)

				# File name of the image without its last extension
				stem = '.'.join(image_name[i].split('.')[:-1])

				plt.imsave(base_path_bbox + '/' + stem + '.png', image_i)

				plt.imsave(
					base_path_character + '/' + stem + '.png',
					np.float32(character_bbox > config.threshold_character),
					cmap='gray')

				plt.imsave(
					base_path_affinity + '/' + stem + '.png',
					np.float32(affinity_bbox > config.threshold_affinity),
					cmap='gray')
コード例 #3
0
def synthesize_with_score(dataloader, model, base_target_path):

	"""
	Run the model over an annotated dataset, save the predictions and dump the generated targets as json.

	For every image the thresholded affinity and character heatmaps are written to
	base_target_path + '_affinity/' and base_target_path + '_character/', the image with the word
	contours to base_target_path + '_word_bbox/', and the output of get_weighted_character_target to
	base_target_path + '/<image name without extension>.json'. Images for which generate_word_bbox
	reports an 'error_message' are skipped after printing the message. The output folders are not
	created here.

	:param dataloader: dataloader yielding (image, image_name, original_dim, item) batches; its dataset
		must expose gt['annots'], imnames and unknown (icdar 2013 dataset according to the original note)
	:param model: pre-trained model
	:param base_target_path: path prefix where to store the predictions
	:return: None
	"""

	# Side length of the square network input. Assumes the dataloader scales the longest image side to
	# this value and pads the shorter side symmetrically -- TODO confirm against the dataloader
	padded_size = 768

	with torch.no_grad():

		model.eval()

		for image, image_name, original_dim, item in tqdm(dataloader):

			# Ground-truth annotation of every sample in the batch, looked up through the dataset index
			annots = [dataloader.dataset.gt['annots'][dataloader.dataset.imnames[i]] for i in item]

			if config.use_cuda:
				image = image.cuda()

			output = model(image)

			if isinstance(output, list):
				# A list of per-device outputs is merged back into one batch tensor
				output = torch.cat(output, dim=0)

			output = output.data.cpu().numpy()
			original_dim = original_dim.cpu().numpy()

			for i in range(output.shape[0]):

				# --------- Resizing it back to the original image size and saving it ----------- #

				image_i = (image[i].data.cpu().numpy() * 255).astype(np.uint8).transpose(1, 2, 0)

				resizing_factor = padded_size/original_dim[i].max()
				before_pad_dim = [int(original_dim[i][0]*resizing_factor), int(original_dim[i][1]*resizing_factor)]

				# File name of the image without its last extension
				stem = '.'.join(image_name[i].split('.')[:-1])

				# The thresholded maps are saved at network resolution, from the raw prediction
				plt.imsave(
					base_target_path + '_affinity/' + stem + '.png',
					np.float32(output[i, 1, :, :] > config.threshold_affinity),
					cmap='gray')
				plt.imsave(
					base_target_path + '_character/' + stem + '.png',
					np.float32(output[i, 0, :, :] > config.threshold_character), cmap='gray')

				# Quantise the heatmaps to 256 levels. The values are clipped to [0, 1] first because the
				# uint8 cast of anything outside that range does not saturate and would corrupt the map
				output[i, :, :, :] = np.uint8(np.clip(output[i, :, :, :], 0, 1)*255)

				height_pad = (padded_size - before_pad_dim[0]) // 2
				width_pad = (padded_size - before_pad_dim[1]) // 2

				# Region of the padded input that holds actual image content
				rows = slice(height_pad, height_pad + before_pad_dim[0])
				cols = slice(width_pad, width_pad + before_pad_dim[1])

				# cv2.resize expects dsize as (width, height); plain Python ints are passed on purpose
				original_size = (int(original_dim[i][1]), int(original_dim[i][0]))

				image_i = cv2.resize(image_i[rows, cols], original_size)
				character_bbox = cv2.resize(output[i, 0, rows, cols], original_size)/255
				affinity_bbox = cv2.resize(output[i, 1, rows, cols], original_size)/255

				generated_targets = generate_word_bbox(
					character_bbox, affinity_bbox,
					character_threshold=config.threshold_character,
					affinity_threshold=config.threshold_affinity)

				if 'error_message' in generated_targets:
					print('There was an error while generating the target of ', image_name[i])
					print('Error:', generated_targets['error_message'])
					continue

				generated_targets = get_weighted_character_target(
					generated_targets, {'bbox': annots[i]['bbox'], 'text': annots[i]['text']},
					dataloader.dataset.unknown,
					config.threshold_fscore)

				cv2.drawContours(
					image_i,
					[np.array(word_bbox) for word_bbox in generated_targets['word_bbox']],
					-1, (0, 255, 0), 2)

				plt.imsave(base_target_path + '_word_bbox/' + stem + '.png', image_i)

				with open(base_target_path + '/' + stem + '.json', 'w') as f:
					json.dump(generated_targets, f)
コード例 #4
0
def synthesize_with_score(dataloader, model, base_target_path):
    """
    Run the model over an annotated dataset and store the targets for the next training iteration.

    For every image the predicted heatmaps are resized back to the original image size, word boxes
    are generated from them and post-processed with get_weighted_character_target using the ground
    truth annotation. The result is written to base_target_path + '/<image name without
    extension>.json'. When config.visualize_generated is set, the predictions are additionally saved
    below base_target_path + '_predicted/' and the post-processed targets below
    base_target_path + '_next_target/'. Images for which generate_word_bbox reports an
    'error_message' are skipped after printing the message. The output folders are not created here.

    :param dataloader: dataloader yielding (image, image_name, original_dim, item) batches; its dataset
        must expose gt['annots'], imnames and unknown (icdar 2013 dataset according to the original note)
    :param model: pre-trained model
    :param base_target_path: path prefix where to store the predictions
    :return: None
    """

    # Side length of the square network input. Assumes the dataloader scales the longest image side to
    # this value and pads the shorter side symmetrically -- TODO confirm against the dataloader
    padded_size = 768

    def restore_image(image_tensor, rows, cols, original_size):
        # Network input -> HxWxC uint8 image, cropped to the un-padded region and resized to original_size
        image_np = (image_tensor.data.cpu().numpy() * 255).astype(np.uint8).transpose(1, 2, 0)
        return cv2.resize(image_np[rows, cols], original_size)

    with torch.no_grad():

        model.eval()

        for image, image_name, original_dim, item in tqdm(dataloader):

            # Ground-truth annotation of every sample in the batch, looked up through the dataset index
            annots = [
                dataloader.dataset.gt['annots'][dataloader.dataset.imnames[i]]
                for i in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            if isinstance(output, list):
                # A list of per-device outputs is merged back into one batch tensor
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                resizing_factor = padded_size / original_dim[i].max()
                before_pad_dim = [
                    int(original_dim[i][0] * resizing_factor),
                    int(original_dim[i][1] * resizing_factor)
                ]

                # Quantise the heatmaps to 256 levels. The values are clipped to [0, 1] first because the
                # uint8 cast of anything outside that range does not saturate and would corrupt the map
                output[i, :, :, :] = np.uint8(
                    np.clip(output[i, :, :, :], 0, 1) * 255)

                height_pad = (padded_size - before_pad_dim[0]) // 2
                width_pad = (padded_size - before_pad_dim[1]) // 2

                # Region of the padded input that holds actual image content
                rows = slice(height_pad, height_pad + before_pad_dim[0])
                cols = slice(width_pad, width_pad + before_pad_dim[1])

                # cv2.resize expects dsize as (width, height); plain Python ints are passed on purpose
                original_size = (int(original_dim[i][1]),
                                 int(original_dim[i][0]))

                # File name of the image without its last extension
                stem = '.'.join(image_name[i].split('.')[:-1])

                character_bbox = cv2.resize(output[i, 0, rows, cols],
                                            original_size) / 255

                affinity_bbox = cv2.resize(output[i, 1, rows, cols],
                                           original_size) / 255

                # Generating word-bbox given character and affinity heatmap

                generated_targets = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity)

                if 'error_message' in generated_targets:
                    print('There was an error while generating the target of ',
                          image_name[i])
                    print('Error:', generated_targets['error_message'])
                    continue

                if config.visualize_generated:

                    # Saving affinity heat map
                    plt.imsave(
                        base_target_path + '_predicted/affinity/' + stem +
                        '.png',
                        np.float32(affinity_bbox > config.threshold_affinity),
                        cmap='gray')

                    # Saving character heat map
                    plt.imsave(
                        base_target_path + '_predicted/character/' + stem +
                        '.png',
                        np.float32(
                            character_bbox > config.threshold_character),
                        cmap='gray')

                    # The image is only needed for visualisation, so it is restored here
                    image_i = restore_image(image[i], rows, cols,
                                            original_size)

                    cv2.drawContours(image_i, generated_targets['word_bbox'],
                                     -1, (0, 255, 0), 2)

                    # Saving word bbox drawn on the original image
                    plt.imsave(
                        base_target_path + '_predicted/word_bbox/' + stem +
                        '.png', image_i)

                # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

                generated_targets = get_weighted_character_target(
                    generated_targets, {
                        'bbox': annots[i]['bbox'],
                        'text': annots[i]['text']
                    }, dataloader.dataset.unknown, config.threshold_fscore)

                if config.visualize_generated:

                    # A clean copy of the image, without the contours drawn above
                    image_i = restore_image(image[i], rows, cols,
                                            original_size)

                    # Generated word_bbox after postprocessing
                    cv2.drawContours(image_i, generated_targets['word_bbox'],
                                     -1, (0, 255, 0), 2)

                    # Saving word bbox after postprocessing
                    plt.imsave(
                        base_target_path + '_next_target/word_bbox/' + stem +
                        '.png', image_i)

                    # Generate affinity heatmap after postprocessing
                    affinity_target, affinity_weight_map = generate_target_others(
                        (image_i.shape[0], image_i.shape[1]),
                        generated_targets['affinity'].copy(),
                        generated_targets['weights'].copy())

                    # Generate character heatmap after postprocessing
                    character_target, characters_weight_map = generate_target_others(
                        (image_i.shape[0], image_i.shape[1]),
                        generated_targets['characters'].copy(),
                        generated_targets['weights'].copy())

                    # Saving the affinity heatmap
                    plt.imsave(base_target_path + '_next_target/affinity/' +
                               stem + '.png',
                               affinity_target,
                               cmap='gray')

                    # Saving the character heatmap
                    plt.imsave(base_target_path + '_next_target/character/' +
                               stem + '.png',
                               character_target,
                               cmap='gray')

                    # Saving the affinity weight map
                    plt.imsave(
                        base_target_path + '_next_target/affinity_weight/' +
                        stem + '.png',
                        affinity_weight_map,
                        cmap='gray')

                    # Saving the character weight map
                    plt.imsave(
                        base_target_path + '_next_target/character_weight/' +
                        stem + '.png',
                        characters_weight_map,
                        cmap='gray')

                # Saving the target for next iteration in json format.
                # The numpy arrays are converted to nested lists because json cannot serialise arrays.

                generated_targets['word_bbox'] = generated_targets[
                    'word_bbox'].tolist()
                generated_targets['characters'] = [
                    word_i.tolist()
                    for word_i in generated_targets['characters']
                ]
                generated_targets['affinity'] = [
                    word_i.tolist() for word_i in generated_targets['affinity']
                ]

                with open(base_target_path + '/' + stem + '.json', 'w') as f:
                    json.dump(generated_targets, f)
コード例 #5
0
def synthesize(dataloader, model, base_path_affinity, base_path_character,
               base_path_bbox, base_path_char, base_path_aff, base_path_json):
    """
    Run the model over every batch of the dataloader and save the predictions as PNG and json files.

    For each image, named after the image with its extension replaced, the following is written:
    the image with word, character and affinity contours drawn on it (three separate PNG files),
    the character and affinity heatmaps binarised at the lower config thresholds, and the output of
    generate_word_bbox as json. The output folders are not created here.

    :param dataloader: A Pytorch dataloader yielding (image, image_name, original_dim) batches
    :param model: A pre-trained model
    :param base_path_affinity: Path where to store the predicted affinity heatmap
    :param base_path_character: Path where to store the predicted character heatmap
    :param base_path_bbox: Path where to store the word_bbox overlapped on images
    :param base_path_aff: Path where to store the predicted affinity bbox
    :param base_path_char: Path where to store the predicted character bbox
    :param base_path_json: Path where to store the predicted bbox in json format
    :return: None
    """

    # Side length of the square network input. Assumes the dataloader scales the longest image side to
    # this value and pads the shorter side symmetrically -- TODO confirm against the dataloader
    padded_size = 768
    contour_colour = (0, 255, 0)

    with torch.no_grad():

        model.eval()

        for image, image_name, original_dim in tqdm(dataloader):

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            if isinstance(output, list):

                # If using custom DataParallelModel this is necessary to convert the list to tensor
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()

            # Restrict the heatmaps to [0, 1] so that the uint8 quantisation below cannot wrap around
            np.clip(output, 0, 1, out=output)

            original_dim = original_dim.cpu().numpy()

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                image_i = denormalize_mean_variance(
                    image[i].data.cpu().numpy().transpose(1, 2, 0))

                resizing_factor = padded_size / original_dim[i].max()
                before_pad_dim = [
                    int(original_dim[i][0] * resizing_factor),
                    int(original_dim[i][1] * resizing_factor)
                ]

                # Quantise the heatmaps to 256 levels
                output[i, :, :, :] = np.uint8(output[i, :, :, :] * 255)

                height_pad = (padded_size - before_pad_dim[0]) // 2
                width_pad = (padded_size - before_pad_dim[1]) // 2

                # Region of the padded input that holds actual image content
                rows = slice(height_pad, height_pad + before_pad_dim[0])
                cols = slice(width_pad, width_pad + before_pad_dim[1])

                # cv2.resize expects dsize as (width, height); plain Python ints are passed on purpose
                original_size = (int(original_dim[i][1]),
                                 int(original_dim[i][0]))

                image_i = cv2.resize(image_i[rows, cols], original_size)

                character_bbox = cv2.resize(output[i, 0, rows, cols],
                                            original_size) / 255

                affinity_bbox = cv2.resize(output[i, 1, rows, cols],
                                           original_size) / 255

                predicted_bbox = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word,
                    character_threshold_upper=config.threshold_character_upper,
                    affinity_threshold_upper=config.threshold_affinity_upper,
                    scaling_character=config.scale_character,
                    scaling_affinity=config.scale_affinity)

                word_image = image_i.copy()
                char_image = image_i.copy()
                aff_image = image_i.copy()

                cv2.drawContours(word_image, predicted_bbox['word_bbox'], -1,
                                 contour_colour, 2)

                # The per-word boxes are flattened into one array for drawing. np.concatenate raises
                # ValueError on an empty sequence, so an image without any predicted box is saved
                # without contours instead of aborting the whole run.
                if len(predicted_bbox['characters']) > 0:
                    cv2.drawContours(
                        char_image,
                        np.concatenate(predicted_bbox['characters'], axis=0),
                        -1, contour_colour, 2)

                if len(predicted_bbox['affinity']) > 0:
                    cv2.drawContours(
                        aff_image,
                        np.concatenate(predicted_bbox['affinity'], axis=0), -1,
                        contour_colour, 2)

                # File name of the image without its last extension
                stem = '.'.join(image_name[i].split('.')[:-1])

                plt.imsave(base_path_char + '/' + stem + '.png', char_image)

                plt.imsave(base_path_aff + '/' + stem + '.png', aff_image)

                plt.imsave(base_path_bbox + '/' + stem + '.png', word_image)

                plt.imsave(
                    base_path_character + '/' + stem + '.png',
                    np.float32(character_bbox > config.threshold_character),
                    cmap='gray')

                plt.imsave(
                    base_path_affinity + '/' + stem + '.png',
                    np.float32(affinity_bbox > config.threshold_affinity),
                    cmap='gray')

                # The numpy arrays are converted to nested lists because json cannot serialise arrays
                predicted_bbox['word_bbox'] = predicted_bbox[
                    'word_bbox'].tolist()
                predicted_bbox['characters'] = [
                    word_i.tolist() for word_i in predicted_bbox['characters']
                ]
                predicted_bbox['affinity'] = [
                    word_i.tolist() for word_i in predicted_bbox['affinity']
                ]

                with open(base_path_json + '/' + stem + '.json', 'w') as f:
                    json.dump(predicted_bbox, f)
コード例 #6
0
def generate_next_targets(original_dim, output, image, base_target_path,
                          image_name, annots, dataloader, no):
    """
    Build the targets of the next training iteration for one image and return the f-score.

    The predicted heatmaps are cropped to the un-padded region and resized to half of the original
    image size; word boxes are generated at that resolution and scaled back by 2. The boxes are then
    post-processed with get_weighted_character_target against the annotation and written to
    base_target_path + '/' + image_name + '.json'. When config.visualize_generated is set, the
    predictions are saved below base_target_path + '_predicted/' and the post-processed targets below
    base_target_path + '_next_target/'.

    :param original_dim: array with the original image size; [0] is used as height and [1] as width
    :param output: predicted heatmaps, read as output[0] (character) and output[1] (affinity)
    :param image: network input of this image; transposed with (1, 2, 0) and de-normalised for plotting
    :param base_target_path: path prefix for every file written
    :param image_name: name of the image; reduced to its last '/' component when 'datapile' is in
        config.dataset_name
    :param annots: annotation of the image, read through the keys 'bbox' and 'text'
    :param dataloader: dataloader whose dataset exposes unknown and gt['unknown']
    :param no: not used by the current implementation
    :return: the 'f_score' entry of calculate_fscore between predicted and post-processed word boxes
    """

    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]

    # Visualisation currently depends on the config flag only (kept that way for debugging); a
    # frequency based condition using `no` and config.visualize_freq existed but is disabled.
    visualize = config.visualize_generated

    scale = 768 / original_dim.max()
    before_pad_dim = [int(original_dim[0] * scale), int(original_dim[1] * scale)]

    output = np.uint8(output * 255)

    height_pad = (768 - before_pad_dim[0]) // 2
    width_pad = (768 - before_pad_dim[1]) // 2

    # Region of the padded input that holds actual image content
    rows = slice(height_pad, height_pad + before_pad_dim[0])
    cols = slice(width_pad, width_pad + before_pad_dim[1])

    # cv2.resize sizes are given as (width, height)
    full_size = (original_dim[1], original_dim[0])
    half_size = (original_dim[1] // 2, original_dim[0] // 2)

    def png_path(sub_folder):
        # Output file inside sub_folder, named after the image without its last extension
        return base_target_path + sub_folder + '.'.join(image_name.split('.')[:-1]) + '.png'

    def restore_image():
        # De-normalised network input, cropped to the un-padded region and resized to the original size
        denormalized = denormalize_mean_variance(
            image.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(denormalized[rows, cols], full_size)

    character_bbox = cv2.resize(output[0, rows, cols], half_size) / 255
    affinity_bbox = cv2.resize(output[1, rows, cols], half_size) / 255

    # Generating word-bbox given character and affinity heatmap

    generated_targets = generate_word_bbox(
        character_bbox,
        affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)

    # The boxes were found on half-size heatmaps, bring them back to original image coordinates
    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    for key in ('characters', 'affinity'):
        generated_targets[key] = [boxes * 2 for boxes in generated_targets[key]]

    if visualize:

        character_bbox = cv2.resize(
            (character_bbox * 255).astype(np.uint8), full_size) / 255

        affinity_bbox = cv2.resize(
            (affinity_bbox * 255).astype(np.uint8), full_size) / 255

        image_i = restore_image()

        # Saving affinity heat map
        plt.imsave(
            png_path('_predicted/affinity/'),
            np.float32(affinity_bbox > config.threshold_affinity_upper),
            cmap='gray')

        # Saving character heat map
        plt.imsave(
            png_path('_predicted/character/'),
            np.float32(character_bbox > config.threshold_character_upper),
            cmap='gray')

        cv2.drawContours(
            image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)

        # Saving word bbox drawn on the original image
        plt.imsave(png_path('_predicted/word_bbox/'), image_i)

    predicted_word_bbox = generated_targets['word_bbox'].copy()

    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #
    annotation = {'bbox': annots['bbox'], 'text': annots['text']}
    generated_targets = get_weighted_character_target(
        generated_targets, annotation, dataloader.dataset.unknown,
        config.threshold_fscore, config.weight_threshold)

    target_word_bbox = generated_targets['word_bbox'].copy()

    score = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])
    f_score = score['f_score']

    if visualize:

        # A clean copy of the image, without the contours drawn above
        image_i = restore_image()
        map_shape = (image_i.shape[0], image_i.shape[1])

        # Generated word_bbox after postprocessing
        cv2.drawContours(
            image_i, generated_targets['word_bbox'], -1, (0, 255, 0), 2)

        # Saving word bbox after postprocessing
        plt.imsave(png_path('_next_target/word_bbox/'), image_i)

        # Generate affinity heatmap after postprocessing (second column of the weights)
        affinity_target, affinity_weight_map = generate_target_others(
            map_shape,
            generated_targets['affinity'].copy(),
            np.array(generated_targets['weights'])[:, 1])

        # Generate character heatmap after postprocessing (first column of the weights)
        character_target, characters_weight_map = generate_target_others(
            map_shape,
            generated_targets['characters'].copy(),
            np.array(generated_targets['weights'])[:, 0])

        # Saving the heatmaps followed by their weight maps
        next_target_maps = (
            ('_next_target/affinity/', affinity_target),
            ('_next_target/character/', character_target),
            ('_next_target/affinity_weight/', affinity_weight_map),
            ('_next_target/character_weight/', characters_weight_map),
        )
        for sub_folder, target_map in next_target_maps:
            plt.imsave(png_path(sub_folder), target_map, cmap='gray')

    # Saving the target for next iteration in json format.
    # The numpy arrays are converted to nested lists because json cannot serialise arrays.

    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    for key in ('characters', 'affinity'):
        generated_targets[key] = [
            word_i.tolist() for word_i in generated_targets[key]
        ]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score
コード例 #7
0
ファイル: synthesize.py プロジェクト: ds-brx/CRAFT-Remade
def synthesize(
		dataloader,
		model, base_path_affinity, base_path_character, base_path_bbox, base_path_char, base_path_aff, base_path_json):

	"""

	Given a dataloader over a set of images and a pre-trained model, predict the character and affinity heatmaps,
	extract the word boxes from them, and for every image:

		- save the image with the word boxes drawn on it to base_path_bbox
		- save the predicted boxes in json format to base_path_json
		- run pytesseract on every word crop and append the recognised text to ./text_folder/<image name>.txt

	:param dataloader: A Pytorch dataloader yielding (image, image_name, original_dim) batches
	:param model: A pre-trained model
	:param base_path_affinity: Currently unused, kept so that existing callers keep working
	:param base_path_character: Currently unused, kept so that existing callers keep working
	:param base_path_bbox: Path where to store the word_bbox overlapped on images
	:param base_path_aff: Currently unused, kept so that existing callers keep working
	:param base_path_char: Currently unused, kept so that existing callers keep working
	:param base_path_json: Path where to store the predicted bbox in json format
	:return: None
	"""

	# Side of the square, padded image fed to the network
	# NOTE(review): presumably has to match the resize target of the dataloader - confirm
	network_dim = 768

	with torch.no_grad():

		model.eval()
		iterator = tqdm(dataloader)

		for image, image_name, original_dim in iterator:

			if config.use_cuda:
				image = image.cuda()

			output = model(image)

			if isinstance(output, list):

				# If using custom DataParallelModel this is necessary to convert the list to tensor
				output = torch.cat(output, dim=0)

			# Clamp to [0, 1] so that the uint8 quantisation below can not wrap around
			output = np.clip(output.detach().cpu().numpy(), 0, 1)
			original_dim = original_dim.cpu().numpy()

			for i in range(output.shape[0]):

				# Image name without its extension, shared by every file written for this image
				stem = '.'.join(image_name[i].split('.')[:-1])

				# --------- Resizing it back to the original image size and saving it ----------- #

				image_i = denormalize_mean_variance(image[i].data.cpu().numpy().transpose(1, 2, 0))

				resizing_factor = network_dim/original_dim[i].max()
				before_pad_dim = [int(original_dim[i][0]*resizing_factor), int(original_dim[i][1]*resizing_factor)]

				height_pad = (network_dim - before_pad_dim[0])//2
				width_pad = (network_dim - before_pad_dim[1])//2

				# Region of the padded input that holds the actual image
				rows = slice(height_pad, height_pad + before_pad_dim[0])
				cols = slice(width_pad, width_pad + before_pad_dim[1])

				# cv2.resize expects (width, height)
				target_size = (int(original_dim[i][1]), int(original_dim[i][0]))

				# Quantise the heatmaps to 8-bit levels (stored back in the float array)
				output[i] = np.uint8(output[i]*255)

				image_i = cv2.resize(image_i[rows, cols], target_size)
				character_bbox = cv2.resize(output[i, 0, rows, cols], target_size)/255
				affinity_bbox = cv2.resize(output[i, 1, rows, cols], target_size)/255

				predicted_bbox = generate_word_bbox(
					character_bbox,
					affinity_bbox,
					character_threshold=config.threshold_character,
					affinity_threshold=config.threshold_affinity,
					word_threshold=config.threshold_word,
					character_threshold_upper=config.threshold_character_upper,
					affinity_threshold_upper=config.threshold_affinity_upper,
					scaling_character=config.scale_character,
					scaling_affinity=config.scale_affinity
				)

				# --------- Saving the word boxes drawn on the image ----------- #

				# Only the word boxes are drawn. The character and affinity boxes used to be concatenated here for
				# drawings that were never saved, which raised on images without any detection.
				word_image = image_i.copy()
				cv2.drawContours(word_image, predicted_bbox['word_bbox'], -1, (0, 255, 0), 2)

				plt.imsave(base_path_bbox + '/' + stem + '.png', word_image)

				# --------- Saving the predicted boxes in json format ----------- #

				predicted_bbox['word_bbox'] = predicted_bbox['word_bbox'].tolist()
				predicted_bbox['characters'] = [_.tolist() for _ in predicted_bbox['characters']]
				predicted_bbox['affinity'] = [_.tolist() for _ in predicted_bbox['affinity']]

				with open(base_path_json + '/' + stem + '.json', 'w') as f:
					json.dump(predicted_bbox, f)

				# --------- Recognising the text of every word box ----------- #

				word_boxes = predicted_bbox['word_bbox']
				boxes_printed = 0

				# Opened once in append mode: the file is closed even if the OCR raises, and a re-run keeps
				# appending to an already existing file exactly as before
				with open("./text_folder" + '/' + stem + '.txt', "a") as text_file:

					for word in word_boxes:

						# Every corner is stored as [[x, y]]
						# NOTE(review): assumes one word box is a list of 4 such corners - confirm
						x = [corner[0][0] for corner in word]
						y = [corner[0][1] for corner in word]

						# Negative indices would wrap around when slicing, so clamp the crop to the image in
						# case a predicted box extends past the top/left border
						min_x, max_x = max(min(x), 0), max(max(x), 0)
						min_y, max_y = max(min(y), 0), max(max(y), 0)

						crop_img = image_i[min_y:max_y, min_x:max_x]

						# A degenerate box gives an empty crop, write an empty line instead of calling the OCR
						text = pytesseract.image_to_string(crop_img).rstrip() if crop_img.size else ''

						text_file.write(text)
						text_file.write('\n')
						boxes_printed += 1

					text_file.write("Total number of boxes : " + str(len(word_boxes)))
					text_file.write('\n')
					text_file.write("Total number of boxes printed: " + str(boxes_printed))
コード例 #8
0
def train(model, optimizer, iteration):

	"""
	Train the weak-supervised model for one pass over the DataLoaderMIX('train', iteration) dataloader

	The gradients are accumulated over 4 batches before every optimizer step. After every batch the F-score of the
	flagged samples of the batch is computed and shown in the progress bar.

	:param model: Pre-trained model on SynthText
	:param optimizer: Pre-trained model's optimizer
	:param iteration: current iteration of weak-supervision (selects config.lr[iteration])
	:return: model, optimizer, all_loss, all_accuracy
		all_loss - loss of every batch
		all_accuracy - F-score of every batch multiplied by the number of samples it was computed on
	"""

	# Number of batches whose gradients are accumulated before an optimizer step
	accumulation_steps = 4

	# Side of the square, padded image fed to the network
	# NOTE(review): presumably has to match the resize target of the dataloader - confirm
	network_dim = 768

	# Number of most recent batches over which the running averages are shown
	window = 1000

	# Change learning rate while training
	for param_group in optimizer.param_groups:
		param_group['lr'] = config.lr[iteration]

	print('Learning Rate Changed to ', config.lr[iteration])

	dataloader = DataLoader(
		DataLoaderMIX('train', iteration), batch_size=config.batch_size['train'], num_workers=8, shuffle=True)
	loss_criterian = DataParallelCriterion(Criterian())

	model.train()
	optimizer.zero_grad()
	iterator = tqdm(dataloader)

	all_loss = []
	all_accuracy = []
	all_count = []

	ground_truth = iterator.iterable.dataset.gt

	for no, (image, character_map, affinity_map, character_weight, affinity_weight, word_bbox, original_dim) in \
		enumerate(iterator):

		if config.use_cuda:
			image, character_map, affinity_map = image.cuda(), character_map.cuda(), affinity_map.cuda()
			character_weight, affinity_weight = character_weight.cuda(), affinity_weight.cuda()

		output = model(image)

		# Scaled down so that the accumulated gradient is the average over the accumulated batches
		loss = loss_criterian(
			output, character_map, affinity_map, character_weight, affinity_weight).mean()/accumulation_steps

		all_loss.append(loss.item()*accumulation_steps)

		loss.backward()

		if (no + 1) % accumulation_steps == 0:
			optimizer.step()
			optimizer.zero_grad()

		# ---------- Calculating the F-score ------------ #

		if isinstance(output, list):
			output = torch.cat(output, dim=0)

		# Clamp to [0, 1] so that the uint8 quantisation below can not wrap around
		output = np.clip(output.detach().cpu().numpy(), 0, 1)
		original_dim = original_dim.cpu().numpy()

		target_bbox = []
		predicted_ic13 = []
		current_count = 0

		word_bbox = word_bbox.numpy()

		for sample_no, sample_info in enumerate(word_bbox):

			# Only the samples flagged with 1 are scored, sample_info[0] indexes their ground truth
			# NOTE(review): presumably the flag marks samples that have real annotations - confirm
			if sample_info[1] != 1:
				continue

			# ToDo - Understand why model.train() gives poor results but model.eval() with torch.no_grad() gives better results

			resizing_factor = network_dim / original_dim[sample_no].max()
			before_pad_dim = [
				int(original_dim[sample_no][0] * resizing_factor), int(original_dim[sample_no][1] * resizing_factor)]

			height_pad = (network_dim - before_pad_dim[0]) // 2
			width_pad = (network_dim - before_pad_dim[1]) // 2

			# Region of the padded input that holds the actual image
			rows = slice(height_pad, height_pad + before_pad_dim[0])
			cols = slice(width_pad, width_pad + before_pad_dim[1])

			# cv2.resize expects (width, height)
			target_size = (int(original_dim[sample_no][1]), int(original_dim[sample_no][0]))

			# Quantise the heatmaps to 8-bit levels (stored back in the float array)
			output[sample_no] = np.uint8(output[sample_no] * 255)

			character_bbox = cv2.resize(output[sample_no, 0, rows, cols], target_size) / 255
			affinity_bbox = cv2.resize(output[sample_no, 1, rows, cols], target_size) / 255

			predicted_bbox = generate_word_bbox(
				character_bbox,
				affinity_bbox,
				character_threshold=config.threshold_character,
				affinity_threshold=config.threshold_affinity,
				word_threshold=config.threshold_word)['word_bbox']

			predicted_ic13.append(predicted_bbox)
			target_bbox.append(
				np.array(ground_truth[sample_info[0] % len(ground_truth)][1]['word_bbox'], dtype=np.int64))

			current_count += 1

		if current_count != 0:
			all_accuracy.append(
				calculate_batch_fscore(
					predicted_ic13,
					target_bbox,
					threshold=config.threshold_fscore)*current_count
			)
		else:
			# Nothing to score in this batch. Its contribution is weighted by zero anyway, so the metric is not
			# called on empty lists.
			all_accuracy.append(0.0)

		all_count.append(current_count)

		# ------------- Setting Description ---------------- #

		recent_count = np.sum(all_count[-window:])

		if recent_count != 0:
			# Truncated (not rounded) to 8 decimals for display
			f_score = int(np.sum(all_accuracy[-window:]) * 100000000 / recent_count) / 100000000
		else:
			f_score = 0

		iterator.set_description(
			'Loss:' + str(int(loss.item() * accumulation_steps * 100000) / 100000) +
			' Iterations:[' + str(no) + '/' + str(len(iterator)) +
			'] Average Loss:' + str(int(np.mean(all_loss[-window:]) * 100000) / 100000) +
			'| Average F-Score: ' + str(f_score)
		)

	# Apply the gradients of the last, incomplete accumulation group
	if len(iterator) % accumulation_steps != 0:

		optimizer.step()
		optimizer.zero_grad()

	torch.cuda.empty_cache()

	return model, optimizer, all_loss, all_accuracy
コード例 #9
0
def test(model):

	"""
	Test the weak-supervised model on the DataLoaderEvalICDAR2013('test') dataset

	:param model: Pre-trained model on SynthText
	:return: Mean over the batches of the mean F-score of every batch
	"""

	# Side of the square, padded image fed to the network
	# NOTE(review): presumably has to match the resize target of the dataloader - confirm
	network_dim = 768

	# NOTE(review): the 'train' batch size is used for testing as well - confirm this is intended
	dataloader = DataLoader(
		DataLoaderEvalICDAR2013('test'), batch_size=config.batch_size['train'], num_workers=8, shuffle=False)

	with torch.no_grad():
		model.eval()
		iterator = tqdm(dataloader)
		all_accuracy = []

		ground_truth = dataloader.dataset.gt

		for image, image_name, original_dim, item in iterator:

			# Annotations of the images of this batch, looked up through their dataset index
			annots = [ground_truth['annots'][dataloader.dataset.imnames[i]] for i in item]

			if config.use_cuda:
				image = image.cuda()

			output = model(image)

			if isinstance(output, list):
				output = torch.cat(output, dim=0)

			# Clamp to [0, 1] so that the uint8 quantisation below can not wrap around
			output = np.clip(output.detach().cpu().numpy(), 0, 1)
			original_dim = original_dim.cpu().numpy()

			f_score = []

			for i in range(output.shape[0]):
				# --------- Resizing it back to the original image size ----------- #

				resizing_factor = network_dim / original_dim[i].max()
				before_pad_dim = [int(original_dim[i][0] * resizing_factor), int(original_dim[i][1] * resizing_factor)]

				height_pad = (network_dim - before_pad_dim[0]) // 2
				width_pad = (network_dim - before_pad_dim[1]) // 2

				# Region of the padded input that holds the actual image
				rows = slice(height_pad, height_pad + before_pad_dim[0])
				cols = slice(width_pad, width_pad + before_pad_dim[1])

				# cv2.resize expects (width, height)
				target_size = (int(original_dim[i][1]), int(original_dim[i][0]))

				# Quantise the heatmaps to 8-bit levels (stored back in the float array)
				output[i] = np.uint8(output[i] * 255)

				character_bbox = cv2.resize(output[i, 0, rows, cols], target_size) / 255
				affinity_bbox = cv2.resize(output[i, 1, rows, cols], target_size) / 255

				generated_targets = generate_word_bbox(
					character_bbox, affinity_bbox,
					character_threshold=config.threshold_character,
					affinity_threshold=config.threshold_affinity,
					word_threshold=config.threshold_word)

				predicted_word_bbox = generated_targets['word_bbox'].copy()

				# The singleton third axis of the predicted boxes is dropped before the comparison
				f_score.append(calculate_fscore(predicted_word_bbox[:, :, 0, :], np.array(annots[i]['bbox'])))

			all_accuracy.append(np.mean(f_score))

			iterator.set_description('F-score: ' + str(np.mean(all_accuracy)))

		torch.cuda.empty_cache()

	return np.mean(all_accuracy)