Beispiel #1
0
def test(model):
    """
    Evaluate the weak-supervised model on the ``DataLoaderEvalICDAR2013('test')`` split.

    The model is switched to eval mode and run without gradient tracking. For every
    image the predicted word boxes (obtained through ``resize_bbox``) are scored
    against the ground-truth boxes with ``calculate_fscore``.

    :param model: Pre-trained model on SynthText
    :return: mean over all batches of the per-batch mean F-score (a single number,
        no loss is computed). Batches are weighted equally, whatever their size.
    """

    # NOTE(review): the batch size is read from the 'train' entry of the config even
    # though this is an evaluation loop - confirm this is intended.
    dataloader = DataLoader(DataLoaderEvalICDAR2013('test'),
                            batch_size=config.batch_size['train'],
                            num_workers=8,
                            shuffle=False)

    with torch.no_grad():
        model.eval()
        iterator = tqdm(dataloader)
        all_accuracy = []

        ground_truth = dataloader.dataset.gt

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of every image in the batch, in batch order
            annots = [
                ground_truth['annots'][dataloader.dataset.imnames[i]]
                for i in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some model wrappers return one tensor per chunk; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            # NOTE(review): np.mean is applied directly to these values, so this version
            # presumably expects calculate_fscore to return a number - confirm.
            f_score = [
                calculate_fscore(
                    resize_bbox(original_dim[i], output[i],
                                config)['word_bbox'][:, :, 0, :],
                    np.array(annots[i]['bbox']),
                    text_target=annots[i]['text'],
                )
                for i in range(output.shape[0])
            ]

            all_accuracy.append(np.mean(f_score))

            iterator.set_description('F-score: ' + str(np.mean(all_accuracy)))

        torch.cuda.empty_cache()

    return np.mean(all_accuracy)
Beispiel #2
0
def test(model, iteration):
    """
    Evaluate the weak-supervised model and save a visualisation of every prediction.

    For each test image the predicted word boxes are drawn with colour (0, 255, 0)
    and the ground-truth boxes with colour (0, 0, 255) on the image restored to its
    original size; the result is saved to ``<config.save_path>/Test_<iteration>/``.

    :param model: Pre-trained model on SynthText
    :param iteration: Iteration Number, used only to name the output directory
    :return: tuple ``(f_score, precision, recall)`` computed from the true-positive /
        false-positive / positive counts accumulated over the whole test set.
        All three are 0.0 when the corresponding denominator is zero
        (e.g. nothing predicted, nothing matched, or an empty dataloader).
    """

    def _ratio(numerator, denominator):
        # Plain division raises ZeroDivisionError (or yields NaN for numpy scalars)
        # while nothing has been predicted / matched yet; report 0 instead.
        return numerator / denominator if denominator > 0 else 0.0

    save_dir = config.save_path + '/Test_' + str(iteration)
    os.makedirs(save_dir, exist_ok=True)

    dataloader = DataLoader(
        DataLoaderEvalOther('test'),
        batch_size=config.batch_size['test'],
        num_workers=config.num_workers['test'],
        shuffle=False, worker_init_fn=_init_fn
    )

    true_positive = 0
    false_positive = 0
    num_positive = 0

    # Defined up-front so the return statement is valid even if the dataloader is empty
    precision = 0.0
    recall = 0.0
    cumulative_f_score = 0.0

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)
        all_accuracy = []

        ground_truth = dataloader.dataset.gt

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of every image in the batch, in batch order
            annots = [
                ground_truth['annots'][dataloader.dataset.imnames[i]]
                for i in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some model wrappers return one tensor per chunk; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()

            # Clamp the heat maps to [0, 1] in place
            np.clip(output, 0, 1, out=output)

            original_dim = original_dim.cpu().numpy()

            f_score = []

            for i in range(output.shape[0]):

                # --------- Resizing it back to the original image size and saving it ----------- #

                cur_image = denormalize_mean_variance(image[i].data.cpu().numpy().transpose(1, 2, 0))

                # Undo the centred padding: the code assumes the image was scaled so that
                # its longer side is 768 and then padded to 768 x 768.
                max_dim = original_dim[i].max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [int(original_dim[i][0] * resizing_factor), int(original_dim[i][1] * resizing_factor)]

                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2

                cur_image = cv2.resize(
                    cur_image[height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[i][1], original_dim[i][0]))

                # Computed once and reused for both drawing and scoring
                # (it was previously evaluated twice on the same inputs).
                predicted_word_bbox = resize_bbox(original_dim[i], output[i], config)['word_bbox']

                cv2.drawContours(cur_image, predicted_word_bbox, -1, (0, 255, 0), 2)
                cv2.drawContours(cur_image, np.array(annots[i]['bbox']), -1, (0, 0, 255), 2)

                plt.imsave(
                    save_dir + '/' + image_name[i],
                    cur_image.astype(np.uint8))

                score_calc = calculate_fscore(
                    predicted_word_bbox[:, :, 0, :],
                    np.array(annots[i]['bbox']),
                    text_target=annots[i]['text'],
                )

                f_score.append(score_calc['f_score'])
                true_positive += score_calc['true_positive']
                false_positive += score_calc['false_positive']
                num_positive += score_calc['num_positive']

            all_accuracy.append(np.mean(f_score))

            # Metrics over everything seen so far, guarded against empty denominators
            precision = _ratio(true_positive, true_positive + false_positive)
            recall = _ratio(true_positive, num_positive)
            cumulative_f_score = _ratio(2 * precision * recall, precision + recall)

            iterator.set_description(
                'F-score: ' + str(np.mean(all_accuracy)) + '| Cumulative F-score: '
                + str(cumulative_f_score))

        torch.cuda.empty_cache()

    return cumulative_f_score, precision, recall
def generate_next_targets(original_dim, output, image, base_target_path,
                          image_name, annots, dataloader, no):
    """
    Build and store the target of one image for the next weak-supervision iteration.

    The two heat maps in ``output`` (index 0: character, index 1: affinity) are
    cropped, resized to half the original resolution and turned into word boxes by
    ``generate_word_bbox``; the resulting coordinates are doubled afterwards. The
    boxes are then post-processed against the annotation with
    ``get_weighted_character_target`` and dumped to
    ``<base_target_path>/<image_name>.json``. When ``config.visualize_generated`` is
    set, debug images are written below ``<base_target_path>_predicted/`` and
    ``<base_target_path>_next_target/``.

    :param original_dim: (height, width) of the original image, as a numpy array
    :param output: model heat maps for this image
    :param image: normalised input image tensor, used only for visualisation
    :param base_target_path: directory (and path prefix) for the generated files
    :param image_name: file name of the image
    :param annots: annotation with the keys 'bbox' and 'text'
    :param dataloader: dataloader whose dataset provides ``unknown`` and ``gt['unknown']``
    :param no: not used by the active code (only referenced in a disabled line)
    :return: F-score between the predicted word boxes and the post-processed ones
    """
    if 'datapile' in config.dataset_name:
        image_name = image_name.split('/')[-1]

    # Disabled alternative:
    # visualize = config.visualize_generated and no % config.visualize_freq == 0 and no != 0
    visualize = config.visualize_generated  # Just for debugging

    # Region of the 768 x 768 canvas that holds the actual image
    # (assumes aspect-preserving scaling followed by centred padding - TODO confirm).
    scale = 768 / original_dim.max()
    inner_height = int(original_dim[0] * scale)
    inner_width = int(original_dim[1] * scale)
    top = (768 - inner_height) // 2
    left = (768 - inner_width) // 2
    rows = slice(top, top + inner_height)
    cols = slice(left, left + inner_width)

    full_size = (original_dim[1], original_dim[0])
    half_size = (original_dim[1] // 2, original_dim[0] // 2)

    def _png_path(folder):
        # '<base><folder><image name without its last extension>.png'
        return base_target_path + folder + '.'.join(image_name.split('.')[:-1]) + '.png'

    def _original_size_image():
        # Fresh de-normalised copy of the input, cropped and resized to the original size
        restored = denormalize_mean_variance(
            image.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(restored[rows, cols], full_size)

    # NOTE(review): values outside [0, 1] would not survive this uint8 cast -
    # presumably the caller clamps the heat maps; confirm.
    heatmaps = np.uint8(output * 255)

    character_bbox = cv2.resize(heatmaps[0, rows, cols], half_size) / 255
    affinity_bbox = cv2.resize(heatmaps[1, rows, cols], half_size) / 255

    # Generating word-bbox given character and affinity heatmap
    generated_targets = generate_word_bbox(
        character_bbox,
        affinity_bbox,
        character_threshold=config.threshold_character,
        affinity_threshold=config.threshold_affinity,
        word_threshold=config.threshold_word,
        character_threshold_upper=config.threshold_character_upper,
        affinity_threshold_upper=config.threshold_affinity_upper,
        scaling_character=config.scale_character,
        scaling_affinity=config.scale_affinity)

    # The boxes were found at half resolution; scale them back up
    generated_targets['word_bbox'] = generated_targets['word_bbox'] * 2
    for key in ('characters', 'affinity'):
        generated_targets[key] = [boxes * 2 for boxes in generated_targets[key]]

    if visualize:

        character_bbox = cv2.resize(
            (character_bbox * 255).astype(np.uint8), full_size) / 255
        affinity_bbox = cv2.resize(
            (affinity_bbox * 255).astype(np.uint8), full_size) / 255

        image_i = _original_size_image()

        # Saving affinity heat map
        plt.imsave(_png_path('_predicted/affinity/'),
                   np.float32(affinity_bbox > config.threshold_affinity_upper),
                   cmap='gray')

        # Saving character heat map
        plt.imsave(_png_path('_predicted/character/'),
                   np.float32(character_bbox > config.threshold_character_upper),
                   cmap='gray')

        # Saving word bbox drawn on the original image
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1,
                         (0, 255, 0), 2)
        plt.imsave(_png_path('_predicted/word_bbox/'), image_i)

    predicted_word_bbox = generated_targets['word_bbox'].copy()

    # --------------- PostProcessing for creating the targets for the next iteration ---------------- #
    annotation = {'bbox': annots['bbox'], 'text': annots['text']}
    generated_targets = get_weighted_character_target(
        generated_targets, annotation, dataloader.dataset.unknown,
        config.threshold_fscore, config.weight_threshold)
    target_word_bbox = generated_targets['word_bbox'].copy()

    score = calculate_fscore(
        predicted_word_bbox[:, :, 0, :],
        target_word_bbox[:, :, 0, :],
        text_target=annots['text'],
        unknown=dataloader.dataset.gt['unknown'])
    f_score = score['f_score']

    if visualize:
        # Start from a clean image so the pre-processing contours are not shown again
        image_i = _original_size_image()

        # Saving word bbox after postprocessing
        cv2.drawContours(image_i, generated_targets['word_bbox'], -1,
                         (0, 255, 0), 2)
        plt.imsave(_png_path('_next_target/word_bbox/'), image_i)

        canvas_shape = (image_i.shape[0], image_i.shape[1])
        weights = generated_targets['weights']

        # Generate affinity heatmap after postprocessing
        affinity_target, affinity_weight_map = generate_target_others(
            canvas_shape,
            generated_targets['affinity'].copy(),
            np.array(weights)[:, 1])

        # Generate character heatmap after postprocessing
        character_target, characters_weight_map = generate_target_others(
            canvas_shape,
            generated_targets['characters'].copy(),
            np.array(weights)[:, 0])

        # Saving the heat maps and weight maps, in this order
        for folder, heatmap in (
                ('_next_target/affinity/', affinity_target),
                ('_next_target/character/', character_target),
                ('_next_target/affinity_weight/', affinity_weight_map),
                ('_next_target/character_weight/', characters_weight_map)):
            plt.imsave(_png_path(folder), heatmap, cmap='gray')

    # Saving the target for next iteration in json format
    # (numpy arrays are converted to plain lists first)
    generated_targets['word_bbox'] = generated_targets['word_bbox'].tolist()
    for key in ('characters', 'affinity'):
        generated_targets[key] = [
            word_i.tolist() for word_i in generated_targets[key]
        ]

    with open(base_target_path + '/' + image_name + '.json', 'w') as f:
        json.dump(generated_targets, f)

    return f_score
Beispiel #4
0
def test(model):
    """
    Evaluate the weak-supervised model on the ``DataLoaderEvalICDAR2013('test')`` split.

    For every image the character and affinity heat maps are cropped, resized back to
    the original image size and converted to word boxes with ``generate_word_bbox``;
    the boxes are scored against the ground-truth boxes with ``calculate_fscore``.

    :param model: Pre-trained model on SynthText
    :return: mean over all batches of the per-batch mean F-score (a single number,
        no loss is computed). Batches are weighted equally, whatever their size.
    """

    # NOTE(review): the batch size is read from the 'train' entry of the config even
    # though this is an evaluation loop - confirm this is intended.
    dataloader = DataLoader(
        DataLoaderEvalICDAR2013('test'), batch_size=config.batch_size['train'], num_workers=8, shuffle=False)

    with torch.no_grad():
        model.eval()
        iterator = tqdm(dataloader)
        all_accuracy = []

        ground_truth = dataloader.dataset.gt

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of every image in the batch, in batch order
            annots = [
                ground_truth['annots'][dataloader.dataset.imnames[i]]
                for i in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some model wrappers return one tensor per chunk; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            f_score = []

            for i in range(output.shape[0]):
                # --------- Resizing it back to the original image size ----------- #

                # Undo the centred padding: the code assumes the image was scaled so that
                # its longer side is 768 and then padded to 768 x 768.
                max_dim = original_dim[i].max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [int(original_dim[i][0] * resizing_factor), int(original_dim[i][1] * resizing_factor)]

                # Quantise to 256 levels. The activations are clamped to [0, 1] first:
                # without it a value slightly above 1 (or below 0) does not fit into uint8
                # and a confident pixel could turn into a near-zero one.
                # The result is written back into the float array on purpose, so the
                # resize below still interpolates in floating point.
                output[i] = np.uint8(np.clip(output[i], 0, 1) * 255)

                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2

                character_bbox = cv2.resize(
                    output[i, 0, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[i][1], original_dim[i][0])) / 255

                affinity_bbox = cv2.resize(
                    output[i, 1, height_pad:height_pad + before_pad_dim[0], width_pad:width_pad + before_pad_dim[1]],
                    (original_dim[i][1], original_dim[i][0])) / 255

                generated_targets = generate_word_bbox(
                    character_bbox, affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word)

                predicted_word_bbox = generated_targets['word_bbox'].copy()

                # NOTE(review): np.mean is applied directly to these values, so this version
                # presumably expects calculate_fscore to return a number - confirm.
                f_score.append(calculate_fscore(predicted_word_bbox[:, :, 0, :], np.array(annots[i]['bbox'])))

            all_accuracy.append(np.mean(f_score))

            iterator.set_description('F-score: ' + str(np.mean(all_accuracy)))

        torch.cuda.empty_cache()

    return np.mean(all_accuracy)
Beispiel #5
0
def synthesize_with_score(dataloader, model, base_target_path, iteration):
    """
    Generate the weak-supervision targets for every image served by ``dataloader``.

    For each image the model's character and affinity heat maps are cropped and resized
    back to the original image size, converted to word boxes with
    ``generate_word_bbox`` and post-processed against the ground-truth annotation with
    ``get_weighted_character_target``. The result is written to
    ``<base_target_path>/<image name without extension>.json``. When
    ``config.visualize_generated`` is set, debug images are additionally written below
    ``<base_target_path>_predicted/`` and ``<base_target_path>_next_target/``.

    :param dataloader: dataloader for icdar 2013 dataset; its dataset must expose
        ``gt['annots']``, ``imnames`` and ``unknown``
    :param model: pre-trained model
    :param base_target_path: path where to store the predictions
    :param iteration: index into ``config.weight_threshold`` selecting the weight
        threshold used for the post-processing
    :return: None; the running mean F-score is only shown on the progress bar
    """

    def _original_size_image(image_tensor, rows, cols, size):
        # Fresh de-normalised copy of the input, cropped and resized to the original size
        restored = denormalize_mean_variance(
            image_tensor.data.cpu().numpy().transpose(1, 2, 0))
        return cv2.resize(restored[rows, cols], size)

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)

        mean_f_score = []

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of every image in the batch, in batch order
            annots = [
                dataloader.dataset.gt['annots'][dataloader.dataset.imnames[i]]
                for i in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some model wrappers return one tensor per chunk; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            f_score = []

            for i in range(output.shape[0]):

                # File name without its last extension, shared by every file written below
                stem = '.'.join(image_name[i].split('.')[:-1])

                # --------- Resizing it back to the original image size and saving it ----------- #

                # Undo the centred padding: the code assumes the image was scaled so that
                # its longer side is 768 and then padded to 768 x 768.
                max_dim = original_dim[i].max()
                resizing_factor = 768 / max_dim
                before_pad_dim = [
                    int(original_dim[i][0] * resizing_factor),
                    int(original_dim[i][1] * resizing_factor)
                ]

                height_pad = (768 - before_pad_dim[0]) // 2
                width_pad = (768 - before_pad_dim[1]) // 2
                rows = slice(height_pad, height_pad + before_pad_dim[0])
                cols = slice(width_pad, width_pad + before_pad_dim[1])
                full_size = (original_dim[i][1], original_dim[i][0])

                # Quantise to 256 levels. The activations are clamped to [0, 1] first:
                # without it a value slightly above 1 (or below 0) does not fit into uint8
                # and a confident pixel could turn into a near-zero one.
                # The result is written back into the float array on purpose, so the
                # resize below still interpolates in floating point.
                output[i] = np.uint8(np.clip(output[i], 0, 1) * 255)

                character_bbox = cv2.resize(output[i, 0, rows, cols], full_size) / 255
                affinity_bbox = cv2.resize(output[i, 1, rows, cols], full_size) / 255

                # Generating word-bbox given character and affinity heatmap

                generated_targets = generate_word_bbox(
                    character_bbox,
                    affinity_bbox,
                    character_threshold=config.threshold_character,
                    affinity_threshold=config.threshold_affinity,
                    word_threshold=config.threshold_word)

                if config.visualize_generated:

                    # Saving affinity heat map
                    plt.imsave(
                        base_target_path + '_predicted/affinity/' + stem + '.png',
                        np.float32(affinity_bbox > config.threshold_affinity),
                        cmap='gray')

                    # Saving character heat map
                    plt.imsave(
                        base_target_path + '_predicted/character/' + stem + '.png',
                        np.float32(character_bbox > config.threshold_character),
                        cmap='gray')

                    # Saving word bbox drawn on the original image
                    image_i = _original_size_image(image[i], rows, cols, full_size)
                    cv2.drawContours(image_i, generated_targets['word_bbox'],
                                     -1, (0, 255, 0), 2)
                    plt.imsave(
                        base_target_path + '_predicted/word_bbox/' + stem + '.png',
                        image_i)

                predicted_word_bbox = generated_targets['word_bbox'].copy()

                # --------------- PostProcessing for creating the targets for the next iteration ---------------- #

                generated_targets = get_weighted_character_target(
                    generated_targets, {
                        'bbox': annots[i]['bbox'],
                        'text': annots[i]['text']
                    }, dataloader.dataset.unknown, config.threshold_fscore,
                    config.weight_threshold[iteration])

                target_word_bbox = generated_targets['word_bbox'].copy()

                # NOTE(review): np.mean is applied directly to these values, so this version
                # presumably expects calculate_fscore to return a number - confirm.
                f_score.append(
                    calculate_fscore(predicted_word_bbox[:, :, 0, :],
                                     target_word_bbox[:, :, 0, :]))

                if config.visualize_generated:

                    # Start from a clean image so the pre-processing contours are not shown again
                    image_i = _original_size_image(image[i], rows, cols, full_size)

                    # Saving word bbox after postprocessing
                    cv2.drawContours(image_i, generated_targets['word_bbox'],
                                     -1, (0, 255, 0), 2)
                    plt.imsave(
                        base_target_path + '_next_target/word_bbox/' + stem + '.png',
                        image_i)

                    canvas_shape = (image_i.shape[0], image_i.shape[1])

                    # Generate affinity heatmap after postprocessing
                    affinity_target, affinity_weight_map = generate_target_others(
                        canvas_shape,
                        generated_targets['affinity'].copy(),
                        generated_targets['weights'].copy())

                    # Generate character heatmap after postprocessing
                    character_target, characters_weight_map = generate_target_others(
                        canvas_shape,
                        generated_targets['characters'].copy(),
                        generated_targets['weights'].copy())

                    # Saving the heat maps and weight maps, in this order
                    for folder, heatmap in (
                            ('_next_target/affinity/', affinity_target),
                            ('_next_target/character/', character_target),
                            ('_next_target/affinity_weight/', affinity_weight_map),
                            ('_next_target/character_weight/', characters_weight_map)):
                        plt.imsave(base_target_path + folder + stem + '.png',
                                   heatmap,
                                   cmap='gray')

                # Saving the target for next iteration in json format
                # (numpy arrays are converted to plain lists first)

                generated_targets['word_bbox'] = generated_targets[
                    'word_bbox'].tolist()
                for key in ('characters', 'affinity'):
                    generated_targets[key] = [
                        word_i.tolist() for word_i in generated_targets[key]
                    ]

                with open(base_target_path + '/' + stem + '.json', 'w') as f:
                    json.dump(generated_targets, f)

            mean_f_score.append(np.mean(f_score))

            iterator.set_description('F-score: ' + str(np.mean(mean_f_score)))
Beispiel #6
0
def test(model):
    """
    Evaluate the weak-supervised model on the ``DataLoaderEvalOther('test')`` split.

    For every image the predicted word boxes (obtained through ``resize_bbox``) are
    scored against the ground-truth boxes with ``calculate_fscore``. A cumulative
    F-score based on the accumulated true-positive / false-positive / positive counts
    is shown on the progress bar only.

    :param model: Pre-trained model on SynthText
    :return: mean over all batches of the per-batch mean F-score (a single number,
        no loss is computed). Batches are weighted equally, whatever their size.
    """

    def _ratio(numerator, denominator):
        # Plain division raises ZeroDivisionError (or yields NaN for numpy scalars)
        # while nothing has been predicted / matched yet; report 0 instead.
        return numerator / denominator if denominator > 0 else 0.0

    dataloader = DataLoader(
        DataLoaderEvalOther('test'),
        batch_size=config.batch_size['test'],
        num_workers=config.num_workers['test'],
        shuffle=False
    )

    true_positive = 0
    false_positive = 0
    num_positive = 0

    with torch.no_grad():

        model.eval()
        iterator = tqdm(dataloader)
        all_accuracy = []

        ground_truth = dataloader.dataset.gt

        for image, image_name, original_dim, item in iterator:

            # Ground-truth annotation of every image in the batch, in batch order
            annots = [
                ground_truth['annots'][dataloader.dataset.imnames[i]]
                for i in item
            ]

            if config.use_cuda:
                image = image.cuda()

            output = model(image)

            # Some model wrappers return one tensor per chunk; merge them along the batch axis
            if isinstance(output, list):
                output = torch.cat(output, dim=0)

            output = output.data.cpu().numpy()
            original_dim = original_dim.cpu().numpy()

            f_score = []

            for i in range(output.shape[0]):

                # ToDo - Visualise the test results
                # ToDo - Why is F-score of testing always less than F-score of training at iteration 0?

                score_calc = calculate_fscore(
                    resize_bbox(original_dim[i], output[i], config)['word_bbox'][:, :, 0, :],
                    np.array(annots[i]['bbox']),
                    text_target=annots[i]['text'],
                )
                f_score.append(score_calc['f_score'])
                true_positive += score_calc['true_positive']
                false_positive += score_calc['false_positive']
                num_positive += score_calc['num_positive']

            all_accuracy.append(np.mean(f_score))

            # Metrics over everything seen so far, guarded against empty denominators
            precision = _ratio(true_positive, true_positive + false_positive)
            recall = _ratio(true_positive, num_positive)
            cumulative_f_score = _ratio(2 * precision * recall, precision + recall)

            iterator.set_description(
                'F-score: ' + str(np.mean(all_accuracy)) + '| Cumulative F-score: '
                + str(cumulative_f_score))

        torch.cuda.empty_cache()

    return np.mean(all_accuracy)