Example 1
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise
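    # On Python 3 the try/except above is equivalent to the one-liner below
    # (errno 17 is errno.EEXIST, i.e. the directory already exists):
    #   os.makedirs(FLAGS.output_dir, exist_ok=True)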
    """
    if FLAGS.use_vacab and os.path.exists("./vocab.txt"):
        bk_tree = BKTree(levenshtein, list_words('./vocab.txt'))
        # bk_tree = bktree.Tree()
    """
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        input_feature_map = tf.placeholder(tf.float32,
                                           shape=[None, None, None, 32],
                                           name='input_feature_map')
        input_transform_matrix = tf.placeholder(tf.float32,
                                                shape=[None, 6],
                                                name='input_transform_matrix')
        input_box_mask = []
        input_box_mask.append(
            tf.placeholder(tf.int32, shape=[None], name='input_box_masks_0'))
        input_box_widths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name='input_box_widths')

        # every box shares the maximum box width as its sequence length
        # (computed here but not used further below)
        input_seq_len = input_box_widths[tf.argmax(
            input_box_widths, 0)] * tf.ones_like(input_box_widths)
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        shared_feature, f_geometry = detect_part.model(input_images)
        pad_rois = roi_rotate_part.roi_rotate_tensor_pad(
            input_feature_map, input_transform_matrix, input_box_mask,
            input_box_widths)
        recognition_logits = recognize_part.build_graph(
            pad_rois, input_box_widths)
        _, dense_decode = recognize_part.decode(recognition_logits,
                                                input_box_widths)

        # restore the exponential-moving-average (shadow) weights rather than
        # the raw training weights
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            # used to resume the current test after an interruption: n = 10000 - index
            im_fn_list = im_fn_list[0:]
            index = len(im_fn_list)
            for im_fn in im_fn_list:

                print('still have {} pictures left'.format(index))
                print(im_fn)

                # image used to draw quads
                im = cv2.imread(im_fn)[:, :, ::-1]
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                #method 2
                #im = image.load_img(im_fn)
                #im_draw = im.copy()
                #d_wight, d_height = resize_image2(im)
                #im_resized = im.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
                #im_draw = cv2.resize(im_draw, (int(d_wight), int(d_height)))
                #ratio_w = d_wight / im.width
                #ratio_h = d_height / im.height

                img = image.img_to_array(im_resized)
                img = preprocess_input(img, mode='tf')

                shared_feature_map, geometry = sess.run(
                    [shared_feature, f_geometry],
                    feed_dict={input_images: [img]})

                geometry = np.squeeze(geometry, axis=0)
                print('geometry is : ', (np.array(geometry)).shape)
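                # the first three geometry channels appear to be
                # AdvancedEAST-style score/vertex-code logits, hence the
                # sigmoid before thresholding below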
                geometry[:, :, :3] = sigmoid(geometry[:, :, :3])
                #print('geometry : ' ,geometry[40:50,40:50,0:7])

                pixel_threshold = 0.9
                cond = np.greater_equal(geometry[:, :, 0], pixel_threshold)
                activation_pixels = np.where(cond)
                print('activation_pixels is : ',
                      (np.array(activation_pixels)).shape)

                #for i,j in zip(activation_pixels[0], activation_pixels[1]):
                #print(geometry[i, j, 0:7])

                #boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
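                # nms() is assumed to be a module-level quad-NMS helper that
                # merges the activated pixels into quads, returning per-vertex
                # scores alongside the quad coordinates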
                quad_scores, boxes = nms(geometry, activation_pixels)
                #print('boxes : ',boxes)

                input_roi_boxes = []
                for score, box in zip(quad_scores, boxes):
                    if np.amin(score) > 0:
                        #print(type(box))
                        box = box[[0, 3, 2, 1]]
                        input_roi_boxes.append(box)
                input_roi_boxes = np.array(input_roi_boxes)
                #print('input_roi_boxes : ',input_roi_boxes[:])

                im_txt = None
                if input_roi_boxes is not None and input_roi_boxes.shape[
                        0] != 0:

                    #for ICDAR
                    #res_file_path = os.path.join(FLAGS.output_dir, 'res_' + '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    #for MTWI
                    res_file_path = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(im_fn[:-4].split('/')[-1]))

                    input_roi_boxes = input_roi_boxes[:, :8].reshape(-1, 8)
                    recog_decode_list = []
                    # Batch the ROIs so that too many text regions do not cause OOM
                    for batch_index in range(input_roi_boxes.shape[0] // 32 +
                                             1):  # test roi batch size is 32
                        start_slice_index = batch_index * 32
                        end_slice_index = min((batch_index + 1) * 32,
                                              input_roi_boxes.shape[0])
                        tmp_roi_boxes = input_roi_boxes[
                            start_slice_index:end_slice_index]

                        boxes_masks = [0] * tmp_roi_boxes.shape[0]

                        transform_matrixes, box_widths = get_project_matrix_and_width(
                            tmp_roi_boxes)
                        # max_box_widths = max_width * np.ones(boxes_masks.shape[0]) # seq_len

                        # Run end to end
                        try:
                            recog_decode = sess.run(dense_decode,
                                                    feed_dict={
                                                        input_feature_map:
                                                        shared_feature_map,
                                                        input_transform_matrix:
                                                        transform_matrixes,
                                                        input_box_mask[0]:
                                                        boxes_masks,
                                                        input_box_widths:
                                                        box_widths
                                                    })
                            recog_decode_list.extend([r for r in recog_decode])
                        except Exception:
                            # keep a placeholder so the length check below
                            # flags this image and it is skipped
                            recog_decode_list.extend([None])

                    if len(recog_decode_list) != input_roi_boxes.shape[0]:
                        print(
                            "detection and recognition results do not match!")
                        with open(
                                '/home/wsw/deeplearning/advancedFOTS/somepicwrong.txt',
                                'a') as f:
                            f.write('{}\r\n'.format(im_fn))
                        wrong_file_path = os.path.join(
                            '/home/wsw/deeplearning/advancedFOTS/picwrong/',
                            os.path.basename(im_fn))
                        cv2.imwrite(wrong_file_path, im[:, :, ::-1])
                        index -= 1
                        print('------------------------------------')
                        continue
                        #exit(-1)

                    # Prepare the boxes for drawing
                    #boxes = boxes[:, :8].reshape((-1, 4, 2))
                    #boxes[:, :, 0] /= ratio_w
                    #boxes[:, :, 1] /= ratio_h
                    input_roi_boxes = input_roi_boxes[:, :8].reshape(
                        (-1, 4, 2))
                    #input_roi_boxes[:, :, 0] /= ratio_w
                    #input_roi_boxes[:, :, 1] /= ratio_h

                    with open(res_file_path, 'w') as f:
                        #for score, box, i in zip(quad_scores, boxes, range(len(input_roi_boxes))):
                        for i, box in enumerate(input_roi_boxes):
                            # score filtering (np.amin(score) > 0) already
                            # happened when building input_roi_boxes, so every
                            # box is kept here
                            if True:

                                # for ICDAR to avoid submitting errors
                                #box = sort_poly(box.astype(np.int32))

                                # for MTWI
                                box = box[[0, 3, 2, 1]]
                                #box = box.astype(np.int32)

                                # map the box back from the resized image to
                                # the original image scale
                                box = box / [ratio_w, ratio_h]
                                box = box.astype(np.int32)

                                recognition_result = ground_truth_to_word(
                                    recog_decode_list[i])
                                print('recognition_result : ',
                                      recognition_result)

                                f.write(
                                    '{},{},{},{},{},{},{},{},{}\r\n'.format(
                                        box[0, 0], box[0, 1], box[1, 0],
                                        box[1, 1], box[2, 0], box[2, 1],
                                        box[3, 0], box[3, 1],
                                        recognition_result))

                                # Draw the bounding box on one persistent BGR
                                # copy: drawing on the non-contiguous view
                                # im[:, :, ::-1] either fails or hits a fresh
                                # temporary, depending on the cv2 version, so
                                # only the last box would survive
                                if im_txt is None:
                                    im_txt = im[:, :, ::-1].copy()
                                cv2.polylines(
                                    im_txt,
                                    [box.astype(np.int32).reshape((-1, 1, 2))],
                                    True,
                                    color=(0, 0, 255),
                                    thickness=2)
                                '''
                                # Draw recognition results area
                                text_area = box.copy()
                                text_area[2, 1] = text_area[1, 1]
                                text_area[3, 1] = text_area[0, 1]
                                text_area[0, 1] = text_area[0, 1] - 15
                                text_area[1, 1] = text_area[1, 1] - 15
                                cv2.fillPoly(im[:, :, ::-1], [text_area.astype(np.int32).reshape((-1, 1, 2))], color=(255, 255, 0))
                                im_txt = cv2.putText(im[:, :, ::-1], recognition_result, (box[0, 0], box[0, 1]), font, 0.5, (0, 0, 255), 1)
                                '''
                else:
                    #for ICDAR
                    #res_file = os.path.join(FLAGS.output_dir, 'res_' + '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    #for MTWI
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(im_fn[:-4].split('/')[-1]))

                    open(res_file, "w").close()  # create an empty result file
                    im_txt = None

                # show how many pictures are left
                index -= 1
                print('------------------------------------')

                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    # cv2.imwrite(img_path, im[:, :, ::-1])
                    if im_txt is not None:
                        cv2.imwrite(img_path, im_txt)
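
The recognition step above runs the ROIs through the session in slices of 32 so that images with many text regions do not exhaust GPU memory; the same pattern recurs in every example below. A minimal self-contained sketch of the slicing logic, assuming plain NumPy arrays (the helper name is illustrative; unlike the `n // 32 + 1` loop above, it never yields an empty final batch when the box count is an exact multiple of 32):

import numpy as np

def iter_roi_batches(roi_boxes, batch_size=32):
    """Yield consecutive slices of at most batch_size ROI boxes."""
    for start in range(0, roi_boxes.shape[0], batch_size):
        # NumPy slicing clamps the end index, so no explicit min() is needed
        yield roi_boxes[start:start + batch_size]

boxes = np.zeros((70, 8), dtype=np.float32)
print([b.shape[0] for b in iter_roi_batches(boxes)])  # [32, 32, 6]
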
Example 2
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    bk_tree = BKTree(levenshtein, list_words(FLAGS.vocab))
    # bk_tree = bktree.Tree()
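    # The BK-tree indexes the vocabulary by Levenshtein distance, enabling the
    # fuzzy correction of recognized English words further below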

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        input_feature_map = tf.placeholder(tf.float32,
                                           shape=[None, None, None, 32],
                                           name='input_feature_map')
        input_transform_matrix = tf.placeholder(tf.float32,
                                                shape=[None, 6],
                                                name='input_transform_matrix')
        input_box_mask = []
        input_box_mask.append(
            tf.placeholder(tf.int32, shape=[None], name='input_box_masks_0'))
        input_box_widths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name='input_box_widths')

        input_seq_len = input_box_widths[tf.argmax(
            input_box_widths, 0)] * tf.ones_like(input_box_widths)
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        shared_feature, f_score, f_geometry = detect_part.model(input_images)
        pad_rois = roi_rotate_part.roi_rotate_tensor_pad(
            input_feature_map, input_transform_matrix, input_box_mask,
            input_box_widths)
        recognition_logits = recognize_part.build_graph(
            pad_rois, input_box_widths)
        _, dense_decode = recognize_part.decode(recognition_logits,
                                                input_box_widths)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                #im = cv2.imread(im_fn)[:, :, ::-1]
                im = cv2.imread(im_fn)
                im = cv2.resize(im, (960, 540))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                # im_resized_d, (ratio_h_d, ratio_w_d) = resize_image_detection(im)

                timer = {'detect': 0, 'restore': 0, 'nms': 0, 'recog': 0}
                start = time.time()
                shared_feature_map, score, geometry = sess.run(
                    [shared_feature, f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                timer['detect'] = time.time() - start
                start = time.time()  # reset for recognition
                if boxes is not None and boxes.shape[0] != 0:
                    #res_file_path = os.path.join(FLAGS.output_dir,'res_' + '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))
                    res_file_path = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn)))

                    input_roi_boxes = boxes[:, :8].reshape(-1, 8)
                    recog_decode_list = []
                    # Batch the ROIs so that too many text regions do not cause OOM
                    for batch_index in range(input_roi_boxes.shape[0] // 32 +
                                             1):  # test roi batch size is 32
                        start_slice_index = batch_index * 32
                        end_slice_index = min((batch_index + 1) * 32,
                                              input_roi_boxes.shape[0])
                        tmp_roi_boxes = input_roi_boxes[
                            start_slice_index:end_slice_index]

                        boxes_masks = [0] * tmp_roi_boxes.shape[0]
                        transform_matrixes, box_widths = get_project_matrix_and_width(
                            tmp_roi_boxes)
                        #max_box_widths = max_width * np.ones(boxes_masks.shape[0]) # seq_len

                        # Run end to end
                        recog_decode = sess.run(dense_decode,
                                                feed_dict={
                                                    input_feature_map:
                                                    shared_feature_map,
                                                    input_transform_matrix:
                                                    transform_matrixes,
                                                    input_box_mask[0]:
                                                    boxes_masks,
                                                    input_box_widths:
                                                    box_widths
                                                })
                        recog_decode_list.extend([r for r in recog_decode])

                    timer['recog'] = time.time() - start
                    # Prepare the boxes for drawing
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                    if len(recog_decode_list) != boxes.shape[0]:
                        print(
                            "detection and recognition results do not match!")
                        exit(-1)

                    with open(res_file_path, 'w') as f:
                        for i, box in enumerate(boxes):
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue
                            recognition_result = ground_truth_to_word(
                                recog_decode_list[i])

                            if contain_eng(recognition_result):
                                print(recognition_result)
                                fix_result = bktree_search(
                                    bk_tree, recognition_result.lower())
                                print(fix_result)
                                if len(fix_result) != 0:
                                    recognition_result = fix_result[0][1]
                                # print(recognition_result)
                            # (non-English results are kept unchanged)

                            f.write('{},{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1],
                                recognition_result))

                            # Draw bounding box
                            cv2.polylines(
                                im, [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                            # Draw a filled label strip just above the box and
                            # put the recognition text there
                            text_area = box.copy()
                            text_area[2, 1] = text_area[1, 1]
                            text_area[3, 1] = text_area[0, 1]
                            text_area[0, 1] = text_area[0, 1] - 15
                            text_area[1, 1] = text_area[1, 1] - 15
                            cv2.fillPoly(im, [
                                text_area.astype(np.int32).reshape((-1, 1, 2))
                            ],
                                         color=(255, 255, 0))
                            im_txt = cv2.putText(im, recognition_result,
                                                 (box[0, 0], box[0, 1]), font,
                                                 0.5, (0, 0, 255), 1)
                            # to overlay Chinese text instead:
                            # im_txt = cv2ImgAddText(im, recognition_result, box[0, 0], box[0, 1], (0, 0, 149), 20)
                else:
                    #res_file = os.path.join(FLAGS.output_dir, 'res_' + '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn)))
                    open(res_file, "w").close()  # create an empty result file
                    im_txt = None

                print(
                    '{} : detect {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms, recog {:.0f}ms'
                    .format(im_fn, timer['detect'] * 1000,
                            timer['restore'] * 1000, timer['nms'] * 1000,
                            timer['recog'] * 1000))

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    #cv2.imwrite(img_path, im[:, :, ::-1])
                    if im_txt is not None:
                        cv2.imwrite(img_path, im_txt)
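
Example 2 assumes module-level helpers `BKTree`, `levenshtein`, `list_words`, `bktree_search` and `contain_eng` for the vocabulary-based correction step. A minimal sketch of a BK-tree with the same flavour, under the assumption that the search returns (distance, word) pairs sorted by distance, as `fix_result[0][1]` above implies; all names here are illustrative:

def levenshtein(a, b):
    """Classic dynamic-programming edit distance."""
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1, cur[-1] + 1, prev[j - 1] + (ca != cb)))
        prev = cur
    return prev[-1]

class BKTree:
    def __init__(self, distance_fn, words):
        self.distance_fn = distance_fn
        it = iter(words)
        self.root = (next(it), {})  # (word, {distance: child node})
        for w in it:
            self._add(w)

    def _add(self, word):
        node = self.root
        while True:
            d = self.distance_fn(word, node[0])
            if d in node[1]:
                node = node[1][d]
            else:
                node[1][d] = (word, {})
                return

    def query(self, word, radius=2):
        """Return (distance, word) pairs within radius, nearest first."""
        results, stack = [], [self.root]
        while stack:
            w, children = stack.pop()
            d = self.distance_fn(word, w)
            if d <= radius:
                results.append((d, w))
            # the triangle inequality prunes subtrees whose edge distance
            # falls outside [d - radius, d + radius]
            stack.extend(child for dist, child in children.items()
                         if d - radius <= dist <= d + radius)
        return sorted(results)

tree = BKTree(levenshtein, ['score', 'timeout', 'quarter'])
print(tree.query('scroe'))  # [(2, 'score')]
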
Example 3
def main(photo):
	import os  
	with tf.get_default_graph().as_default():
		input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
		input_feature_map = tf.placeholder(tf.float32, shape=[None, None, None, 32], name='input_feature_map')
		input_transform_matrix = tf.placeholder(tf.float32, shape=[None, 6], name='input_transform_matrix')
		input_box_mask = []
		input_box_mask.append(tf.placeholder(tf.int32, shape=[None], name='input_box_masks_0'))
		input_box_widths = tf.placeholder(tf.int32, shape=[None], name='input_box_widths')

		input_seq_len = input_box_widths[tf.argmax(input_box_widths, 0)] * tf.ones_like(input_box_widths)
		global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

		shared_feature, f_score, f_geometry = detect_part.model(input_images)
		pad_rois = roi_rotate_part.roi_rotate_tensor_pad(input_feature_map, input_transform_matrix, input_box_mask, input_box_widths)
		recognition_logits = recognize_part.build_graph(pad_rois, input_box_widths)
		_, dense_decode = recognize_part.decode(recognition_logits, input_box_widths)

		variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
		saver = tf.train.Saver(variable_averages.variables_to_restore())

		with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
			ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
			model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
			print('Restore from {}'.format(model_path))
			saver.restore(sess, model_path)
			
			# Decode a Flask/Werkzeug photo uploaded via HTTP POST in-memory
			# into an OpenCV matrix
			in_memory_file = io.BytesIO()
			photo.save(in_memory_file)
			# np.fromstring is deprecated for binary input; use np.frombuffer
			data = np.frombuffer(in_memory_file.getvalue(), dtype=np.uint8)
			# im = cv2.imread(photo)[:, :, ::-1]
			color_image_flag = 1  # cv2.IMREAD_COLOR
			img = cv2.imdecode(data, color_image_flag)
			img = img[:, :, ::-1]  # BGR -> RGB
			#print('image',img) 
		   
			start_time = time.time()
			im_resized, (ratio_h, ratio_w) = resize_image(img)
			timer = {'detect': 0, 'restore': 0, 'nms': 0, 'recog': 0}
			start = time.time()
			shared_feature_map, score, geometry = sess.run([shared_feature, f_score, f_geometry], feed_dict={input_images: [im_resized]})
			
			boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
			timer['detect'] = time.time() - start
			start = time.time() # reset for recognition

			text_result = []
			if boxes is not None and boxes.shape[0] != 0:
				
				res_file_path = os.path.join(FLAGS.output_dir, 'res_' + '{}.txt'.format(photo.filename))
				# alternative: 'res_{}.txt'.format(os.path.basename(photo.filename).split('.')[0])

				input_roi_boxes = boxes[:, :8].reshape(-1, 8)
				recog_decode_list = []
				# Batch the ROIs so that too many text regions do not cause OOM
				for batch_index in range(input_roi_boxes.shape[0] // 32 + 1): # test roi batch size is 32
					start_slice_index = batch_index * 32
					end_slice_index = min((batch_index + 1) * 32, input_roi_boxes.shape[0])
					tmp_roi_boxes = input_roi_boxes[start_slice_index:end_slice_index]

					boxes_masks = [0] * tmp_roi_boxes.shape[0]
					transform_matrixes, box_widths = get_project_matrix_and_width(tmp_roi_boxes)
					# max_box_widths = max_width * np.ones(boxes_masks.shape[0]) # seq_len
					
					# Run end to end
					recog_decode = sess.run(dense_decode, feed_dict={input_feature_map: shared_feature_map, input_transform_matrix: transform_matrixes, input_box_mask[0]: boxes_masks, input_box_widths: box_widths})
					recog_decode_list.extend([r for r in recog_decode])

				timer['recog'] = time.time() - start
				# Prepare the boxes for drawing
				boxes = boxes[:, :8].reshape((-1, 4, 2))
				boxes[:, :, 0] /= ratio_w
				boxes[:, :, 1] /= ratio_h

				if len(recog_decode_list) != boxes.shape[0]:
					print("detection and recognition result are not equal!")
					exit(-1)

				im_txt = img[:, :, ::-1].copy()  # persistent BGR copy to draw on
				with open(res_file_path, 'w') as f:
					for i, box in enumerate(boxes):
						# to avoid submitting errors
						box = sort_poly(box.astype(np.int32))
						if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
							continue
						recognition_result = ground_truth_to_word(recog_decode_list[i])
						text_result.append(recognition_result)
						
						f.write('{},{},{},{},{},{},{},{},{}\r\n'.format(
							box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0], box[2, 1], box[3, 0], box[3, 1], recognition_result
						))
							
						# Draw the bounding box on the persistent copy; drawing
						# on the non-contiguous view img[:, :, ::-1] would
						# either fail or hit a temporary, depending on cv2
						cv2.polylines(im_txt, [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 255, 0), thickness=1)
						# Build a filled label strip just above the box for the text
						text_area = box.copy()
						text_area[2, 1] = text_area[1, 1]
						text_area[3, 1] = text_area[0, 1]
						text_area[0, 1] = text_area[0, 1] - 15
						text_area[1, 1] = text_area[1, 1] - 15
						
						cv2.fillPoly(im_txt, [text_area.astype(np.int32).reshape((-1, 1, 2))], color=(255, 255, 0))
						cv2.putText(im_txt, recognition_result, (box[0, 0], box[0, 1]), font, 0.5, (0, 0, 255), 1)
			else:
				res_file = os.path.join(FLAGS.output_dir, 'res_' + '{}.txt'.format(os.path.basename(photo.filename).split('.')[0]))
				open(res_file, "w").close()  # create an empty result file
				im_txt = None

			print('{} : detect {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms, recog {:.0f}ms'.format(
				photo.filename, timer['detect']*1000, timer['restore']*1000, timer['nms']*1000, timer['recog']*1000))

			duration = time.time() - start_time
			print('[timing] {}'.format(duration))

			if not FLAGS.no_write_images:
				img_path = os.path.join(FLAGS.output_dir, os.path.basename(photo.filename))
				if im_txt is not None:
					cv2.imwrite(img_path, im_txt)
				else:
					cv2.imwrite(img_path, img[:, :, ::-1])  # back to BGR for imwrite

	return text_result            
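
Example 3 expects a Werkzeug/Flask `FileStorage` upload: it calls `photo.save(...)` into a BytesIO buffer and reads `photo.filename`. A minimal sketch of a route that would drive it, assuming the example lives in a hypothetical module `fots_server`; note that, as written, `main()` rebuilds the TensorFlow graph and restores the checkpoint on every request, so a real service would hoist that setup out of the handler:

from flask import Flask, jsonify, request

import fots_server  # hypothetical module containing main(photo) from Example 3

app = Flask(__name__)

@app.route('/ocr', methods=['POST'])
def ocr():
    photo = request.files['photo']  # werkzeug.datastructures.FileStorage
    text_result = fots_server.main(photo)  # list of recognized strings
    return jsonify({'text': text_result})

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000)
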
Example 4
def predictmtwi(image_path, return_dic):
    checkpoint_path = '/home/wsw/workplace/deeplearning/DRWord/algorithm/DRWord_ch/checkpointmtwi/'

    # GPU selection: '-1' disables the GPU; '0' selects GPU 0
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'

    with tf.Graph().as_default():  # build in a fresh graph rather than tf.get_default_graph()
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        input_feature_map = tf.placeholder(tf.float32,
                                           shape=[None, None, None, 32],
                                           name='input_feature_map')
        input_transform_matrix = tf.placeholder(tf.float32,
                                                shape=[None, 6],
                                                name='input_transform_matrix')
        input_box_mask = []
        input_box_mask.append(
            tf.placeholder(tf.int32, shape=[None], name='input_box_masks_0'))
        input_box_widths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name='input_box_widths')

        input_seq_len = input_box_widths[tf.argmax(
            input_box_widths, 0)] * tf.ones_like(input_box_widths)
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        shared_feature, f_geometry = detect_part.model(input_images)
        pad_rois = roi_rotate_part.roi_rotate_tensor_pad(
            input_feature_map, input_transform_matrix, input_box_mask,
            input_box_widths)
        recognition_logits = recognize_part.build_graph(
            pad_rois, input_box_widths)
        _, dense_decode = recognize_part.decode(recognition_logits,
                                                input_box_widths)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        im_fn_list = []
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list.append(image_path)
            index = len(im_fn_list)
            for im_fn in im_fn_list:

                # image used to draw quads
                im = cv2.imread(im_fn)[:, :, ::-1]
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                img = image.img_to_array(im_resized)
                img = preprocess_input(img, mode='tf')

                shared_feature_map, geometry = sess.run(
                    [shared_feature, f_geometry],
                    feed_dict={input_images: [img]})

                geometry = np.squeeze(geometry, axis=0)
                geometry[:, :, :3] = sigmoid(geometry[:, :, :3])

                pixel_threshold = 0.9
                cond = np.greater_equal(geometry[:, :, 0], pixel_threshold)
                activation_pixels = np.where(cond)

                #boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
                quad_scores, boxes = nms(geometry, activation_pixels)
                #print('boxes : ',boxes)

                input_roi_boxes = []
                for score, box in zip(quad_scores, boxes):
                    if np.amin(score) > 0:
                        #print(type(box))
                        box = box[[0, 3, 2, 1]]
                        input_roi_boxes.append(box)
                input_roi_boxes = np.array(input_roi_boxes)
                #print('input_roi_boxes : ',input_roi_boxes[:])

                txt_result = []
                im_result = im

                #im_txt = None
                if input_roi_boxes is not None and input_roi_boxes.shape[
                        0] != 0:

                    input_roi_boxes = input_roi_boxes[:, :8].reshape(-1, 8)
                    recog_decode_list = []
                    # Batch the ROIs so that too many text regions do not cause OOM
                    for batch_index in range(input_roi_boxes.shape[0] // 32 +
                                             1):  # test roi batch size is 32
                        start_slice_index = batch_index * 32
                        end_slice_index = min((batch_index + 1) * 32,
                                              input_roi_boxes.shape[0])
                        tmp_roi_boxes = input_roi_boxes[
                            start_slice_index:end_slice_index]

                        boxes_masks = [0] * tmp_roi_boxes.shape[0]

                        transform_matrixes, box_widths = get_project_matrix_and_width(
                            tmp_roi_boxes)
                        # max_box_widths = max_width * np.ones(boxes_masks.shape[0]) # seq_len

                        # Run end to end
                        try:
                            recog_decode = sess.run(dense_decode,
                                                    feed_dict={
                                                        input_feature_map:
                                                        shared_feature_map,
                                                        input_transform_matrix:
                                                        transform_matrixes,
                                                        input_box_mask[0]:
                                                        boxes_masks,
                                                        input_box_widths:
                                                        box_widths
                                                    })
                            recog_decode_list.extend([r for r in recog_decode])
                        except Exception:
                            # placeholder so the length check below detects the
                            # failure and empty results are returned
                            recog_decode_list.extend([None])

                    if len(recog_decode_list) != input_roi_boxes.shape[0]:
                        return txt_result, im_result

                    input_roi_boxes = input_roi_boxes[:, :8].reshape(
                        (-1, 4, 2))

                    for i, box in enumerate(input_roi_boxes):
                        # score filtering (np.amin(score) > 0) already happened
                        # when building input_roi_boxes, so every box is kept
                        if True:

                            box = box[[0, 3, 2, 1]]
                            #box = box.astype(np.int32)

                            box = box / [ratio_w, ratio_h]
                            box = box.astype(np.int32)

                            recognition_result = ground_truth_to_word(
                                recog_decode_list[i])
                            txt_result.append(recognition_result)
                            # Draw the bounding box on one persistent BGR copy
                            # (drawing on im[:, :, ::-1] directly would either
                            # fail or hit a temporary, depending on cv2)
                            if im_result is im:
                                im_result = im[:, :, ::-1].copy()
                            cv2.polylines(
                                im_result,
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(0, 0, 255),
                                thickness=2)

                else:
                    txt_result = []
                    im_result = im

    #tf.reset_default_graph()
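            # return_dic is presumably a shared mapping (e.g. created with
            # multiprocessing.Manager().dict()) so the caller can read results
            # back out of a worker process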
            return_dic['im_result_mtwi'] = im_result
            return_dic['txt_result_mtwi'] = txt_result
def main(argv=None):
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    bk_tree = BKTree(levenshtein, list_words(FLAGS.vocab))
    # bk_tree = bktree.Tree()

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        input_feature_map = tf.placeholder(tf.float32,
                                           shape=[None, None, None, 32],
                                           name='input_feature_map')
        input_transform_matrix = tf.placeholder(tf.float32,
                                                shape=[None, 6],
                                                name='input_transform_matrix')
        input_box_mask = []
        input_box_mask.append(
            tf.placeholder(tf.int32, shape=[None], name='input_box_masks_0'))
        input_box_widths = tf.placeholder(tf.int32,
                                          shape=[None],
                                          name='input_box_widths')

        input_seq_len = input_box_widths[tf.argmax(
            input_box_widths, 0)] * tf.ones_like(input_box_widths)
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        shared_feature, f_score, f_geometry = detect_part.model(input_images)
        pad_rois = roi_rotate_part.roi_rotate_tensor_pad(
            input_feature_map, input_transform_matrix, input_box_mask,
            input_box_widths)
        recognition_logits = recognize_part.build_graph(
            pad_rois, input_box_widths)
        _, dense_decode = recognize_part.decode(recognition_logits,
                                                input_box_widths)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # im_fn_list = get_images()
            if FLAGS.just_infer:
                im_fn_list, _, _ = get_image_self(
                    "/data/ceph_11015/ssd/anhan/nba/video2image")
            else:
                im_fn_list, corridate_list, label_list = get_image_self(
                    "/data/ceph_11015/ssd/anhan/nba/video2image")
            wrong = 0
            total = 0
            for ind, im_fn in enumerate(im_fn_list):
                #print("im_fn:",im_fn)
                im = cv2.imread(im_fn)[:, :, ::-1]
                im = cv2.resize(im, (960, 540))

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                # im_resized_d, (ratio_h_d, ratio_w_d) = resize_image_detection(im)

                timer = {'detect': 0, 'restore': 0, 'nms': 0, 'recog': 0}
                start = time.time()
                shared_feature_map, score, geometry = sess.run(
                    [shared_feature, f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                timer['detect'] = time.time() - start
                start = time.time()  # reset for recognition
                res = None
                str_list = []
                if boxes is not None and boxes.shape[0] != 0:
                    #res_file_path = os.path.join(FLAGS.output_dir,'res_' + '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))
                    # res_file_path = os.path.join(FLAGS.output_dir, '{}.txt'.format(os.path.basename(im_fn)))

                    input_roi_boxes = boxes[:, :8].reshape(-1, 8)
                    recog_decode_list = []
                    # Batch the ROIs so that too many text regions do not cause OOM
                    for batch_index in range(input_roi_boxes.shape[0] // 32 +
                                             1):  # test roi batch size is 32
                        start_slice_index = batch_index * 32
                        end_slice_index = min((batch_index + 1) * 32,
                                              input_roi_boxes.shape[0])
                        tmp_roi_boxes = input_roi_boxes[
                            start_slice_index:end_slice_index]

                        boxes_masks = [0] * tmp_roi_boxes.shape[0]
                        transform_matrixes, box_widths = get_project_matrix_and_width(
                            tmp_roi_boxes)
                        #max_box_widths = max_width * np.ones(boxes_masks.shape[0]) # seq_len

                        # Run end to end
                        recog_decode = sess.run(dense_decode,
                                                feed_dict={
                                                    input_feature_map:
                                                    shared_feature_map,
                                                    input_transform_matrix:
                                                    transform_matrixes,
                                                    input_box_mask[0]:
                                                    boxes_masks,
                                                    input_box_widths:
                                                    box_widths
                                                })
                        recog_decode_list.extend([r for r in recog_decode])

                    timer['recog'] = time.time() - start
                    # Prepare the boxes for drawing
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                    if len(recog_decode_list) != boxes.shape[0]:
                        print(
                            "detection and recognition results do not match!")
                        exit(-1)

                    scores = {}
                    score_index = 0
                    time_left = {}
                    time_index = 0
                    team_name = {}
                    quarter_dict = {}
                    remainder_attack_time = {}
                    remainder_attack_time_index = 0
                    recognition_result_num = 0
                    points = {}
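                    # The loop below sorts each recognized string into a
                    # scoreboard field by its content: team names, quarter
                    # labels, pure digits (scores), "mm:ss" strings (game
                    # clock) and dotted numbers (shot clock), keyed by the box
                    # centre; `points` records each box's left/right x for the
                    # layout check further down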
                    for i, box in enumerate(boxes):
                        # to avoid submitting errors
                        box = sort_poly(box.astype(np.int32))
                        if np.linalg.norm(box[0] -
                                          box[1]) < 5 or np.linalg.norm(
                                              box[3] - box[0]) < 5:
                            continue
                        recognition_result = ground_truth_to_word(
                            recog_decode_list[i])

                        if contain_eng(recognition_result):
                            #print(recognition_result)
                            fix_result = bktree_search(
                                bk_tree, recognition_result.lower())
                            #print(fix_result)
                            if len(fix_result) != 0:
                                recognition_result = fix_result[0][1]
                                #print(recognition_result)
                        # (non-English results are kept unchanged)

                        if recognition_result in all_team:
                            team_name[recognition_result] = [
                                (int(box[0, 0]) + int(box[2, 0])) / 2,
                                (int(box[0, 1]) + int(box[2, 1])) / 2
                            ]
                            points[recognition_result] = [box[0, 0], box[2, 0]]

                        if recognition_result in quarter:
                            quarter_dict[recognition_result] = [
                                (int(box[0, 0]) + int(box[2, 0])) / 2,
                                (int(box[0, 1]) + int(box[2, 1])) / 2
                            ]
                            points[recognition_result] = [box[0, 0], box[2, 0]]

                        if recognition_result.isdigit():
                            scores[recognition_result + "_" +
                                   str(score_index)] = [
                                       (int(box[0, 0]) + int(box[2, 0])) / 2,
                                       (int(box[0, 1]) + int(box[2, 1])) / 2
                                   ]
                            points[recognition_result + "_" +
                                   str(score_index)] = [box[0, 0], box[2, 0]]
                            score_index += 1

                        if ":" in recognition_result:
                            time_left[recognition_result + "_" +
                                      str(time_index)] = [
                                          (int(box[0, 0]) + int(box[2, 0])) /
                                          2,
                                          (int(box[0, 1]) + int(box[2, 1])) / 2
                                      ]
                            points[recognition_result + "_" +
                                   str(time_index)] = [box[0, 0], box[2, 0]]
                            time_index += 1

                        if "." in recognition_result and ":" not in recognition_result:
                            remainder_attack_time[
                                recognition_result + "_" +
                                str(remainder_attack_time_index)] = [
                                    (int(box[0, 0]) + int(box[2, 0])) / 2,
                                    (int(box[0, 1]) + int(box[2, 1])) / 2
                                ]
                            points[recognition_result + "_" +
                                   str(remainder_attack_time_index)] = [
                                       box[0, 0], box[2, 0]
                                   ]
                            remainder_attack_time_index += 1

                        recognition_result_num += 1
                        str_list.append(recognition_result)
                        # Draw bounding box
                        # cv2.polylines(im, [box.astype(np.int32).reshape((-1, 1, 2))], True, color=(255, 255, 0), thickness=1)
                        # Draw recognition results area
                        # text_area = box.copy()
                        # text_area[2, 1] = text_area[1, 1]
                        # text_area[3, 1] = text_area[0, 1]
                        # text_area[0, 1] = text_area[0, 1] - 15
                        # text_area[1, 1] = text_area[1, 1] - 15
                        # cv2.fillPoly(im, [text_area.astype(np.int32).reshape((-1, 1, 2))], color=(255, 255, 0))
                        # im_txt = cv2.putText(im, recognition_result, (box[0, 0], box[0, 1]), font, 0.5, (0, 0, 255), 1)
                        # to overlay Chinese text instead:
                        # im_txt = cv2ImgAddText(im, recognition_result, box[0, 0], box[0, 1], (0, 0, 149), 20)

                    if recognition_result_num in (5, 6, 7, 8):
                        res = get_content(remainder_attack_time, time_left,
                                          team_name, scores, quarter_dict)
                    elif recognition_result_num == 9:
                        sort_points = sorted(points.items(),
                                             key=lambda item: item[1][0])
                        # Flatten each box's (left x, right x) pair; if the
                        # flattened list is already sorted, the boxes do not
                        # overlap horizontally, so drop the entries at
                        # positions 1 and 4 (presumably spurious detections)
                        x_coordinate = []
                        for pair in sort_points:
                            x_coordinate.append(pair[1][0])
                            x_coordinate.append(pair[1][1])
                        x_sort = sorted(x_coordinate)
                        if x_sort == x_coordinate:
                            drop1 = sort_points[1][0]
                            drop2 = sort_points[4][0]
                            if drop1 in remainder_attack_time:
                                remainder_attack_time = remove_key(
                                    remainder_attack_time, drop1)
                            if drop2 in remainder_attack_time:
                                remainder_attack_time = remove_key(
                                    remainder_attack_time, drop2)
                            if drop1 in time_left:
                                time_left = remove_key(time_left, drop1)
                            if drop2 in time_left:
                                time_left = remove_key(time_left, drop2)
                            if drop1 in scores:
                                scores = remove_key(scores, drop1)
                            if drop2 in scores:
                                scores = remove_key(scores, drop2)
                        res = get_content(remainder_attack_time, time_left,
                                          team_name, scores, quarter_dict)
                if not FLAGS.just_infer:
                    corridate_true = corridate_list[ind].split("_")[4:]
                    label_true = label_list[ind].split("_")
                    res_true = get_score_info_v2(corridate_true, label_true)
                    if res != res_true:
                        #print(im_fn.split("/")[-1],'wrong!!!')
                        wrong += 1
                        #print(im_fn.split("/")[-1],label_list[ind],res_true,res,("_").join(str_list))
                    total += 1
                    print(
                        im_fn.split("/")[-1], label_list[ind], res_true, res,
                        ("_").join(str_list))
                else:
                    print(im_fn.split("/")[-1], res, ("_").join(str_list))
                duration = time.time() - start_time
                #print('{} : detect {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms, recog {:.0f}ms'.format(im_fn, timer['detect']*1000, timer['restore']*1000, timer['nms']*1000, timer['recog']*1000))
            print("wrong:{}".format(wrong))
            print("total:{}".format(total))
            print("precision:{}".format((total - wrong) / total))