Example #1
def get_all_boxes(img_path):
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
        model_path = os.path.join(
            checkpoint_path,
            os.path.basename(ckpt_state.model_checkpoint_path))
        saver.restore(sess, model_path)

        img = cv2.imread(img_path)[:, :, ::-1]  # BGR -> RGB
        img_resized, (ratio_h, ratio_w) = ev.resize_image(img)
        score, geometry = sess.run([f_score, f_geometry],
                                   feed_dict={input_images: [img_resized]})
        timer = {'net': 0, 'restore': 0, 'nms': 0}
        boxes, timer = ev.detect(score_map=score,
                                 geo_map=geometry,
                                 timer=timer)

        res = []
        if boxes is not None:
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            boxes[:, :, 0] /= ratio_w
            boxes[:, :, 1] /= ratio_h
            for box in boxes:
                # to avoid submitting errors
                box = ev.sort_poly(box.astype(np.int32))
                if np.linalg.norm(box[0] - box[1]) < 5 or \
                        np.linalg.norm(box[3] - box[0]) < 5:
                    continue
                res.append([
                    box[0, 0], box[0, 1], box[1, 0], box[1, 1], box[2, 0],
                    box[2, 1], box[3, 0], box[3, 1]
                ])
        return res
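These examples lean on a resize_image helper from EAST's evaluation code, which shrinks the image so the network sees dimensions that are multiples of 32 and returns the scale factors used later to map boxes back. A minimal sketch of that behavior, assuming a max_side_len cap as in the reference implementation:

import cv2

def resize_image(im, max_side_len=2400):
    """Resize so the longest side is at most max_side_len and both
    dimensions are multiples of 32 (the network's stride)."""
    h, w = im.shape[:2]
    resize_h, resize_w = h, w
    if max(resize_h, resize_w) > max_side_len:
        ratio = float(max_side_len) / max(resize_h, resize_w)
        resize_h = int(resize_h * ratio)
        resize_w = int(resize_w * ratio)
    # round down to a multiple of 32, but never to zero
    resize_h = max(32, resize_h // 32 * 32)
    resize_w = max(32, resize_w // 32 * 32)
    im = cv2.resize(im, (resize_w, resize_h))
    return im, (resize_h / float(h), resize_w / float(w))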
Example #2
def text_detection(img_path):
  start_time = time.time()
  rtparams = collections.OrderedDict()

  img = cv2.imread(img_path, 1)
  im_resized, (ratio_h, ratio_w) = resize_image(img)

  rtparams['start_time'] = datetime.datetime.now().isoformat()
  rtparams['image_size'] = '{}x{}'.format(img.shape[1], img.shape[0])
  rtparams['working_size'] = '{}x{}'.format(
    im_resized.shape[1], im_resized.shape[0])

  timer = collections.OrderedDict([
    ('net', 0),
    ('restore', 0),
    ('nms', 0)
  ])

  start = time.time()
  score, geometry = sess.run(
    [f_score, f_geometry],
    feed_dict={input_images: [im_resized[:,:,::-1]]})
  timer['net'] = time.time() - start
  boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)

  print('net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
    timer['net']*1000, timer['restore']*1000, timer['nms']*1000))

  if boxes is not None:
      scores = boxes[:,8].reshape(-1)
      boxes = boxes[:, :8].reshape((-1, 4, 2))
      boxes[:, :, 0] /= ratio_w
      boxes[:, :, 1] /= ratio_h

  duration = time.time() - start_time
  timer['overall'] = duration
  print('[timing] {}'.format(duration))

  text_lines = []
  if boxes is not None:
      for box, score in zip(boxes, scores):
          box = sort_poly(box.astype(np.int32))
          if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
              continue
          tl = collections.OrderedDict(zip(
              ['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'],
              map(float, box.flatten())))
          tl['score'] = float(score)
          text_lines.append(tl)

  #print(text_lines)
  return text_lines
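Every example runs sort_poly before the degenerate-box filter. A sketch of what sort_poly typically does in EAST-derived code (an assumption about this helper, not code taken from these repositories): rotate the four corners so the one nearest the origin comes first, then fix the winding so traversal starts at the top-left:

import numpy as np

def sort_poly(p):
    """Order the 4 corners of a quadrilateral starting from the one
    with the smallest x+y, keeping a consistent winding."""
    min_axis = np.argmin(np.sum(p, axis=1))
    p = p[[min_axis, (min_axis + 1) % 4,
           (min_axis + 2) % 4, (min_axis + 3) % 4]]
    if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
        return p
    return p[[0, 3, 2, 1]]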
Example #3
def predictor(img):
    begin = time.time()
    im_resized, (ratio_h, ratio_w) = resize_image(img)
    print('Prepare image', (time.time() - begin) * 1000)
    begin = time.time()
    score, geometry = sess.run(
        [f_score, f_geometry],
        feed_dict={input_images: [im_resized[:, :, ::-1]]})

    print('Forward Pass', (time.time() - begin) * 1000)
    begin = time.time()
    boxes, timer = detect(score_map=score, geo_map=geometry, timer={})
    print('Detect (NMS, etc...)', (time.time() - begin) * 1000)
    begin = time.time()

    if boxes is not None:
        scores = boxes[:, 8].reshape(-1)
        boxes = boxes[:, :8].reshape((-1, 4, 2))
        boxes[:, :, 0] /= ratio_w
        boxes[:, :, 1] /= ratio_h

    text_lines = []
    if boxes is not None:
        for box, score in zip(boxes, scores):
            box = sort_poly(box.astype(np.int32))
            if np.linalg.norm(box[0] - box[1]) < 5 or \
                    np.linalg.norm(box[3] - box[0]) < 5:
                continue
            tl = collections.OrderedDict(
                zip(['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'],
                    map(float, box.flatten())))
            tl['score'] = float(score)
            text_lines.append(tl)
    ret = {
        'text_lines': text_lines,
    }
    return ret
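A hypothetical caller for predictor, drawing the returned quadrilaterals; 'sample.jpg' is a placeholder path, and predictor is assumed to be in scope as defined above:

import cv2
import numpy as np

img = cv2.imread('sample.jpg')
result = predictor(img)
for tl in result['text_lines']:
    pts = np.array([[tl['x0'], tl['y0']], [tl['x1'], tl['y1']],
                    [tl['x2'], tl['y2']], [tl['x3'], tl['y3']]],
                   dtype=np.int32)
    cv2.polylines(img, [pts.reshape((-1, 1, 2))], isClosed=True,
                  color=(0, 255, 0), thickness=2)
cv2.imwrite('sample_out.jpg', img)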
Example #4
    def predictor(img):
        """
        :return: {
            'text_lines': [
                {
                    'score': ,
                    'x0': ,
                    'y0': ,
                    'x1': ,
                    ...
                    'y3': ,
                }
            ],
            'rtparams': {  # runtime parameters
                'image_size': ,
                'working_size': ,
            },
            'timing': {
                'net': ,
                'restore': ,
                'nms': ,
                'cpuinfo': ,
                'meminfo': ,
                'uptime': ,
            }
        }
        """
        start_time = time.time()
        rtparams = collections.OrderedDict()
        rtparams['start_time'] = datetime.datetime.now().isoformat()
        rtparams['image_size'] = '{}x{}'.format(img.shape[1], img.shape[0])
        timer = collections.OrderedDict([('net', 0), ('restore', 0),
                                         ('nms', 0)])

        im_resized, (ratio_h, ratio_w) = resize_image(img)
        rtparams['working_size'] = '{}x{}'.format(im_resized.shape[1],
                                                  im_resized.shape[0])
        start = time.time()
        score, geometry = sess.run(
            [f_score, f_geometry],
            feed_dict={input_images: [im_resized[:, :, ::-1]]})
        timer['net'] = time.time() - start

        boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
        logger.info('net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
            timer['net'] * 1000, timer['restore'] * 1000, timer['nms'] * 1000))

        if boxes is not None:
            scores = boxes[:, 8].reshape(-1)
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            boxes[:, :, 0] /= ratio_w
            boxes[:, :, 1] /= ratio_h

        duration = time.time() - start_time
        timer['overall'] = duration
        logger.info('[timing] {}'.format(duration))

        text_lines = []
        if boxes is not None:
            for box, score in zip(boxes, scores):
                box = sort_poly(box.astype(np.int32))
                if np.linalg.norm(box[0] - box[1]) < 5 or \
                        np.linalg.norm(box[3] - box[0]) < 5:
                    continue
                tl = collections.OrderedDict(
                    zip(['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'],
                        map(float, box.flatten())))
                tl['score'] = float(score)
                text_lines.append(tl)
        ret = {
            'text_lines': text_lines,
            'rtparams': rtparams,
            'timing': timer,
        }
        #ret.update(get_host_info())
        return ret
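The commented-out ret.update(get_host_info()) and the cpuinfo/meminfo/uptime fields in the docstring suggest a small host-stats helper. One plausible psutil-based sketch; the original implementation may differ:

import time
import psutil  # assumed dependency

def get_host_info():
    """Collect the host fields the docstring mentions."""
    return {
        'cpuinfo': {'percent': psutil.cpu_percent(interval=None),
                    'count': psutil.cpu_count()},
        'meminfo': dict(psutil.virtual_memory()._asdict()),
        'uptime': time.time() - psutil.boot_time(),
    }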
Example #5
                    yolo_ymin = np.max([
                        int((yolo_box.y - yolo_box.h / 2) * frame.shape[0]), 0
                    ])
                    yolo_ymax = np.min([
                        int((yolo_box.y + yolo_box.h / 2) * frame.shape[0]),
                        frame.shape[0]
                    ])
                    crop_img = frame[yolo_ymin:yolo_ymax,
                                     yolo_xmin:yolo_xmax, :]

                    # cv2.imshow('frame', frame)
                    # cv2.imshow('crop', crop_img)
                    # cv2.waitKey(0)
                    # exit()

                    im_resized, (ratio_h, ratio_w) = resize_image(crop_img)

                    timer = {'net': 0, 'restore': 0, 'nms': 0}
                    start = time.time()
                    score, geometry = sess.run(
                        [f_score, f_geometry],
                        feed_dict={input_images: [im_resized]})
                    timer['net'] = time.time() - start

                    east_boxes, timer = detect(score_map=score,
                                               geo_map=geometry,
                                               timer=timer)

                    # frame[0:100, 0:200, :] = np.zeros((100, 200, 3))

                    if east_boxes is not None:
def main():
    checkpoint_path = '/home/dragonx/Documents/VideoText2018/EAST-master/weights/east_icdar2015_resnet_v1_50_rbox/'
    # sample_set = ["Video_54_7_4", "Video_16_3_2", "Video_46_6_4", "Video_33_2_3", "Video_10_1_1"]
    # sample = sample_set[0]
    global_path = '/media/dragonx/752d26ef-8f47-416d-b311-66c6dfabf4a3/Video_text/ICDAR/train/'
    save_path = '/media/dragonx/DataStorage/ARC/EASTRNN/data/icdar2015/video/'
    items = os.listdir(global_path)
    newlist = []
    for names in items:
        if names.endswith(".mp4"):
            newlist.append(os.path.splitext(names)[0])

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', default=checkpoint_path)
    args = parser.parse_args()
    checkpoint_path = args.checkpoint_path

    if not os.path.exists(checkpoint_path):
        raise RuntimeError('Checkpoint `{}` not found'.format(checkpoint_path))
    # read frames until the video is completed
    logger.info('loading model')
    gpu_options = tf.GPUOptions(allow_growth=True)
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    f_score, f_geometry, v_feature = model.model(input_images,
                                                 is_training=False)
    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())
    # restore the model from weights
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
    model_path = os.path.join(
        checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
    logger.info('Restore from {}'.format(model_path))
    saver.restore(sess, model_path)
    # gather info for writing the output videos
    for sample in newlist:
        print('##############Processing ' + sample + '###############')
        filename = global_path + sample + '.mp4'
        XML_filepath = global_path + sample + '_GT.xml'
        print(XML_filepath)
        cap = cv2.VideoCapture(filename)
        frame_width = int(cap.get(3))
        frame_height = int(cap.get(4))
        # read ground-truth boxes
        # Define the codec and create a VideoWriter object; the output is
        # stored as an .avi file under save_path.
        out = cv2.VideoWriter(save_path + sample + '.avi',
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                              (frame_width, frame_height))
        index = 0
        while cap.isOpened():
            ret, frame = cap.read()
            index += 1
            if ret:
                # targets are actually dict relates to one frame
                target = read_xml_solo(XML_filepath, index)
                cv2.imshow('Frame', frame)
                print('Processing frame %d with shape' % index, frame.shape)
                ######### Use EAST text detector ###########
                start_time = time.time()
                img = frame
                rtparams = collections.OrderedDict()
                rtparams['start_time'] = datetime.datetime.now().isoformat()
                rtparams['image_size'] = '{}x{}'.format(
                    img.shape[1], img.shape[0])
                timer = collections.OrderedDict([('net', 0), ('restore', 0),
                                                 ('nms', 0)])

                im_resized, (ratio_h, ratio_w) = resize_image(img)
                rtparams['working_size'] = '{}x{}'.format(
                    im_resized.shape[1], im_resized.shape[0])
                start = time.time()
                score_m, geometry, feature = sess.run(
                    [f_score, f_geometry, v_feature],
                    feed_dict={input_images: [im_resized[:, :, ::-1]]})
                timer['net'] = time.time() - start
                print(
                    'score shape {:s}, geometry shape {:s}, feature shape {:s}'
                    .format(str(score_m.shape), str(geometry.shape),
                            str(feature.shape)))
                boxes, timer = detect(score_map=score_m,
                                      geo_map=geometry,
                                      timer=timer)
                logger.info(
                    'net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    scores = boxes[:, 8].reshape(-1)
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                timer['overall'] = duration
                logger.info('[timing] {}'.format(duration))

                text_lines = []
                if boxes is not None:
                    for box, score in zip(boxes, scores):
                        box = sort_poly(box.astype(np.int32))
                        if np.linalg.norm(box[0] - box[1]) < 5 or \
                                np.linalg.norm(box[3] - box[0]) < 5:
                            continue
                        tl = collections.OrderedDict(
                            zip([
                                'x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'
                            ], map(float, box.flatten())))
                        tl['score'] = float(score)
                        text_lines.append(tl)
                ret = {
                    'text_lines': text_lines,
                    # 'rtparams': rtparams,
                    # 'timing': timer,
                    # 'geometry': geometry,
                    # 'score':float(score),
                }
                # 1. print boxes number
                print('%d boxes found' % len(text_lines))
                # 2. eval_single_frame(target, box)
                p, r, f1 = eval_single_frame(target, ret)
                print('Precision %f, recall %f, F-measure %f' % (p, r, f1))
                # 3. save files into directory
                jsonfile = json.dumps(ret)
                directory = save_path + sample
                os.makedirs(directory + '/json/', exist_ok=True)
                os.makedirs(directory + '/npy/', exist_ok=True)
                os.makedirs(directory + '/score/', exist_ok=True)

                jsonfname = directory + '/json/frame' + format(index, '03d') + '.json'
                npyname = directory + '/npy/frame' + format(index, '03d') + '.npy'
                scorename = directory + '/score/frame' + format(index, '03d') + '.npy'
                np.save(npyname, feature)
                np.save(scorename, score_m)
                with open(jsonfname, 'w') as f:
                    f.write(jsonfile)
                # visualization
                new_img = draw_illu(img.copy(), ret)
                new_img1 = draw_illu_gt(new_img.copy(), target, p, r, f1)
                cv2.imshow('Annotated Frame with EAST', new_img1)
                out.write(new_img1)
                # Quit when Q is pressed
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
                time.sleep(0.02)
            else:
                break

        cap.release()
        out.release()
        cv2.destroyAllWindows()
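All of these examples rely on detect returning an N x 9 float array: eight polygon coordinates followed by a confidence score. A self-contained sketch of that slicing, using a dummy one-box array and made-up resize ratios:

import numpy as np

# dummy stand-in for detect()'s output: 8 corner coords + 1 score
boxes = np.array([[10., 10., 110., 10., 110., 40., 10., 40., 0.93]])

scores = boxes[:, 8].reshape(-1)           # confidence per box
quads = boxes[:, :8].reshape((-1, 4, 2))   # (N, 4 corners, xy)
ratio_h = ratio_w = 0.5                    # pretend the image was halved
quads[:, :, 0] /= ratio_w                  # map x back to the original image
quads[:, :, 1] /= ratio_h                  # map y back to the original image
print(quads[0], scores[0])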
Example #7
    def process(self, event):

        print("------------------------------ PHASE-1-0 STARTED for file " +
              event.src_path + "------------------------")
        pipeline_file = PipelineFileName(
            task_file_name=os.path.basename(event.src_path))

        if pipeline_file.file_cat == "M":
            return

        try:
            # output of phase 0 is the input to phase1-0
            input_path = os.path.join(
                os.path.dirname(os.path.realpath("__file__")), "phase0-output")
            temp_output_path = os.path.join(
                os.path.dirname(os.path.realpath("__file__")),
                "phase-1-0-output", "temp",
                pipeline_file.task_output_folder_name)
            output_folder_path = os.path.join(
                os.path.dirname(os.path.realpath("__file__")),
                "phase-1-0-output", pipeline_file.task_output_folder_name)
            output_main_path = os.path.join(
                os.path.dirname(os.path.realpath("__file__")),
                "phase-1-0-output")
            # after phase-1 processing, archive the images to the archive folder
            archive_to = os.path.join(os.path.basename(event.src_path),
                                      "processed")

            img_file = event.src_path
            img = cv2.imread(img_file)[:, :, ::-1]
            start_time = time.time()
            img_resized, (ratio_h, ratio_w) = resize_image(img)

            img_resized = (img_resized / 127.5) - 1

            timer = {'net': 0, 'restore': 0, 'nms': 0}
            start = time.time()

            # feed image into model
            print("--->>>>> about to predict score map... for image " +
                  str(img_resized.shape))
            boxes = None
            with self.graph1.as_default():
                with self.tf_session.as_default():
                    score_map, geo_map = self.model.predict(
                        img_resized[np.newaxis, :, :, :])
                    timer['net'] = time.time() - start
                    print("--->>>>> about to detect boxes")
                    boxes, timer = detect(score_map=score_map,
                                          geo_map=geo_map,
                                          timer=timer)
                    print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.
                          format(img_file, timer['net'] * 1000,
                                 timer['restore'] * 1000, timer['nms'] * 1000))

            if boxes is not None:
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h

            duration = time.time() - start_time
            print('[timing] {}'.format(duration))

            print("about to remove all files from if already exists" +
                  temp_output_path)
            try:
                shutil.rmtree(temp_output_path)
                shutil.rmtree(output_folder_path)
            except Exception as e:
                print(e)
            os.mkdir(temp_output_path)

            # erase all detected boxes
            if boxes is not None:
                #
                # remove all the boxes
                #
                idx = 0
                save_pipeline_file = PipelineFileName(
                    task_file_name=os.path.basename(event.src_path))
                doc_pos_map_file = os.path.join(temp_output_path,
                                                "doc_pos_map.csv")
                with open(doc_pos_map_file, 'w') as f:
                    f.write('{},{},{},{},{}\n'.format("file", "x1", "y1", "x2",
                                                      "y2"))
                    for box in boxes:
                        # to avoid submitting errors
                        box = sort_poly(box.astype(np.int32))
                        if np.linalg.norm(box[0] - box[1]) < 5 or \
                                np.linalg.norm(box[3] - box[0]) < 5:
                            continue

                        margin = 2
                        y1 = box[0, 1] - margin
                        y2 = box[2, 1] + margin
                        x1 = box[0, 0] - margin
                        x2 = box[2, 0] + margin

                        crop_img = img[y1:y2, x1:x2]
                        if crop_img.size != 0:
                            idx += 1
                            save_pipeline_file.segment = "0"
                            save_pipeline_file.segment = str(idx)
                            new_file = os.path.join(
                                temp_output_path,
                                save_pipeline_file.task_output_file_name)
                            print("phase-1-0: extracting snippet file ... " +
                                  new_file)
                            cv2.imwrite(new_file, crop_img)
                            f.write('{},{},{},{},{}\n'.format(
                                save_pipeline_file.task_output_file_name, x1,
                                y1, x2, y2))

                        # white-out the detected text region in the image
                        cv2.fillPoly(
                            img[:, :, ::-1],
                            [box.astype(np.int32).reshape((-1, 1, 2))],
                            color=(255, 255, 255))

                # moving the temp data to output
                try:
                    shutil.move(temp_output_path, output_main_path)
                except Exception as e:
                    shutil.rmtree(output_folder_path)
                    shutil.move(temp_output_path, output_main_path)
                    print('-' * 65)
                    print(str(e))
                    print('-' * 65)

            # save to file
            pipeline_file.file_cat = 'M'
            img_path = os.path.join(os.path.dirname(img_file),
                                    pipeline_file.task_output_file_name)
            print("about to save the line contour file....." + img_path)
            cv2.imwrite(img_path, img[:, :, ::-1])

            print(
                "------------------------------ COMPLETE PHASE-1-0 for file " +
                event.src_path + "------------------------")

            # move the processed file

        except Exception as me:
            print('--------------- EXCEPTION PHASE-1-0 ---------------')
            print(str(me))
            print("could not process file " + str(event.src_path))
            print('--------------- END EXCEPTION PHASE-1-0 ---------------')
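Example #7 crops each detection with a fixed margin and relies on the crop_img.size != 0 check to discard degenerate slices; clamping the coordinates to the image bounds avoids empty crops up front. A sketch (the helper name and margin are illustrative; box is the sorted int32 array from sort_poly):

def crop_box(img, box, margin=2):
    """Crop an axis-aligned region around a sorted quadrilateral,
    clamping to the image bounds so the slice is never empty."""
    h, w = img.shape[:2]
    x1 = max(int(box[0, 0]) - margin, 0)
    y1 = max(int(box[0, 1]) - margin, 0)
    x2 = min(int(box[2, 0]) + margin, w)
    y2 = min(int(box[2, 1]) + margin, h)
    if x2 <= x1 or y2 <= y1:
        return None
    return img[y1:y2, x1:x2]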
Example #8
def main():
    checkpoint_path = '/home/dragonx/Documents/VideoText2018/EAST-master/weights/east_icdar2015_resnet_v1_50_rbox/'
    filename = '/media/dragonx/752d26ef-8f47-416d-b311-66c6dfabf4a3/Video Detection/ICDAR/train/Video_16_3_2.mp4'
    cap = cv2.VideoCapture(filename)
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint-path', default=checkpoint_path)
    args = parser.parse_args()
    checkpoint_path = args.checkpoint_path

    if not os.path.exists(checkpoint_path):
        raise RuntimeError('Checkpoint `{}` not found'.format(checkpoint_path))
    # read frames until the video is completed
    index = 0
    logger.info('loading model')

    gpu_options = tf.GPUOptions(allow_growth=True)
    input_images = tf.placeholder(tf.float32,
                                  shape=[None, None, None, 3],
                                  name='input_images')
    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)

    f_score, f_geometry = model.model(input_images, is_training=False)

    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())
    # restore the model from weights
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
    model_path = os.path.join(
        checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
    logger.info('Restore from {}'.format(model_path))
    saver.restore(sess, model_path)
    # get infos for video written
    frame_width = int(cap.get(3))
    frame_height = int(cap.get(4))
    # Define the codec and create a VideoWriter object; the output is
    # stored in 'EAST_testDemo1.avi'.
    out = cv2.VideoWriter('EAST_testDemo1.avi',
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 10,
                          (frame_width, frame_height))
    while cap.isOpened():
        ret, frame = cap.read()
        index += 1
        if ret:
            cv2.imshow('Frame', frame)
            print('Processing frame %d with shape' % index, frame.shape)
            ######### Use EAST text detector ###########
            start_time = time.time()
            img = frame
            rtparams = collections.OrderedDict()
            rtparams['start_time'] = datetime.datetime.now().isoformat()
            rtparams['image_size'] = '{}x{}'.format(img.shape[1], img.shape[0])
            timer = collections.OrderedDict([('net', 0), ('restore', 0),
                                             ('nms', 0)])

            im_resized, (ratio_h, ratio_w) = resize_image(img)
            rtparams['working_size'] = '{}x{}'.format(im_resized.shape[1],
                                                      im_resized.shape[0])
            start = time.time()
            score, geometry = sess.run(
                [f_score, f_geometry],
                feed_dict={input_images: [im_resized[:, :, ::-1]]})
            timer['net'] = time.time() - start

            boxes, timer = detect(score_map=score,
                                  geo_map=geometry,
                                  timer=timer)
            logger.info('net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                timer['net'] * 1000, timer['restore'] * 1000,
                timer['nms'] * 1000))

            if boxes is not None:
                scores = boxes[:, 8].reshape(-1)
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h

            duration = time.time() - start_time
            timer['overall'] = duration
            logger.info('[timing] {}'.format(duration))

            text_lines = []
            if boxes is not None:
                for box, score in zip(boxes, scores):
                    box = sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 or \
                            np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    tl = collections.OrderedDict(
                        zip(['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'],
                            map(float, box.flatten())))
                    tl['score'] = float(score)
                    text_lines.append(tl)
            ret = {
                'text_lines': text_lines,
                'rtparams': rtparams,
                'timing': timer,
            }

            new_img = draw_illu(img.copy(), ret)
            cv2.imshow('Annotated Frame with EAST', new_img)
            out.write(new_img)
            # Quit when Q is pressed
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
            time.sleep(0.1)
        else:
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()
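Examples #5 and #8 call a draw_illu helper to overlay the detections on the frame. A minimal sketch consistent with the text_lines format (color and line style are assumptions):

import cv2
import numpy as np

def draw_illu(illu, rst):
    """Draw each quadrilateral in rst['text_lines'] onto illu."""
    for t in rst['text_lines']:
        d = np.array([t['x0'], t['y0'], t['x1'], t['y1'],
                      t['x2'], t['y2'], t['x3'], t['y3']],
                     dtype=np.int32).reshape(-1, 2)
        cv2.polylines(illu, [d.reshape((-1, 1, 2))], isClosed=True,
                      color=(255, 255, 0))
    return illu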
Example #9
    model.eval()

    output_dir = 'images'
    ensure_folder(output_dir)

    transformer = data_transforms['test']

    im_fn_list = get_images_for_test()
    im_fn_list = random.sample(im_fn_list, 10)

    for idx in tqdm(range(len(im_fn_list))):
        im_fn = im_fn_list[idx]
        im = cv.imread(im_fn)
        im = im[..., ::-1]  # BGR -> RGB

        im_resized, (ratio_h, ratio_w) = resize_image(im)
        im_resized = transforms.ToPILImage()(im_resized)
        im_resized = transformer(im_resized)
        im_resized = im_resized.to(device)
        im_resized = im_resized.unsqueeze(0)

        timer = {'net': 0, 'restore': 0, 'nms': 0}

        score, geometry = model(im_resized)

        score = score.permute(0, 2, 3, 1)
        geometry = geometry.permute(0, 2, 3, 1)
        score = score.detach().cpu().numpy()
        geometry = geometry.detach().cpu().numpy()

        boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
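Example #9 stops right after detect; the usual next step, as in the TensorFlow examples above, would be mapping the boxes back to the original image:

# boxes, ratio_h and ratio_w as in the loop body above
if boxes is not None:
    boxes = boxes[:, :8].reshape((-1, 4, 2))
    boxes[:, :, 0] /= ratio_w  # undo the horizontal resize
    boxes[:, :, 1] /= ratio_h  # undo the vertical resize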
Example #10
    def predictor(img):
        """
        :return: {
            'text_lines': [
                {
                    'score': ,
                    'x0': ,
                    'y0': ,
                    'x1': ,
                    ...
                    'y3': ,
                }
            ],
            'rtparams': {  # runtime parameters
                'image_size': ,
                'working_size': ,
            },
            'timing': {
                'net': ,
                'restore': ,
                'nms': ,
                'cpuinfo': ,
                'meminfo': ,
                'uptime': ,
            }
        }
        """
        start_time = time.time()
        rtparams = collections.OrderedDict()
        rtparams['start_time'] = datetime.datetime.now().isoformat()
        rtparams['image_size'] = '{}x{}'.format(img.shape[1], img.shape[0])
        timer = collections.OrderedDict([
            ('net', 0),
            ('restore', 0),
            ('nms', 0)
        ])

        im_resized, (ratio_h, ratio_w) = resize_image(img)
        rtparams['working_size'] = '{}x{}'.format(
            im_resized.shape[1], im_resized.shape[0])
        start = time.time()
        score, geometry = sess.run(
            [f_score, f_geometry],
            feed_dict={input_images: [im_resized[:,:,::-1]]})
        timer['net'] = time.time() - start

        boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)
        logger.info('net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
            timer['net']*1000, timer['restore']*1000, timer['nms']*1000))

        if boxes is not None:
            scores = boxes[:,8].reshape(-1)
            boxes = boxes[:, :8].reshape((-1, 4, 2))
            boxes[:, :, 0] /= ratio_w
            boxes[:, :, 1] /= ratio_h

        duration = time.time() - start_time
        timer['overall'] = duration
        logger.info('[timing] {}'.format(duration))

        text_lines = []
        if boxes is not None:
            for box, score in zip(boxes, scores):
                box = sort_poly(box.astype(np.int32))
                if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3]-box[0]) < 5:
                    continue
                tl = collections.OrderedDict(zip(
                    ['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3'],
                    map(float, box.flatten())))
                tl['score'] = float(score)
                text_lines.append(tl)
        ret = {
            'text_lines': text_lines,
            'rtparams': rtparams,
            'timing': timer,
        }
        ret.update(get_host_info())
        return ret
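A hypothetical end-to-end call for this predictor, serializing the result; 'input.jpg' is a placeholder path, and predictor/get_host_info are assumed to be in scope:

import json
import cv2

img = cv2.imread('input.jpg')
result = predictor(img)
# default=str guards against non-JSON-serializable host-info values
print(json.dumps(result, indent=2, default=str))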