Example No. 1
import time

import cv2
import numpy as np

# ButtonDetector and CharacterRecognizer are provided by the project's detection
# and recognition modules; their imports are omitted in the original snippet.


class Recognizer:
    def __init__(self, buttonGraph=None, charGraph=None, buttonLabel=None, verbose=True):
        self.detector = ButtonDetector(buttonGraph, buttonLabel)
        self.recognizer = CharacterRecognizer(charGraph)
        self.verbose = verbose
        print('Recognizer ready!')

    def __button_candidates__(self, boxes, scores, image):
        """Crop detected button regions whose detection score exceeds 0.5."""
        img_height = image.shape[0]
        img_width = image.shape[1]

        button_scores = []
        button_patches = []
        button_positions = []

        for box, score in zip(boxes, scores):
            if score < 0.5:
                continue

            # Boxes are normalized [y_min, x_min, y_max, x_max]; convert to pixels.
            y_min = int(box[0] * img_height)
            x_min = int(box[1] * img_width)
            y_max = int(box[2] * img_height)
            x_max = int(box[3] * img_width)

            button_patch = image[y_min: y_max, x_min: x_max]
            button_patch = cv2.resize(button_patch, (180, 180))

            button_scores.append(score)
            button_patches.append(button_patch)
            button_positions.append([x_min, y_min, x_max, y_max])

        return button_patches, button_positions, button_scores
        
    # Takes in a PIL image.
    # Returns the bounding boxes of the detected buttons and the recognized
    # characters; when verbose is disabled it also returns the elapsed time.
    def recognize(self, image):
        start = time.time()

        img_np = np.asarray(image)
        boxes, scores, _ = self.detector.predict(img_np)
        button_patches, button_positions, _ = self.__button_candidates__(boxes, scores, img_np)

        button_text = []
        for button_img, button_pos in zip(button_patches, button_positions):
            text, score = self.recognizer.predict(button_img)
            button_text.append(text)

        end = time.time()
        if self.verbose:
            print('Recognition completed in %f seconds' % (end - start))
            return button_positions, button_text
        else:
            return button_positions, button_text, end - start
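
A minimal usage sketch for the Recognizer class above, assuming ButtonDetector and CharacterRecognizer resolve their default graphs when no paths are given, and that a panel photo exists at './test_panels/1.jpg' (the path used in the warm-up code of Example No. 2):

from PIL import Image

recognizer = Recognizer(verbose=True)
panel = Image.open('./test_panels/1.jpg')
positions, texts = recognizer.recognize(panel)
for (x_min, y_min, x_max, y_max), text in zip(positions, texts):
    print('button %s at (%d, %d, %d, %d)' % (text, x_min, y_min, x_max, y_max))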
Example No. 2
import os

import cv2
import imageio
import numpy as np
import PIL.Image
import tensorflow as tf

# ButtonDetector, CharacterRecognizer and button_candidates are provided by the
# project's own modules; their imports are omitted in the original snippet.


def get_image_name_list(target_path):
    # Collect the base names (without extension) of every file under target_path.
    assert os.path.exists(target_path)
    image_name_list = []
    file_set = os.walk(target_path)
    for root, dirs, files in file_set:
        for image_name in files:
            image_name_list.append(image_name.split('.')[0])
    return image_name_list


def warm_up(detector, recognizer):
    image = imageio.imread('./test_panels/1.jpg')
    button = imageio.imread('./test_buttons/0_0.png')
    detector.predict(image)
    recognizer.predict(button)


if __name__ == '__main__':
    data_dir = './test_panels'
    data_list = get_image_name_list(data_dir)
    detector = ButtonDetector()
    recognizer = CharacterRecognizer(verbose=False)
    warm_up(detector, recognizer)
    overall_time = 0
    for data in data_list:
        img_path = os.path.join(data_dir, data + '.jpg')
        img_np = np.asarray(PIL.Image.open(tf.gfile.GFile(img_path)))
        t0 = cv2.getTickCount()
        boxes, scores, _ = detector.predict(img_np)
        button_patches, button_positions, _ = button_candidates(
            boxes, scores, img_np)
        # Run OCR on every candidate patch; this script only measures timing,
        # so the results are not stored.
        for button_img in button_patches:
            button_text, button_score, _ = recognizer.predict(button_img)
        t1 = cv2.getTickCount()
        elapsed = (t1 - t0) / cv2.getTickFrequency()
        overall_time += elapsed
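    # A possible follow-up once the loop finishes: report the average per-panel
    # time (assumes data_list is non-empty).
    print('average time per panel: %.3f s over %d panels'
          % (overall_time / len(data_list), len(data_list)))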
Example No. 3
def initialize():
    # Build the detector and recognizer once; data_dir and data_list are
    # prepared here but not used further in this snippet.
    data_dir = './test_panels'
    data_list = get_image_name_list(data_dir)
    detector = ButtonDetector()
    recognizer = CharacterRecognizer(verbose=False)
    return detector, recognizer
Example No. 4

def get_image_name_list(target_path):
    assert os.path.exists(target_path)
    image_name_list = []
    file_set = os.walk(target_path)
    for root, dirs, files in file_set:
        for image_name in files:
            image_name_list.append(image_name.split('.')[0])
    return image_name_list


if __name__ == '__main__':
    data_dir = './test_panels'
    data_list = get_image_name_list(data_dir)
    detector = ButtonDetector()
    recognizer = CharacterRecognizer()
    overall_time = 0
    for data in data_list:
        img_path = os.path.join(data_dir, data + '.jpg')
        img_np = np.asarray(PIL.Image.open(tf.gfile.GFile(img_path)))
        t0 = cv2.getTickCount()

        boxes, scores, _ = detector.predict(img_np, True)
        button_patches, button_positions, _ = button_candidates(
            boxes, scores, img_np)

        for button_img, button_pos in zip(button_patches, button_positions):
            button_text, button_score, button_draw = recognizer.predict(
                button_img, draw=True)
            x_min, y_min, x_max, y_max = button_pos
Example No. 5
    def __init__(self, buttonGraph=None, charGraph=None, buttonLabel=None, verbose=True):
        self.detector = ButtonDetector(buttonGraph, buttonLabel)
        self.recognizer = CharacterRecognizer(charGraph)
        self.verbose = verbose
        print('Recognizer ready!')
Example No. 6
import imageio
import numpy as np
from PIL import Image, ImageDraw, ImageFont

# ButtonDetector, CharacterRecognizer and vis_util (the project's visualization
# helpers) are provided by the project's own modules; their imports are omitted
# in the original snippet.


class ButtonRecognition:
    def __init__(self):
        self.detector = ButtonDetector()
        self.recognizer = CharacterRecognizer(verbose=False)
        self.warmup()

    def warmup(self, panel_path='./test_panels/image1.jpg', button_path='./test_panels/test_button.png'):
        image = imageio.imread(panel_path)
        button = imageio.imread(button_path)
        self.detector.predict(image)
        self.recognizer.predict(button)

    def button_candidates(self, boxes, scores, image_np, score_threshold=0.5, press_threshold=19):
        # TBD: the press-detection heuristic may need to be tuned for different environments.
        # Mean of the image's third colour channel, used as a rough brightness
        # estimate when choosing the press threshold below.
        avg_col = np.mean(image_np[:, :, 2])
        img_height = image_np.shape[0]
        img_width = image_np.shape[1]

        button_scores = []
        button_patches = []
        button_positions = []
        button_presses = []

        for box, score in zip(boxes, scores):
            if score < score_threshold:
                continue

            # Boxes are normalized [y_min, x_min, y_max, x_max]; convert to pixels.
            y_min = int(box[0] * img_height)
            x_min = int(box[1] * img_width)
            y_max = int(box[2] * img_height)
            x_max = int(box[3] * img_width)

            button_patch = image_np[y_min: y_max, x_min: x_max]

            button_scores.append(score)
            button_patches.append(button_patch)
            button_positions.append([x_min, y_min, x_max, y_max])

            # Press heuristic: sum the fully saturated (value 255) pixels in the
            # patch and normalize by the longer image side.
            buf = np.copy(button_patch)
            buf[buf != 255] = 0
            v = np.sum(buf) / max(img_height, img_width)

            # Darker panels (low avg_col) use a larger threshold.
            if avg_col < 60:
                pressed = v > press_threshold * 10
            else:
                pressed = v > press_threshold
            button_presses.append(pressed)

        return button_patches, button_positions, button_scores, button_presses

    def draw_detection_result(self, image_np, boxes, classes, scores, category, predict_chars=None):
        # Draw the detection boxes (and, optionally, the predicted characters)
        # onto image_np in place.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category,
            max_boxes_to_draw=100,
            use_normalized_coordinates=True,
            line_thickness=5,
            predict_chars=predict_chars
        )

    def draw_recognition_result(self, image_np, recognitions):
        for (button_patch, _, chars, _, pos, press) in recognitions:
            # Generate an image layer for drawing.
            img_pil = Image.fromarray(button_patch)
            img_show = ImageDraw.Draw(img_pil)
            # Draw the recognized characters at the centre of the button patch.
            x_center = button_patch.shape[1] / 2.0
            y_center = button_patch.shape[0] / 2.0
            font_size = min(x_center, y_center) * .8
            # Assumes the FreeMono TrueType font is installed at this path.
            font = ImageFont.truetype("/usr/share/fonts/truetype/freefont/FreeMono.ttf", int(font_size))
            text_center = int(x_center), int(y_center)
            # Use a different text colour when the button is judged pressed.
            text_color = (0, 0, 255) if press else (255, 0, 0)
            img_show.text(text_center, text=chars, fill=text_color, font=font)
            # Paste the annotated patch back into the full panel image.
            image_np[pos[1]: pos[3], pos[0]: pos[2]] = np.array(img_pil)

    def predict(self, image_np, draw=False, image_f=False):
        recognitions = []
        boxes, scores, _, classes = self.detector.predict(image_np)
        button_patches, button_positions, button_scores, button_presses = self.button_candidates(boxes, scores, image_np)
        for i, button_img in enumerate(button_patches):
            button_text, text_score, _ = self.recognizer.predict(button_img)
            # image_f inverts the press decision.
            if image_f:
                button_presses[i] = not button_presses[i]
            recognitions.append((button_img, button_scores[i], button_text, text_score, button_positions[i], button_presses[i]))

        if draw:
            self.draw_detection_result(image_np, boxes, classes, scores, self.detector.category_index)
            self.draw_recognition_result(image_np, recognitions)

        return image_np, recognitions

    def clear_session(self):
        self.detector.clear_session()
        self.recognizer.clear_session()

# end of file 
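
A minimal usage sketch for this class, assuming the test panel used by warmup() ('./test_panels/image1.jpg') is available; the output filename annotated_panel.jpg is hypothetical:

import imageio

recognition = ButtonRecognition()
panel = imageio.imread('./test_panels/image1.jpg')
annotated, recognitions = recognition.predict(panel, draw=True)
for _, det_score, chars, text_score, pos, pressed in recognitions:
    print('%s at %s  det=%.2f  ocr=%.2f  pressed=%s' % (chars, pos, det_score, text_score, pressed))
imageio.imwrite('annotated_panel.jpg', annotated)  # hypothetical output path
recognition.clear_session()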
Example No. 7
    def __init__(self):
        self.detector = ButtonDetector()
        self.recognizer = CharacterRecognizer(verbose=False)
        self.warmup()