def display_enc_dec_images(images, intermediate_height, intermediate_width):
    """
    Displays each image side by side with its encoded and decoded versions
    :param images: numpy array of images
    :param intermediate_height: int
    :param intermediate_width: int
    :return: None
    """
    n_cols = 3
    n_rows = len(images)
    fig = plt.figure()
    # keep references to the subplots so they can be tweaked later if needed
    axes = []
    for idx, raw_img in enumerate(images):
        encoded = encode_symbol(raw_img, intermediate_height,
                                intermediate_width, erode=True)
        decoded = decode_symbol(encoded, INPUT_HEIGHT, INPUT_WIDTH)
        # column 1: raw, column 2: encoded, column 3: decoded
        for offset, img in enumerate((raw_img, encoded, decoded), start=1):
            axes.append(fig.add_subplot(n_rows, n_cols, n_cols * idx + offset))
            plt.imshow(img, alpha=0.25, cmap='gray')
    plt.colorbar()
    plt.show()
def show_frames(self, frame, symbols, expressions):
    """
    Displays the symbols found in the image, as well as additional frame
    to show how the classifier 'sees' the symbols candidates
    :param frame: input image
    :param symbols: list of symbolboxes found in the image
    :param expressions: list of expressionboxes found in the image
    :return: None
    """
    super().show_frames(frame, symbols, expressions)
    frame_height, frame_width = frame.shape[0], frame.shape[1]
    # white canvas onto which every decoded symbol is painted
    decoded_frame = np.full((frame_height, frame_width), 255, dtype=np.uint8)
    for symbol in symbols:
        # only draw symbols whose decoded box fits inside the canvas
        if (symbol.top < frame_height - INPUT_HEIGHT
                and symbol.left < frame_width - INPUT_WIDTH):
            # NOTE(review): unlike get_symbols, no intermediate dims are
            # passed here — presumably encode_symbol has defaults; confirm.
            encoded = encode_symbol(symbol.get_raw_box(), erode=False)
            restored = decode_symbol(encoded, INPUT_HEIGHT, INPUT_WIDTH)
            top, left = symbol.top, symbol.left
            decoded_frame[top:top + INPUT_HEIGHT,
                          left:left + INPUT_WIDTH] = restored
            cv2.putText(decoded_frame, symbol.prediction_cls,
                        org=(left, top),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=1, color=(0, 255, 0), thickness=2)
    cv2.imshow('classification frame', decoded_frame)
def preprocess_symbol(img, target_channel_size):
    """
    First step transformation of a symbol image before it can be merged
    with background
    :param img: input symbol image, single-channel (grayscale) numpy array
    :param target_channel_size: 2 for gray, 3 for bgr
    :return: transformed symbol image
    :raises ValueError: if img is not 2-dimensional or target_channel_size
        is not 2 or 3
    """
    # validate with explicit exceptions instead of assert, which is
    # silently stripped when Python runs with -O
    if img.ndim != 2:
        raise ValueError(
            "expected a single-channel (grayscale) image, got shape {}".format(
                img.shape))
    if target_channel_size not in (2, 3):
        raise ValueError(
            "target_channel_size must be 2 (gray) or 3 (bgr), got {}".format(
                target_channel_size))
    # encode/decode round trip filters out noise while keeping the
    # original spatial size (img.shape is reused for both steps)
    img = encode_symbol(img, *img.shape, erode=False)
    img = decode_symbol(img, *img.shape)
    if target_channel_size == 3:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    return img
def generate_encoded_decoded_dataset(source_folder_path, target_folder_path,
                                     intermediate_shape):
    """
    Transforms the symbols specified in the source folder into more
    appropriate shape and saves in the target folder
    :param source_folder_path: path to folder with input .npz symbols
    :param target_folder_path: path to folder with output folder
    :param intermediate_shape: iterable of intermediate shapes
        (height, width) of the symbol image
    :return: None
    """
    for filename in os.listdir(source_folder_path):
        symbol_name = filename[:-4]  # strip the ".npz" extension
        print("Resizing symbol: {}".format(symbol_name))
        source_dataset = np.load(pjoin(source_folder_path, filename))
        X = source_dataset['X']
        count, newX, newY = 0, [], []
        for symbol in X:
            # append unmodified example
            # BUG FIX: previously X[0] was appended here, which duplicated
            # the first example on every iteration instead of keeping the
            # current one
            newX.append(symbol)
            newY.append(symbol_name)
            # append differently transformed images
            for intermediate_height, intermediate_width in intermediate_shape:
                encoded = sl.encode_symbol(symbol, intermediate_height,
                                           intermediate_width, erode=True)
                decoded = sl.decode_symbol(encoded, INPUT_HEIGHT, INPUT_WIDTH)
                newX.append(decoded)
                newY.append(symbol_name)
                count += 1
                if count % 100 == 0:
                    print("Transformed {} images for symbol {}".format(
                        count, symbol_name))
        newX = np.array(newX)
        newY = np.array(newY, dtype='|S6')  # data type: fixed-width bytes
        save_dataset(newX, newY, pjoin(target_folder_path, filename))
def get_symbols(self, frame):
    """Returns symbol boxes found in the camera frame"""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    classification_image = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 7, 2.5)
    # eroded copy is used only for locating candidates
    localization_image = cv2.erode(classification_image,
                                   np.ones((2, 2), np.uint8), iterations=1)
    positions = self._get_symbols_candidates_location(localization_image)
    # crop each candidate, encode to keep only the most relevant features
    # (no rescale yet), then decode to enhance them and rescale to the
    # classifier input size
    processed = [
        decode_symbol(
            encode_symbol(classification_image[top:bottom, left:right],
                          None, None, erode=False),
            INPUT_HEIGHT, INPUT_WIDTH)
        for (top, left, bottom, right) in positions
    ]
    # batch shaped for the classifier
    batch = np.array(processed).reshape(len(processed), INPUT_HEIGHT,
                                        INPUT_WIDTH)
    labels, probs = self.symbol_classifier.predict(batch)
    return [
        SymbolBox(classification_image, top, left, bottom, right, prob,
                  label.decode())
        for (top, left, bottom, right), prob, label in zip(positions, probs,
                                                           labels)
    ]
def test_encode_symbol(self):
    """Encoding must resize the symbol to the requested intermediate shape."""
    blank = np.zeros((32, 32))
    result = encode_symbol(blank, 45, 45)
    self.assertEqual((45, 45), result.shape)