import json
import os

import cv2
import numpy as np
import torch
from torch.autograd import Variable
from torch.utils.data import Dataset

# Project-local helpers (LabelEncoderDecoder, CRNN, LetterImage, image_resize,
# Name, loadData, RussianDatasetCreator, EnglishDatasetCreator) come from this
# repository's own modules.


def run_russian(args):
    encoder_decoder = LabelEncoderDecoder(alphabet='russian')
    subdirs = os.listdir(args.directory)
    for sub_dir in subdirs:
        dir_path = os.path.join(args.directory, sub_dir)
        image_names = os.listdir(dir_path)
        for name in image_names:
            path = os.path.join(dir_path, name)
            # Read raw bytes and decode with cv2.imdecode so that paths with
            # non-ASCII characters are handled correctly.
            with open(path, 'rb') as stream:
                raw_bytes = bytearray(stream.read())
            byte_array = np.asarray(raw_bytes, dtype=np.uint8)
            image = cv2.imdecode(byte_array, cv2.IMREAD_COLOR)
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            letter = LetterImage(image=gray_image)
            binary_letter = letter.to_binary(thresh_value=args.thresh,
                                             dirty_frame_size=args.frame_size)
            closed_letter = binary_letter.with_morph_closing(
                kernel_size=args.kernel_size)
            # de_skewed_letter = closed_letter.deskew()
            roi = closed_letter.extract_roi(orig_image=image,
                                            min_piece_area=args.min_area)
            try:
                # File names start with a four-digit author id; the digits
                # before the extension are the sample index.
                author_id = int(name[:4])
                save_path = os.path.join(
                    args.save_dir,
                    str(encoder_decoder.encode_character(character=sub_dir)),
                    str(author_id))
                save_name = int(name[7:-4])
                roi.save(to=save_path, name=str(save_name) + '.png')
            except (ValueError, OSError):
                # Report files that do not follow the expected naming scheme.
                print(sub_dir, name)
def run_labels_check(args):
    label_encoder_decoder = LabelEncoderDecoder(max_word_len=args.max_length,
                                                alphabet=args.alphabet)
    labels = np.load(os.path.join(args.save_to, 'labels.npy'))
    images = np.load(os.path.join(args.save_to, 'data.npy'))
    # Spot-check a slice of the generated words: print each decoded label and
    # dump the corresponding image for visual inspection.
    for i in range(200, 250):
        image = images[i]
        label = labels[i]
        print(label_encoder_decoder.decode_word(array=label))
        image = image.reshape(64, 512)
        cv2.imwrite(os.path.join(r'D:\words', str(i) + '.png'), image)
class TestWordsDataset(Dataset):
    """Dataset of real handwritten words cut from a scanned page using a
    bounding-box annotation file (mapping.json)."""

    def __init__(self, data_set_path: str, transform=None, alphabet='russian'):
        self.mapping = self._load_file(
            path=os.path.join(data_set_path, 'mapping.json'))
        self.page = cv2.imread(os.path.join(data_set_path, 'page.tif'),
                               cv2.IMREAD_COLOR)
        self.transform = transform
        self.encoder_decoder = LabelEncoderDecoder(alphabet=alphabet)

    @staticmethod
    def _load_file(path: str):
        with open(path, encoding='utf-8') as f:
            data = json.load(f)
        return data

    def __len__(self):
        return len(self.mapping['outputs']['object'])

    def __getitem__(self, idx):
        data = self.mapping['outputs']['object']
        x_min = data[idx]['bndbox']['xmin']
        y_min = data[idx]['bndbox']['ymin']
        x_max = data[idx]['bndbox']['xmax']
        y_max = data[idx]['bndbox']['ymax']
        label = data[idx]['name']
        label = self.encoder_decoder.encode_word(word=label)
        # Crop the word, scale it to a height of 54 px and paste it onto a
        # white 64x512 canvas, vertically centred and left-aligned.
        image = self.page[y_min:y_max, x_min:x_max, :]
        image = image_resize(image, height=54)
        image_height = image.shape[0]
        image_width = image.shape[1]
        result = np.ones((64, 512, 3), dtype=np.uint8) * 255
        top = (64 - image_height) // 2
        result[top:top + image_height, 0:image_width, :] = image
        image = result
        # HWC -> CHW, the layout expected by PyTorch.
        image = image.transpose(2, 0, 1)
        sample = {
            Name.LABEL.value: label.astype(int),
            Name.IMAGE.value: image,
            Name.LABEL_LEN.value:
                self.encoder_decoder.decode_word_len(array=label)
        }
        if self.transform:
            sample = self.transform(sample)
        return sample
def run(args):
    image = cv2.imread(args.image_path, cv2.IMREAD_GRAYSCALE)
    cv2.imshow('img', image)
    cv2.waitKey(0)
    # Binarise with Otsu thresholding and invert so the ink becomes white on
    # a black background.
    thresh, img_bin = cv2.threshold(image, args.thresh, 255,
                                    cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin
    cv2.imshow('img', img_bin)
    cv2.waitKey(0)
    img_bin = image_resize(image=img_bin, height=64)
    cv2.imshow('img', img_bin)
    cv2.waitKey(0)
    # Pad on the right up to the fixed network input width.
    result = np.zeros((args.image_height, args.image_width))
    result[:, :img_bin.shape[1]] = img_bin
    cv2.imshow('img', result)
    cv2.waitKey(0)
    # data = np.load(args.data_path)
    model = CRNN(image_height=args.image_height,
                 num_of_channels=args.num_of_channels,
                 num_of_classes=args.num_of_classes,
                 num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)
    print('loading pretrained model from %s' % args.model_path)
    model.load_state_dict(torch.load(args.model_path))
    converter = LabelEncoderDecoder()
    image = result
    image = image.reshape(1, args.num_of_channels, args.image_height,
                          args.image_width)
    image = Variable(torch.FloatTensor(image))
    model.eval()
    preds = model(image)
    # Greedy decoding: pick the most probable class at every time step.
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    print(preds)
    print(converter.decode_word(array=preds))
    print(converter.decode_word(array=converter.from_raw_to_label(array=preds)))
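# `from_raw_to_label` belongs to this project's LabelEncoderDecoder and its
# implementation is not shown in this section.  The standard CTC greedy
# collapse it presumably performs is sketched below; treating class 0 as the
# CTC blank is an assumption (it matches the pad_value=0 passed to
# RussianDatasetCreator later in this section).
def ctc_greedy_collapse(raw, blank=0):
    """Minimal sketch: merge consecutive repeats, then drop blanks.
    E.g. [0, 7, 7, 0, 7, 3, 3, 0] -> [7, 7, 3]."""
    collapsed = []
    previous = blank
    for symbol in raw:
        if symbol != previous and symbol != blank:
            collapsed.append(symbol)
        previous = symbol
    return np.asarray(collapsed)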
def run_test_real(args):
    model = CRNN(image_height=args.image_height,
                 num_of_channels=args.num_of_channels,
                 num_of_classes=args.num_of_classes,
                 num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)
    print('loading pretrained model from %s' % args.model_path)
    model.load_state_dict(torch.load(args.model_path))
    converter = LabelEncoderDecoder(alphabet='russian')
    with open(args.data_path, encoding='utf-8') as f:
        data = json.load(f)
    page = cv2.imread(data['path'], cv2.IMREAD_COLOR)
    # Inspect a single, hard-coded annotation from the page.
    i = 6
    print(data['outputs']['object'])
    x_min = data['outputs']['object'][i]['bndbox']['xmin']
    y_min = data['outputs']['object'][i]['bndbox']['ymin']
    x_max = data['outputs']['object'][i]['bndbox']['xmax']
    y_max = data['outputs']['object'][i]['bndbox']['ymax']
    # Crop the word, scale it to a height of 54 px and paste it onto a white
    # 64x512 canvas, matching the training-time preprocessing.
    image = page[y_min:y_max, x_min:x_max, :]
    image = image_resize(image, height=54)
    image_height = image.shape[0]
    image_width = image.shape[1]
    result = np.ones((64, 512, 3), dtype=np.uint8) * 255
    result[5:image_height + 5, 0:image_width, :] = image
    image = result
    cv2.imshow('img', image)
    cv2.waitKey(0)
    # Transpose HWC -> CHW before adding the batch dimension; a plain reshape
    # of the HWC array would scramble the pixels across channels.
    image = image.transpose(2, 0, 1).reshape(1, args.num_of_channels,
                                             args.image_height,
                                             args.image_width)
    image = Variable(torch.FloatTensor(image))
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    print(preds)
    print(converter.decode_word(array=preds))
    print(converter.decode_word(array=converter.from_raw_to_label(array=preds)))
def run_test_synthetic(args):
    model = CRNN(image_height=args.image_height,
                 num_of_channels=args.num_of_channels,
                 num_of_classes=args.num_of_classes,
                 num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)
    print('loading pretrained model from %s' % args.model_path)
    model.load_state_dict(torch.load(args.model_path))
    # Pick a random synthetic page and take its first word.
    folders = os.listdir(args.data_path)
    random_folder = np.random.randint(len(folders))
    path = os.path.join(args.data_path, str(random_folder))
    page = cv2.imread(os.path.join(path, 'page.png'), cv2.IMREAD_COLOR)
    label_data = np.load(os.path.join(path, 'labels.npy'))
    label_data = label_data[0]
    # Each label row stores the encoded word in its first 16 entries and the
    # bounding box (min_h, max_h, min_w, max_w) in the last four.
    coords = label_data[16:]
    min_h = coords[0]
    max_h = coords[1]
    min_w = coords[2]
    max_w = coords[3]
    image = np.ones((64, 512, 3), dtype=np.uint8) * 255
    image[:, :max_w - min_w, :] = page[min_h:max_h, min_w:max_w, :]
    coder = LabelEncoderDecoder(alphabet='russian')
    cv2.imshow('img', image)
    cv2.waitKey(0)
    # Transpose HWC -> CHW before adding the batch dimension; a plain reshape
    # of the HWC array would scramble the pixels across channels.
    image = image.transpose(2, 0, 1).reshape(1, args.num_of_channels,
                                             args.image_height,
                                             args.image_width)
    image = Variable(torch.FloatTensor(image))
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    print(coder.decode_word(array=coder.from_raw_to_label(array=preds)))
def test(criterion, model, test_loader, test_image) -> (float, float):
    model.eval()
    test_loss = 0
    correct = 0
    encoder_decoder = LabelEncoderDecoder(alphabet='russian')
    with torch.no_grad():
        for sample in test_loader:
            data, targets, target_lens = (sample[Name.IMAGE.value],
                                          sample[Name.LABEL.value],
                                          sample[Name.LABEL_LEN.value])
            # Copy the batch into the pre-allocated input tensor.
            loadData(test_image, data)
            log_probs = model(test_image)
            # CTC input lengths: every sample in the batch uses the full
            # number of time steps the network produced.
            preds_size = Variable(
                torch.tensor([log_probs.size(0)] * log_probs.shape[1],
                             dtype=torch.int32))
            targets = concat_targets(targets=targets,
                                     target_lengths=target_lens)
            test_loss += criterion(
                log_probs=log_probs,
                targets=targets,
                input_lengths=preds_size,
                target_lengths=target_lens).item()  # sum up batch loss
            # Greedy decoding: argmax per time step, then collapse to labels.
            _, probs = log_probs.max(2)
            probs = probs.transpose(1, 0)
            preds = []
            for prob in probs:
                preds.append(
                    encoder_decoder.from_raw_to_label(prob.cpu().numpy()))
            preds = np.asarray(preds)
            # Count exact matches between decoded predictions and targets.
            for pred, target in zip(preds, targets.cpu().numpy()):
                if np.array_equal(pred, target):
                    correct += 1
    # Average the summed loss over the number of batches.
    test_loss /= (len(test_loader.dataset) / test_loader.batch_size)
    return test_loss, 100. * correct / len(test_loader.dataset)
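# `concat_targets` is called above but not defined in this section.  The
# sketch below is a guess based on the name: it flattens the padded
# (batch, max_len) label rows into the 1-D concatenated target tensor that
# torch.nn.CTCLoss accepts.  This is an assumption, not this project's
# confirmed implementation.
def concat_targets(targets, target_lengths):
    # Keep only the first `length` symbols of each padded row and join them
    # into one flat int32 tensor.
    return torch.cat([row[:int(length)]
                      for row, length in zip(targets, target_lengths)]).to(
                          torch.int32)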
def run(args):
    label_encoder_decoder = LabelEncoderDecoder(max_word_len=args.max_length,
                                                alphabet=args.alphabet)
    creator = RussianDatasetCreator(dataset=args.data_path,
                                    words_path=args.words_path,
                                    pad_value=0,
                                    word_height=args.word_height,
                                    min_letter_size=37,
                                    max_letter_size=41,
                                    tall_to_low_letter_coef=1.5,
                                    label_encoder_decoder=label_encoder_decoder,
                                    save_path=args.save_to)
    creator.create_russian()
def run(args):
    label_encoder_decoder = LabelEncoderDecoder(max_word_len=args.max_length,
                                                alphabet=args.alphabet)
    creator = EnglishDatasetCreator(dataset=args.data_path,
                                    words_path=args.words_path,
                                    pad_value=1,
                                    word_height=args.word_height,
                                    min_letter_size=37,
                                    max_letter_size=41,
                                    tall_to_low_letter_coef=1.3,
                                    label_encoder_decoder=label_encoder_decoder)
    labels, images = creator.create(words_count=args.words_count)
    np.save(os.path.join(args.save_to, 'data.npy'), images)
    np.save(os.path.join(args.save_to, 'labels.npy'), labels)
class WordsDataset(Dataset):
    """Dataset of synthetic words sampled from pre-rendered pages.  Each page
    folder holds a 'page.png' and a 'labels.npy' whose rows contain the
    encoded word (first 16 entries) followed by its bounding box
    (min_h, max_h, min_w, max_w)."""

    def __init__(self, data_set_dir: str, min_page_index: int,
                 max_page_index: int, transform=None, alphabet='russian'):
        self.directory = data_set_dir
        self.min_page = min_page_index
        self.max_page = max_page_index
        self.folders = os.listdir(data_set_dir)[min_page_index:max_page_index]
        self.transform = transform
        self.encoder_decoder = LabelEncoderDecoder(alphabet=alphabet)

    def __len__(self):
        return self.max_page - self.min_page

    def __getitem__(self, idx):
        folder_path = os.path.join(self.directory, self.folders[idx])
        page = cv2.imread(os.path.join(folder_path, 'page.png'),
                          cv2.IMREAD_COLOR)
        page_labels = np.load(os.path.join(folder_path, 'labels.npy'))
        # Sample a random word from this page.
        random_word_index = np.random.randint(len(page_labels))
        word_row = page_labels[random_word_index]
        label = word_row[:16]
        coords = word_row[16:]
        min_h = coords[0]
        max_h = coords[1]
        min_w = coords[2]
        max_w = coords[3]
        # Paste the word crop onto a white 64x512 canvas, left-aligned.
        image = np.ones((64, 512, 3), dtype=np.uint8) * 255
        image[:, :max_w - min_w, :] = page[min_h:max_h, min_w:max_w, :]
        # HWC -> CHW, the layout expected by PyTorch.
        image = image.transpose(2, 0, 1)
        sample = {
            Name.LABEL.value: label.astype(int),
            Name.IMAGE.value: image,
            Name.LABEL_LEN.value:
                self.encoder_decoder.decode_word_len(array=label)
        }
        if self.transform:
            sample = self.transform(sample)
        return sample
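# A hypothetical way to wire WordsDataset into training.  The directory
# layout, page-index range and batch size below are illustrative
# placeholders, not values taken from this repository.
from torch.utils.data import DataLoader

train_set = WordsDataset(data_set_dir='data/pages', min_page_index=0,
                         max_page_index=900, alphabet='russian')
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)

for sample in train_loader:
    images = sample[Name.IMAGE.value]       # (16, 3, 64, 512) word crops
    labels = sample[Name.LABEL.value]       # (16, 16) encoded words
    lengths = sample[Name.LABEL_LEN.value]  # true word lengths for CTC
    break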