コード例 #1
0
 def __init__(self, data_set_path: str, transform=None, alphabet='russian'):
     """Load the annotation mapping and the page image of a data set.

     Args:
         data_set_path: Directory containing ``mapping.json`` and
             ``page.tif``.
         transform: Optional callable, stored as ``self.transform``.
         alphabet: Alphabet name forwarded to ``LabelEncoderDecoder``.

     Raises:
         OSError: If ``page.tif`` cannot be read as an image.
     """
     mapping_path = os.path.join(data_set_path, 'mapping.json')
     page_path = os.path.join(data_set_path, 'page.tif')

     self.mapping = self._load_file(path=mapping_path)
     self.page = cv2.imread(page_path, cv2.IMREAD_COLOR)
     if self.page is None:
         # cv2.imread reports a missing or undecodable file by returning
         # None instead of raising; fail here rather than later on the
         # first attempt to slice the page.
         raise OSError('could not read page image: %s' % page_path)
     self.transform = transform
     self.encoder_decoder = LabelEncoderDecoder(alphabet=alphabet)
コード例 #2
0
def run_russian(args):
    """Cut letter regions out of raw letter scans and save them as PNGs.

    Every entry of ``args.directory`` is treated as a directory whose name is
    the character shown on the images it contains. Each image is converted
    to grayscale, binarised, morphologically closed and reduced to its region
    of interest, which is saved under
    ``args.save_dir/<encoded character>/<author id>/<sample id>.png``.

    The author id is parsed from the first four characters of the file name
    and the sample id from the characters between index 7 and the last four
    characters (presumably the file extension -- confirm against the data).

    Args:
        args: Namespace providing ``directory``, ``save_dir``, ``thresh``,
            ``frame_size``, ``kernel_size`` and ``min_area``.
    """
    encoder_decoder = LabelEncoderDecoder(alphabet='russian')

    for character_dir in os.listdir(args.directory):
        dir_path = os.path.join(args.directory, character_dir)

        for name in os.listdir(dir_path):
            path = os.path.join(dir_path, name)
            # The file is read manually and decoded from memory; the context
            # manager guarantees the handle is closed for every image.
            with open(path, 'rb') as stream:
                raw = bytearray(stream.read())
            encoded = np.asarray(raw, dtype=np.uint8)
            image = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
            gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            letter = LetterImage(image=gray_image)
            binary_letter = letter.to_binary(thresh_value=args.thresh,
                                             dirty_frame_size=args.frame_size)
            closed_letter = binary_letter.with_morph_closing(
                kernel_size=args.kernel_size)
            # NOTE: de-skewing (closed_letter.deskew()) is currently disabled.
            roi = closed_letter.extract_roi(orig_image=image,
                                            min_piece_area=args.min_area)
            try:
                author_id = int(name[:4])
                encoded_character = encoder_decoder.encode_character(
                    character=character_dir)
                save_path = os.path.join(args.save_dir,
                                         str(encoded_character),
                                         str(author_id))
                save_name = int(name[7:-4])
                roi.save(to=save_path, name=str(save_name) + '.png')
            except Exception as error:
                # Best effort: report the offending directory and file and
                # carry on with the next image. ``Exception`` (rather than a
                # bare ``except``) keeps Ctrl+C and SystemExit working.
                print(character_dir, name, error)
コード例 #3
0
def run_labels_check(args):
    """Print the decoded labels and dump the images of samples 200..249.

    ``labels.npy`` and ``data.npy`` are loaded from ``args.save_to``. For
    each inspected index the label is decoded and printed, and the image is
    reshaped to 64 x 512 and written to ``D:\\words\\<index>.png``.

    Args:
        args: Namespace providing ``max_length``, ``alphabet`` and
            ``save_to``.
    """
    codec = LabelEncoderDecoder(max_word_len=args.max_length,
                                alphabet=args.alphabet)
    all_labels = np.load(os.path.join(args.save_to, 'labels.npy'))
    all_images = np.load(os.path.join(args.save_to, 'data.npy'))

    for index in range(200, 250):
        picture, encoded_word = all_images[index], all_labels[index]
        print(codec.decode_word(array=encoded_word))
        picture = picture.reshape(64, 512)
        target = os.path.join(r'D:\words', str(index) + '.png')
        cv2.imwrite(target, picture)
コード例 #4
0
 def __init__(self,
              data_set_dir: str,
              min_page_index: int,
              max_page_index: int,
              transform=None,
              alphabet='russian'):
     """Select the page folders ``[min_page_index, max_page_index)``.

     Args:
         data_set_dir: Directory whose entries are the page folders.
         min_page_index: Start of the slice taken from the sorted folder
             list (inclusive).
         max_page_index: End of the slice taken from the sorted folder
             list (exclusive).
         transform: Optional callable, stored as ``self.transform``.
         alphabet: Alphabet name forwarded to ``LabelEncoderDecoder``.
     """
     self.directory = data_set_dir
     self.min_page = min_page_index
     self.max_page = max_page_index
     # os.listdir returns entries in arbitrary order; sorting makes the
     # index range select the same folders on every run and platform.
     page_folders = sorted(os.listdir(data_set_dir))
     self.folders = page_folders[min_page_index:max_page_index]
     self.transform = transform
     self.encoder_decoder = LabelEncoderDecoder(alphabet=alphabet)
コード例 #5
0
class TestWordsDataset(Dataset):
    """Word crops cut out of one annotated page image.

    The data set directory holds ``mapping.json`` (the annotations) and
    ``page.tif`` (the page). Every entry of ``mapping['outputs']['object']``
    describes one word through its ``name`` and its ``bndbox``.
    """

    def __init__(self, data_set_path: str, transform=None, alphabet='russian'):
        """Load the annotation mapping and the page image.

        Args:
            data_set_path: Directory containing ``mapping.json`` and
                ``page.tif``.
            transform: Optional callable applied to every sample.
            alphabet: Alphabet name forwarded to ``LabelEncoderDecoder``.

        Raises:
            OSError: If ``page.tif`` cannot be read as an image.
        """
        self.mapping = self._load_file(
            path=os.path.join(data_set_path, 'mapping.json'))
        page_path = os.path.join(data_set_path, 'page.tif')
        self.page = cv2.imread(page_path, cv2.IMREAD_COLOR)
        if self.page is None:
            # cv2.imread returns None for a missing or undecodable file;
            # fail here instead of on the first __getitem__ call.
            raise OSError('could not read page image: %s' % page_path)
        self.transform = transform
        self.encoder_decoder = LabelEncoderDecoder(alphabet=alphabet)

    @staticmethod
    def _load_file(path: str):
        """Return the parsed content of the UTF-8 encoded JSON file *path*."""
        with open(path, encoding='utf-8') as f:
            return json.load(f)

    def __len__(self):
        """Return the number of annotated words on the page."""
        return len(self.mapping['outputs']['object'])

    def __getitem__(self, idx):
        """Build the sample for the annotated word number *idx*.

        The word is cut out of the page by its bounding box, resized to a
        height of 54, pasted vertically centred at the left edge of a white
        64 x 512 canvas and finally rearranged to channels-first order.

        Returns:
            A dict keyed by ``Name.LABEL.value``, ``Name.IMAGE.value`` and
            ``Name.LABEL_LEN.value`` (after ``self.transform``, if set).

        Raises:
            ValueError: If the resized crop does not fit into the canvas
                (raised by the NumPy slice assignment).
        """
        entry = self.mapping['outputs']['object'][idx]
        box = entry['bndbox']
        x_min, y_min = box['xmin'], box['ymin']
        x_max, y_max = box['xmax'], box['ymax']

        label = self.encoder_decoder.encode_word(word=entry['name'])

        crop = self.page[y_min:y_max, x_min:x_max, :]
        crop = image_resize(crop, height=54)
        crop_height, crop_width = crop.shape[0], crop.shape[1]

        # White canvas; the crop is centred vertically and left-aligned.
        canvas = np.full((64, 512, 3), 255, dtype=np.uint8)
        top = (64 - crop_height) // 2
        canvas[top:top + crop_height, 0:crop_width, :] = crop

        # height x width x channels -> channels x height x width
        image = canvas.transpose(2, 0, 1)

        sample = {
            Name.LABEL.value: label.astype(int),
            Name.IMAGE.value: image,
            Name.LABEL_LEN.value:
            self.encoder_decoder.decode_word_len(array=label)
        }

        if self.transform:
            sample = self.transform(sample)
        return sample
コード例 #6
0
def run(args):
    """Recognise the word on a single image with a pretrained CRNN.

    The image at ``args.image_path`` is read as grayscale, thresholded with
    the Otsu flag, inverted, resized to a height of 64 and pasted at the left
    edge of a zero-filled ``args.image_height`` x ``args.image_width``
    canvas. Every intermediate image is displayed in a window that waits for
    a key press. The canvas is then passed through the model, and the
    predictions are printed raw, decoded, and decoded after
    ``from_raw_to_label``.

    Args:
        args: Namespace providing ``image_path``, ``thresh``,
            ``image_height``, ``image_width``, ``num_of_channels``,
            ``num_of_classes``, ``num_of_lstm_hidden_units`` and
            ``model_path``.
    """

    def show(picture):
        # Display the picture and block until a key is pressed.
        cv2.imshow('img', picture)
        cv2.waitKey(0)

    source = cv2.imread(args.image_path, cv2.IMREAD_GRAYSCALE)
    show(source)

    _, binary = cv2.threshold(source, args.thresh, 255,
                              cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    inverted = 255 - binary
    show(inverted)

    resized = image_resize(image=inverted, height=64)
    show(resized)

    canvas = np.zeros((args.image_height, args.image_width))
    canvas[:, :resized.shape[1]] = resized
    show(canvas)

    network = CRNN(image_height=args.image_height,
                   num_of_channels=args.num_of_channels,
                   num_of_classes=args.num_of_classes,
                   num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)

    print('loading pretrained model from %s' % args.model_path)
    network.load_state_dict(torch.load(args.model_path))

    converter = LabelEncoderDecoder()

    batch = canvas.reshape(1, args.num_of_channels, args.image_height,
                           args.image_width)
    batch = Variable(torch.FloatTensor(batch))

    network.eval()
    _, best = network(batch).max(2)
    best = best.transpose(1, 0).contiguous().view(-1)

    print(best)
    print(converter.decode_word(array=best))
    raw_label = converter.from_raw_to_label(array=best)
    print(converter.decode_word(array=raw_label))
コード例 #7
0
def run_test_real(args):
    """Run a pretrained CRNN on one annotated word of a real page.

    ``args.data_path`` is a JSON file providing the page image location
    under ``path`` and the word annotations under ``outputs.object``. The
    word with index 6 is cut out by its bounding box, resized to a height of
    54, pasted onto a white 64 x 512 canvas (5 rows from the top), shown in a
    window and passed through the model. The predictions are printed raw,
    decoded, and decoded after ``from_raw_to_label``.

    Args:
        args: Namespace providing ``image_height``, ``image_width``,
            ``num_of_channels``, ``num_of_classes``,
            ``num_of_lstm_hidden_units``, ``model_path`` and ``data_path``.

    Raises:
        OSError: If the page image cannot be read.
    """
    model = CRNN(image_height=args.image_height,
                 num_of_channels=args.num_of_channels,
                 num_of_classes=args.num_of_classes,
                 num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)

    print('loading pretrained model from %s' % args.model_path)
    model.load_state_dict(torch.load(args.model_path))

    converter = LabelEncoderDecoder(alphabet='russian')

    with open(args.data_path, encoding='utf-8') as f:
        data = json.load(f)

    page = cv2.imread(data['path'], cv2.IMREAD_COLOR)
    if page is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise OSError('could not read page image: %s' % data['path'])

    i = 6
    words = data['outputs']['object']
    print(words)
    box = words[i]['bndbox']
    x_min, y_min = box['xmin'], box['ymin']
    x_max, y_max = box['xmax'], box['ymax']

    image = page[y_min: y_max, x_min: x_max, :]
    image = image_resize(image, height=54)

    image_height = image.shape[0]
    image_width = image.shape[1]

    result = np.full((64, 512, 3), 255, dtype=np.uint8)
    result[5: image_height + 5, 0: image_width, :] = image
    image = result
    cv2.imshow('img', image)
    cv2.waitKey(0)

    # The canvas is height x width x channels. ``reshape`` alone only
    # relabels the flat buffer and would interleave pixels and channels, so
    # move the channel axis first, exactly as the dataset classes do with
    # ``transpose(2, 0, 1)``, before adding the batch dimension.
    image = np.ascontiguousarray(image.transpose(2, 0, 1))
    image = image.reshape(1, args.num_of_channels, args.image_height,
                          args.image_width)

    image = Variable(torch.FloatTensor(image))

    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        preds = model(image)
    _, preds = preds.max(2)

    preds = preds.transpose(1, 0).contiguous().view(-1)

    print(preds)
    print(converter.decode_word(array=preds))
    print(converter.decode_word(array=converter.from_raw_to_label(array=preds)))
コード例 #8
0
def run_test_synthetic(args):
    """Run a pretrained CRNN on the first word of a random synthetic page.

    A random index below the number of entries in ``args.data_path`` is
    drawn and used, as a string, as the page folder name. The first row of
    that folder's ``labels.npy`` supplies the word coordinates (entries 16..19:
    min_h, max_h, min_w, max_w). The word is pasted onto a white 64 x 512
    canvas, shown in a window and passed through the model; the decoded
    prediction is printed.

    Args:
        args: Namespace providing ``image_height``, ``image_width``,
            ``num_of_channels``, ``num_of_classes``,
            ``num_of_lstm_hidden_units``, ``model_path`` and ``data_path``.

    Raises:
        OSError: If the page image cannot be read.
    """
    model = CRNN(image_height=args.image_height,
                 num_of_channels=args.num_of_channels,
                 num_of_classes=args.num_of_classes,
                 num_of_lstm_hidden_units=args.num_of_lstm_hidden_units)

    print('loading pretrained model from %s' % args.model_path)
    model.load_state_dict(torch.load(args.model_path))

    folders = os.listdir(args.data_path)
    random_folder = np.random.randint(len(folders))
    path = os.path.join(args.data_path, str(random_folder))
    page_path = os.path.join(path, 'page.png')
    page = cv2.imread(page_path, cv2.IMREAD_COLOR)
    if page is None:
        # cv2.imread returns None instead of raising on a bad path.
        raise OSError('could not read page image: %s' % page_path)

    label_data = np.load(os.path.join(path, 'labels.npy'))
    first_word = label_data[0]
    min_h, max_h, min_w, max_w = first_word[16:20]

    image = np.full((64, 512, 3), 255, dtype=np.uint8)
    image[:, :max_w - min_w, :] = page[min_h: max_h, min_w: max_w, :]

    coder = LabelEncoderDecoder(alphabet='russian')

    cv2.imshow('img', image)
    cv2.waitKey(0)

    # The canvas is height x width x channels. ``reshape`` alone only
    # relabels the flat buffer and would interleave pixels and channels, so
    # move the channel axis first, exactly as the dataset classes do with
    # ``transpose(2, 0, 1)``, before adding the batch dimension.
    image = np.ascontiguousarray(image.transpose(2, 0, 1))
    image = image.reshape(1, args.num_of_channels, args.image_height,
                          args.image_width)

    image = Variable(torch.FloatTensor(image))

    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        preds = model(image)
    _, preds = preds.max(2)

    preds = preds.transpose(1, 0).contiguous().view(-1)

    print(coder.decode_word(array=coder.from_raw_to_label(array=preds)))
コード例 #9
0
    def test(criterion, model, test_loader, test_image) -> (float, float):
        """Evaluate *model* on *test_loader*.

        Every batch is copied into *test_image* with ``loadData``, passed
        through the model and scored with *criterion*. The highest-scoring
        class per time step is converted with ``from_raw_to_label`` and
        compared against the targets.

        Returns:
            ``(loss, accuracy)``: the summed batch losses divided by
            ``len(dataset) / batch_size``, and the percentage of predictions
            equal to their target.

        NOTE(review): ``targets`` is rebound to the result of
        ``concat_targets`` before the accuracy comparison -- confirm that
        iterating it yields one label per sample.
        """
        model.eval()
        loss_sum = 0
        hits = 0
        codec = LabelEncoderDecoder(alphabet='russian')

        with torch.no_grad():
            for batch in test_loader:
                data = batch[Name.IMAGE.value]
                targets = batch[Name.LABEL.value]
                target_lens = batch[Name.LABEL_LEN.value]

                loadData(test_image, data)
                log_probs = model(test_image)

                # One input length per batch element, all equal to the
                # size of the first dimension of the model output.
                input_lengths = [log_probs.size(0)] * log_probs.shape[1]
                preds_size = Variable(
                    torch.tensor(input_lengths, dtype=torch.int32))
                targets = concat_targets(targets=targets,
                                         target_lengths=target_lens)
                batch_loss = criterion(log_probs=log_probs,
                                       targets=targets,
                                       input_lengths=preds_size,
                                       target_lengths=target_lens)
                loss_sum += batch_loss.item()

                _, best = log_probs.max(2)
                best = best.transpose(1, 0)
                preds = np.asarray([
                    codec.from_raw_to_label(row.cpu().numpy())
                    for row in best
                ])

                hits += sum(
                    1 for pred, target in zip(preds, targets.cpu().numpy())
                    if np.array_equal(pred, target))

        sample_count = len(test_loader.dataset)
        loss_sum /= (sample_count / test_loader.batch_size)
        return loss_sum, 100. * hits / len(test_loader.dataset)
def run(args):
    """Create the Russian words data set.

    Builds a ``RussianDatasetCreator`` from the command-line arguments and
    calls its ``create_russian`` method.

    Args:
        args: Namespace providing ``max_length``, ``alphabet``,
            ``data_path``, ``words_path``, ``word_height`` and ``save_to``.
    """
    codec = LabelEncoderDecoder(max_word_len=args.max_length,
                                alphabet=args.alphabet)
    creator_options = dict(
        dataset=args.data_path,
        words_path=args.words_path,
        pad_value=0,
        word_height=args.word_height,
        min_letter_size=37,
        max_letter_size=41,
        tall_to_low_letter_coef=1.5,
        label_encoder_decoder=codec,
        save_path=args.save_to,
    )
    RussianDatasetCreator(**creator_options).create_russian()
コード例 #11
0
def run(args):
    """Create the English words data set and store it as ``.npy`` files.

    Builds an ``EnglishDatasetCreator`` from the command-line arguments,
    creates ``args.words_count`` words and saves the images to ``data.npy``
    and the labels to ``labels.npy`` inside ``args.save_to``.

    Args:
        args: Namespace providing ``max_length``, ``alphabet``,
            ``data_path``, ``words_path``, ``word_height``, ``words_count``
            and ``save_to``.
    """
    codec = LabelEncoderDecoder(max_word_len=args.max_length,
                                alphabet=args.alphabet)
    creator_options = dict(
        dataset=args.data_path,
        words_path=args.words_path,
        pad_value=1,
        word_height=args.word_height,
        min_letter_size=37,
        max_letter_size=41,
        tall_to_low_letter_coef=1.3,
        label_encoder_decoder=codec,
    )
    creator = EnglishDatasetCreator(**creator_options)

    labels, images = creator.create(words_count=args.words_count)
    for file_name, payload in (('data.npy', images), ('labels.npy', labels)):
        np.save(os.path.join(args.save_to, file_name), payload)
コード例 #12
0
class WordsDataset(Dataset):
    """Random word crops taken from a range of synthetic page folders.

    Every page folder holds ``page.png`` and ``labels.npy``. Each row of
    ``labels.npy`` stores the encoded word in its first 16 entries followed
    by the word coordinates (min_h, max_h, min_w, max_w).
    """

    def __init__(self,
                 data_set_dir: str,
                 min_page_index: int,
                 max_page_index: int,
                 transform=None,
                 alphabet='russian'):
        """Select the page folders ``[min_page_index, max_page_index)``.

        Args:
            data_set_dir: Directory whose entries are the page folders.
            min_page_index: Start of the slice taken from the sorted folder
                list (inclusive).
            max_page_index: End of the slice taken from the sorted folder
                list (exclusive).
            transform: Optional callable applied to every sample.
            alphabet: Alphabet name forwarded to ``LabelEncoderDecoder``.
        """
        self.directory = data_set_dir
        self.min_page = min_page_index
        self.max_page = max_page_index
        # os.listdir returns entries in arbitrary order; sorting makes the
        # index range select the same folders on every run and platform.
        page_folders = sorted(os.listdir(data_set_dir))
        self.folders = page_folders[min_page_index:max_page_index]
        self.transform = transform
        self.encoder_decoder = LabelEncoderDecoder(alphabet=alphabet)

    def __len__(self):
        """Return the number of page folders actually selected.

        This equals ``max_page - min_page`` whenever the directory holds
        enough folders, and never advertises indices ``__getitem__`` cannot
        serve.
        """
        return len(self.folders)

    def __getitem__(self, idx):
        """Build a sample from a randomly chosen word of page number *idx*.

        Returns:
            A dict keyed by ``Name.LABEL.value``, ``Name.IMAGE.value`` and
            ``Name.LABEL_LEN.value`` (after ``self.transform``, if set).

        Raises:
            OSError: If the page image cannot be read.
        """
        folder_path = os.path.join(self.directory, self.folders[idx])
        page_path = os.path.join(folder_path, 'page.png')
        page = cv2.imread(page_path, cv2.IMREAD_COLOR)
        if page is None:
            # cv2.imread returns None instead of raising on a bad path.
            raise OSError('could not read page image: %s' % page_path)

        page_labels = np.load(os.path.join(folder_path, 'labels.npy'))
        word_row = page_labels[np.random.randint(len(page_labels))]
        label = word_row[:16]
        min_h, max_h, min_w, max_w = word_row[16:20]

        # White 64 x 512 canvas with the word at its left edge.
        canvas = np.full((64, 512, 3), 255, dtype=np.uint8)
        canvas[:, :max_w - min_w, :] = page[min_h:max_h, min_w:max_w, :]

        # height x width x channels -> channels x height x width
        image = canvas.transpose(2, 0, 1)

        sample = {
            Name.LABEL.value: label.astype(int),
            Name.IMAGE.value: image,
            Name.LABEL_LEN.value:
            self.encoder_decoder.decode_word_len(array=label)
        }

        if self.transform:
            sample = self.transform(sample)
        return sample