예제 #1
0
def predict(img_path, base_model, thresholding=160):
    """Recognise the text in one image with a CTC-based model.

    The image is converted to greyscale, resized to a height of 32 pixels
    while keeping its aspect ratio, shifted to the value range [-0.5, 0.5]
    and passed to ``base_model`` as a batch of one. The prediction is decoded
    with ``K.ctc_decode`` and mapped to characters through ``id_to_char``.

    Args:
        img_path: Anything accepted by ``Image.open``.
        base_model: Object whose ``predict`` output can be passed to
            ``K.ctc_decode`` (indexed as batch, time step, class).
        thresholding: Not used by this implementation; kept so that existing
            callers keep working.

    Returns:
        Tuple ``(text, im)``: the decoded string and the resized greyscale
        PIL image that was fed to the model.
    """
    t = Timer()
    # Release the source file as soon as the greyscale copy exists.
    with Image.open(img_path) as src:
        im = src.convert('L')

    # Scale to a height of 32 px; never let the width collapse to zero for
    # very tall, narrow images, which would make the resize fail.
    scale = im.size[1] * 1.0 / 32
    w = max(1, int(im.size[0] / scale))
    print('w:', w)

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im = im.resize((w, 32), Image.LANCZOS)
    img = np.array(im).astype(np.float32) / 255.0 - 0.5
    X = img.reshape((1, 32, w, 1))

    t.tic()
    y_pred = base_model.predict(X)
    t.toc()
    print("times,", t.diff)

    # Every sample is decoded over the full time axis of the prediction.
    input_length = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.get_value(K.ctc_decode(y_pred, input_length=input_length)[0][0])
    out = u''.join([id_to_char[x] for x in decoded[0]])

    return out, im
예제 #2
0
    def test_ctc_decode_greedy(self):
        """Test two batch entries - best path decoder.

        Adapted from the TensorFlow CTC decoder tests. Two probability
        matrices of six time steps and four classes are decoded greedily and
        compared with hand-computed sequences and negative log probabilities.
        """
        # Each matrix below is time x depth; rows beyond the sequence length
        # are padding and must be ignored by the decoder.
        seq_len_0 = 4
        input_prob_matrix_0 = np.asarray(
            [
                [1.0, 0.0, 0.0, 0.0],  # t=0
                [0.0, 0.0, 0.4, 0.6],  # t=1
                [0.0, 0.0, 0.4, 0.6],  # t=2
                [0.0, 0.9, 0.1, 0.0],  # t=3
                [0.0, 0.0, 0.0, 0.0],  # t=4 (ignored)
                [0.0, 0.0, 0.0, 0.0],  # t=5 (ignored)
            ],
            dtype=np.float32,
        )

        seq_len_1 = 5
        input_prob_matrix_1 = np.asarray(
            [
                [0.1, 0.9, 0.0, 0.0],  # t=0
                [0.0, 0.9, 0.1, 0.0],  # t=1
                [0.0, 0.0, 0.1, 0.9],  # t=2
                [0.0, 0.9, 0.1, 0.1],  # t=3
                [0.9, 0.1, 0.0, 0.0],  # t=4
                [0.0, 0.0, 0.0, 0.0],  # t=5 (ignored)
            ],
            dtype=np.float32,
        )

        # Stack the two samples directly in keras backend order
        # (batch_size x time x depth).
        inputs = KTF.variable(np.stack([input_prob_matrix_0, input_prob_matrix_1]))
        # batch_size length vector of sequence_lengths
        input_length = KTF.variable(np.array([seq_len_0, seq_len_1], dtype=np.int32))

        # batch_size length vector of negative log probabilities
        log_prob_truth = np.array(
            [np.sum(-np.log([1.0, 0.6, 0.6, 0.9])), np.sum(-np.log([0.9, 0.9, 0.9, 0.9, 0.9]))], np.float32
        )[:, np.newaxis]

        # keras output, unlike tensorflow, is a dense (not sparse) tensor
        decode_truth = np.array([[0, 1, -1], [1, 1, 0]])

        decode_pred_tf, log_prob_pred_tf = KTF.ctc_decode(inputs, input_length, greedy=True)

        assert len(decode_pred_tf) == 1

        decode_pred = KTF.eval(decode_pred_tf[0])
        log_prob_pred = KTF.eval(log_prob_pred_tf)

        # np.alltrue was removed in NumPy 2.0; np.all is the same reduction.
        assert np.all(decode_truth == decode_pred)
        assert np.allclose(log_prob_truth, log_prob_pred)
예제 #3
0
def predict_model(model, input_):
    """Run ``model`` on ``input_`` and return the CTC-decoded label array.

    The prediction is decoded with ``K.ctc_decode`` using its default
    settings, with every sample decoded over the full second axis of the
    prediction. The decoded array is returned as-is, without truncation.
    """
    probabilities = model.predict(input_)
    batch_size, time_steps = probabilities.shape[0], probabilities.shape[1]

    # One length entry per sample, each equal to the number of time steps.
    lengths = np.ones(batch_size) * time_steps
    decoded_paths, _ = K.ctc_decode(probabilities, input_length=lengths)
    return K.get_value(decoded_paths[0])
예제 #4
0
    def test_ctc_decode_greedy(self):
        """Test two batch entries - best path decoder.

        Adapted from the TensorFlow CTC decoder tests: two samples with six
        time steps and four classes each are decoded greedily, and both the
        decoded labels and the negative log probabilities are checked.
        """
        max_time_steps = 6

        # dimensions are time x depth
        seq_len_0 = 4
        input_prob_matrix_0 = np.asarray(
            [[1.0, 0.0, 0.0, 0.0],  # t=0
             [0.0, 0.0, 0.4, 0.6],  # t=1
             [0.0, 0.0, 0.4, 0.6],  # t=2
             [0.0, 0.9, 0.1, 0.0],  # t=3
             [0.0, 0.0, 0.0, 0.0],  # t=4 (ignored)
             [0.0, 0.0, 0.0, 0.0]],  # t=5 (ignored)
            dtype=np.float32)

        seq_len_1 = 5
        input_prob_matrix_1 = np.asarray(
            [[0.1, 0.9, 0.0, 0.0],  # t=0
             [0.0, 0.9, 0.1, 0.0],  # t=1
             [0.0, 0.0, 0.1, 0.9],  # t=2
             [0.0, 0.9, 0.1, 0.1],  # t=3
             [0.9, 0.1, 0.0, 0.0],  # t=4
             [0.0, 0.0, 0.0, 0.0]],  # t=5 (ignored)
            dtype=np.float32)

        # len max_time_steps array of batch_size x depth matrices
        inputs = [np.vstack([input_prob_matrix_0[t, :],
                             input_prob_matrix_1[t, :]])
                  for t in range(max_time_steps)]

        # change tensorflow order to keras backend order
        inputs = KTF.variable(np.asarray(inputs).transpose((1, 0, 2)))
        # batch_size length vector of sequence_lengths
        input_length = KTF.variable(np.array([seq_len_0, seq_len_1], dtype=np.int32))

        # batch_size length vector of negative log probabilities
        log_prob_truth = np.array([
            np.sum(-np.log([1.0, 0.6, 0.6, 0.9])),
            np.sum(-np.log([0.9, 0.9, 0.9, 0.9, 0.9]))
        ], np.float32)[:, np.newaxis]

        # keras output, unlike tensorflow, is a dense (not sparse) tensor
        decode_truth = np.array([[0, 1, -1], [1, 1, 0]])

        decode_pred_tf, log_prob_pred_tf = KTF.ctc_decode(inputs,
                                                          input_length,
                                                          greedy=True)

        assert len(decode_pred_tf) == 1

        decode_pred = KTF.eval(decode_pred_tf[0])
        log_prob_pred = KTF.eval(log_prob_pred_tf)

        # np.all replaces np.alltrue, which no longer exists in NumPy 2.0.
        assert np.all(decode_truth == decode_pred)
        assert np.allclose(log_prob_truth, log_prob_pred)
예제 #5
0
def eval(model, sample, sample_target):
    """
    Compute the output for a single sample and print it.

    ``sample`` is reshaped into a batch of one, run through ``model`` and
    decoded with ``K.ctc_decode``; the target and the decoded prediction are
    printed. Nothing is returned.
    """
    single_batch = sample.reshape(1, sample.shape[0], sample.shape[1])
    prediction = model.predict(single_batch)

    # The decode length is taken from axis 1 of the 'pred' layer's output shape.
    time_steps = model.get_layer('pred').output_shape[1]
    lengths = np.asarray(time_steps).reshape(1,)
    decoded = K.ctc_decode(prediction, input_length=lengths)

    # Entering the session makes it the default one used by Tensor.eval().
    with tf.Session():
        print("sample target", sample_target)
        print("predicted", decoded[0][0].eval())
예제 #6
0
def get_tensorflow_decoder(output_tensor, beam_size=1024):
    """Build a backend function that beam-search decodes ``output_tensor``.

    Uses the TensorFlow implementation of the CTC decoder through
    ``K.ctc_decode`` with ``greedy=False`` and ``beam_width=beam_size``. The
    returned ``K.function`` takes a value for ``output_tensor`` and yields
    the first of the decoded paths.
    """
    # Collapse axis 2, then count the entries along axis 1 of each sample to
    # obtain one int32 sequence length per batch row.
    collapsed = tf.reduce_max(output_tensor, 2)
    entry_counts = tf.reduce_sum(tf.ones_like(collapsed), 1)
    sequence_length = tf.cast(entry_counts, tf.int32)

    decoded_paths, _ = K.ctc_decode(
        output_tensor,
        sequence_length,
        greedy=False,
        beam_width=beam_size,
    )
    return K.function([output_tensor], [decoded_paths[0]])
예제 #7
0
    def test_ctc_decode_beam_search(self):
        """Test one batch entry decoded by beam search with two beams.

        The two best paths and their log probabilities returned by
        ``KTF.ctc_decode`` are compared with precomputed reference values.
        """

        depth = 6

        seq_len_0 = 5
        input_prob_matrix_0 = np.asarray(
            [[0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908],
             [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517],
             [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763],
             [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655],
             [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878],
             # Random entry added in at time=5; it is not copied into the
             # inputs below because only the first seq_len_0 rows are used.
             [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671]],
            dtype=np.float32)

        # The first seq_len_0 rows as 1 x depth matrices, followed by two
        # all-zero padding steps (7 time steps in total).
        inputs = ([input_prob_matrix_0[t, :][np.newaxis, :]
                  for t in range(seq_len_0)] +
                  2 * [np.zeros((1, depth), dtype=np.float32)])

        # Reorder from time x batch x depth to batch x time x depth.
        inputs = KTF.variable(np.asarray(inputs).transpose((1, 0, 2)))

        # batch_size length vector of sequence_lengths
        input_length = KTF.variable(np.array([seq_len_0], dtype=np.int32))
        # batch_size length vector of negative log probabilities
        log_prob_truth = np.array([
            0.584855,  # output beam 0
            0.389139  # output beam 1
        ], np.float32)[np.newaxis, :]

        decode_truth = [np.array([1, 0]), np.array([0, 1, 0])]

        beam_width = 2
        top_paths = 2

        decode_pred_tf, log_prob_pred_tf = KTF.ctc_decode(inputs,
                                                          input_length,
                                                          greedy=False,
                                                          beam_width=beam_width,
                                                          top_paths=top_paths)

        assert len(decode_pred_tf) == top_paths

        log_prob_pred = KTF.eval(log_prob_pred_tf)

        # np.alltrue was removed in NumPy 2.0; np.all is the same reduction
        # and keeps the broadcasting comparison used here.
        for i in range(top_paths):
            assert np.all(decode_truth[i] == KTF.eval(decode_pred_tf[i]))

        assert np.allclose(log_prob_truth, log_prob_pred)
예제 #8
0
    def test_ctc_decode_beam_search(self):
        """Test one batch, two beams - beam search decoder.

        Decodes a single sample with ``beam_width=2`` and ``top_paths=2`` and
        checks both returned paths and their log probabilities against
        reference values.
        """

        depth = 6
        beam_width = 2
        top_paths = 2

        seq_len_0 = 5
        input_prob_matrix_0 = np.asarray(
            [[0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908],
             [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517],
             [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763],
             [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655],
             [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878],
             # Random entry added in at time=5 (never read: only rows
             # 0..seq_len_0-1 are used below)
             [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671]],
            dtype=np.float32)

        # seq_len_0 real time steps of shape 1 x depth ...
        real_steps = [input_prob_matrix_0[t, :][np.newaxis, :]
                      for t in range(seq_len_0)]
        # ... padded with two all-zero steps, giving 7 time steps overall.
        padding = 2 * [np.zeros((1, depth), dtype=np.float32)]
        inputs = real_steps + padding

        # time x batch x depth -> batch x time x depth
        inputs = KTF.variable(np.asarray(inputs).transpose((1, 0, 2)))

        # batch_size length vector of sequence_lengths
        input_length = KTF.variable(np.array([seq_len_0], dtype=np.int32))
        # batch_size length vector of negative log probabilities
        log_prob_truth = np.array([
            0.584855,  # output beam 0
            0.389139  # output beam 1
        ], np.float32)[np.newaxis, :]

        decode_truth = [np.array([1, 0]), np.array([0, 1, 0])]

        decode_pred_tf, log_prob_pred_tf = KTF.ctc_decode(inputs,
                                                          input_length,
                                                          greedy=False,
                                                          beam_width=beam_width,
                                                          top_paths=top_paths)

        assert len(decode_pred_tf) == top_paths

        log_prob_pred = KTF.eval(log_prob_pred_tf)

        # np.all stands in for np.alltrue, which NumPy 2.0 no longer provides.
        for i in range(top_paths):
            assert np.all(decode_truth[i] == KTF.eval(decode_pred_tf[i]))

        assert np.allclose(log_prob_truth, log_prob_pred)
예제 #9
0
파일: OCR.py 프로젝트: Ulquiorracifa/Ocr2
def predict(img_path, base_model, thresholding=160):
    """Binarise an image and recognise its text with a CTC-based model.

    The image is converted to greyscale, resized to a fixed 160 x 32 pixels,
    binarised, shifted to the value range [-0.5, 0.5] and passed to
    ``base_model`` as a batch of one. The prediction is decoded with
    ``K.ctc_decode`` and mapped to characters through ``id_to_char``.

    Args:
        img_path: Anything accepted by ``Image.open``.
        base_model: Object whose ``predict`` output can be passed to
            ``K.ctc_decode``.
        thresholding: Threshold in the range 0 - 255 (values outside are
            clamped); the default is 160.
            0   : use an automatic (adaptive Gaussian) threshold.
            > 0 : use this value as a manual threshold; pixels above it
                  become 255, all others 0.

    Returns:
        Tuple ``(text, im)``: the decoded string and the resized greyscale
        PIL image (before binarisation).
    """
    thresholding = min(max(thresholding, 0), 255)

    t = Timer()
    # Release the source file as soon as the greyscale copy exists.
    with Image.open(img_path) as src:
        im = src.convert('L')

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    im = im.resize((160, 32), Image.LANCZOS)
    img = np.array(im)

    if thresholding == 0:
        img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 3, 5)
    else:
        # Vectorised form of the per-pixel comparison.
        img = np.where(img > thresholding, 255, 0).astype(np.uint8)

    img = img.astype(np.float32) / 255.0 - 0.5
    X = img.reshape((1, 32, 160, 1))

    t.tic()
    y_pred = base_model.predict(X)
    t.toc()

    # Every sample is decoded over the full time axis of the prediction.
    input_length = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.get_value(K.ctc_decode(y_pred, input_length=input_length)[0][0])
    out = u''.join([id_to_char[x] for x in decoded[0]])

    return out, im
예제 #10
0
def test_model(model, X_test, Y_test):
    """Print the exact-match accuracy of ``model`` on a test set.

    The predictions for ``X_test`` are CTC-decoded, cut to the first
    ``MAX_CAPTCHA`` columns and compared with ``Y_test``. A sample counts as
    correct only when every position matches. Shapes and the resulting score
    are printed; nothing is returned.
    """
    print("X_test:", X_test.shape)
    print("Y_test:", Y_test.shape)

    y_pred = model.predict(X_test)
    n_samples, n_steps = y_pred.shape[0], y_pred.shape[1]

    # Decode every sample over the full time axis.
    lengths = np.ones(n_samples) * n_steps
    decoded = K.ctc_decode(y_pred, input_length=lengths)[0][0]
    out = K.get_value(decoded)[:, :MAX_CAPTCHA]

    # A row whose absolute differences sum to zero matches its target exactly.
    mismatch = np.abs(out - Y_test).sum(axis=1)
    n_exact = np.count_nonzero(mismatch == 0)
    accur_score = float(n_exact) / len(mismatch)
    print("accur_score:", accur_score)
예제 #11
0
def predict(img_path, base_model):
    """Recognise the text in one image with a CTC-based model.

    The image is converted to greyscale, resized to a height of 32 pixels
    while keeping its aspect ratio, shifted to the value range [-0.5, 0.5]
    and passed to ``base_model`` as a batch of one. The per-step argmax and
    the type of the decoded tensor are printed for inspection.

    Args:
        img_path: Anything accepted by ``Image.open``.
        base_model: Object whose ``predict`` output can be passed to
            ``K.ctc_decode``.

    Returns:
        Tuple ``(text, img)``: the decoded string and the normalised image
        as a float32 array of shape (32, width).
    """
    # Release the source file as soon as the greyscale copy exists.
    with Image.open(img_path) as src:
        img = src.convert('L')
    w, h = img.size

    # float() keeps the ratio exact under Python 2 integer division as well;
    # the width is computed once and never allowed to collapse to zero.
    rate = float(w) / h
    new_w = max(1, int(rate * 32))

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((new_w, 32), Image.LANCZOS)
    img = np.array(img).astype(np.float32) / 255.0 - 0.5
    x = img.reshape(1, 32, new_w, 1)

    y_pred = base_model.predict(x)
    print(np.argmax(y_pred, axis=2)[0])

    # Build the decode op once and reuse it for both the type printout and
    # the actual evaluation.
    input_length = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=input_length)[0][0]
    print(type(decoded))

    labels = K.get_value(decoded)
    out = u''.join([id_to_char[idx] for idx in labels[0]])
    return out, img
예제 #12
0
    def eval(self, sample, sample_target):
        """Decode a single sample and print it next to its target.

        The sample is reshaped into a batch of one, run through
        ``self.predict_model`` and decoded with ``K.ctc_decode``. The decode
        length is read from the output shape of the ``'pred'`` layer of
        ``self.model``.

        Args:
            sample: Two-dimensional array-like with ``reshape`` and ``shape``
                (for example a NumPy array).
            sample_target: Expected result; only printed for comparison.

        Returns:
            None. The target and the decoded prediction are printed.
        """
        _input = sample.reshape(1, sample.shape[0], sample.shape[1])
        log_prob = self.predict_model.predict(_input)

        time_steps = self.model.get_layer('pred').output_shape[1]
        input_length = np.asarray(time_steps).reshape(1,)
        output = K.ctc_decode(log_prob, input_length=input_length)

        # Use the session itself as the context manager: it becomes the
        # default session AND is closed on exit. `Session().as_default()`
        # alone never closes the session and leaks its resources.
        with tf.Session() as sess:
            print("sample target", sample_target)
            print("predicted", sess.run(output[0][0]))
예제 #13
0
def predict(img_path, base_model, thresholding=160):
    """Recognise the text in one image using a fixed-width model input.

    The image is converted to greyscale, resized to a height of 32 pixels
    while keeping its aspect ratio, shifted to the value range [-0.5, 0.5]
    and copied into the left part of a zero-filled 32 x 512 canvas. That
    canvas is passed to ``base_model`` as a batch of one and the prediction
    is decoded with ``K.ctc_decode`` and ``id_to_char``.

    Args:
        img_path: Anything accepted by ``Image.open``.
        base_model: Object whose ``predict`` output can be passed to
            ``K.ctc_decode``.
        thresholding: Not used by this implementation (binarisation is
            disabled); kept so that existing callers keep working.

    Returns:
        Tuple ``(text, t_img)``: the decoded string and the padded 32 x 512
        array that was fed to the model.

    Raises:
        ValueError: If the resized image is wider than 512 pixels and
            therefore does not fit on the canvas.
    """
    max_width = 512

    t = Timer()
    # Release the source file as soon as the greyscale copy exists.
    with Image.open(img_path) as src:
        img = src.convert('L')
    w, h = img.size

    # float() keeps the ratio exact under Python 2 integer division as well;
    # the width is never allowed to collapse to zero.
    rate = float(w) / h
    new_w = max(1, int(rate * 32))
    if new_w > max_width:
        # Previously this surfaced as an opaque NumPy broadcasting error.
        raise ValueError(
            "resized image width %d exceeds the model input width %d"
            % (new_w, max_width))

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    img = img.resize((new_w, 32), Image.LANCZOS)
    img = np.array(img, 'f') / 255.0 - 0.5

    # Left-align the image on the canvas; the remainder stays zero.
    t_img = np.zeros((32, max_width))
    t_img[:, :new_w] = img
    X = t_img.reshape((1, 32, max_width, 1))

    t.tic()
    y_pred = base_model.predict(X)
    t.toc()
    print("times,", t.diff)

    # Every sample is decoded over the full time axis of the prediction.
    input_length = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.get_value(K.ctc_decode(y_pred, input_length=input_length)[0][0])
    out = u''.join([id_to_char[x] for x in decoded[0]])

    return out, t_img
예제 #14
0
 def __text_recognition(self):
     """Recognise the text inside every box and store it in ``_rec_results``.

     For each entry of ``self._boxes`` the matching region of ``self._image``
     is cropped, converted to greyscale, resized to a height of 32 pixels,
     shifted to the value range [-0.5, 0.5] and run through
     ``self._ocr_model``. The CTC-decoded string is appended to
     ``self._rec_results``, which is reset at the start of the call.
     """
     self._rec_results = []
     for box in self._boxes:
         # Entries 1/7 bound the rows and entries 0/6 the columns of the crop.
         region = self._image[box[1]:box[7], box[0]:box[6]]
         gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)

         # Keep the aspect ratio while scaling the height to 32 px.
         height_ratio = gray.shape[0] * 1.0 / 32
         w = int(gray.shape[1] / height_ratio)
         resized = cv2.resize(gray, (w, 32))

         normalized = np.array(resized).astype(np.float32) / 255.0 - 0.5
         batch = normalized.reshape((1, 32, w, 1))

         y_pred = self._ocr_model.predict(batch)
         # Every sample is decoded over the full time axis.
         lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
         decoded = K.ctc_decode(y_pred, input_length=lengths)[0][0]
         labels = K.get_value(decoded)

         text = u''.join([id_to_char[idx] for idx in labels[0]])
         self._rec_results.append(text)