예제 #1
0
def classify_process():
    """Run the CTC inference worker loop; this function never returns.

    Builds the ``CTC`` model, loads the trained weights and then polls
    ``settings.QUEUE_NAME_2`` on ``message_queue2``.  Every job is a JSON
    document carrying an ``"id"`` and an ``"audio_feature"`` field: a
    base64-encoded float32 buffer that is reshaped to ``(122, 85)``.  The
    decoded text of each job is published twice: on the channel named after
    the job id (as ``{"res": text}``) and, as a bare string, on
    ``"PPT_COMMAND"``.
    """
    import time  # function-scope import: only the idle back-off needs it

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    model = CTC((122, 85), 28)
    model.build()
    model.m.compile(loss=ctc, optimizer="adam", metrics=["accuracy"])
    model.tm.compile(loss=ctc, optimizer="adam")
    model.tm.load_weights(
        "/home/alien/webservice/src/webservice/nn_model/ctc.h5")

    while True:
        # Pop directly rather than llen() followed by lpop(): another
        # consumer could drain the list between the two calls, and the
        # resulting empty pop would crash on .decode().
        # assumes lpop() returns None on an empty list (redis-py
        # behaviour) -- TODO confirm message_queue2 is a redis client.
        raw_job = message_queue2.lpop(settings.QUEUE_NAME_2)
        if raw_job is None:
            # Back off briefly instead of spinning a core while idle.
            time.sleep(0.01)
            continue

        job = ujson.loads(raw_job.decode("utf-8"))
        job_id = job["id"]
        encoded = bytes(job["audio_feature"], encoding="utf-8")
        feature = np.frombuffer(base64.decodebytes(encoded),
                                dtype=np.float32)

        # Add the leading batch axis before predicting: (1, 122, 85).
        probabilities = model.tm.predict(feature.reshape(1, 122, 85),
                                         verbose=0)
        k_ctc_out = K.ctc_decode(probabilities, np.array([28]))
        decoded_out = K.eval(k_ctc_out[0][0])

        # -1 entries in the dense decode output are skipped.
        str_decoded_out = [
            "".join(index_map[c] for c in row if c != -1)
            for row in decoded_out
        ]
        text = str_decoded_out[0]
        message_queue2.publish(job_id, ujson.dumps({"res": text}))
        message_queue2.publish("PPT_COMMAND", text)
def beam_search(captcha_text):
    """Render a captcha, beam-search decode it, and plot the top 3 guesses.

    Args:
        captcha_text: Text to render into a 160x60 captcha image; it is also
            shown as the ground truth in each plot title.

    One matplotlib figure is shown per decoded path.  Relies on the
    module-level ``model`` and ``characters``.
    """
    # Generate the captcha and scale pixel values by 1/255.
    image = ImageCaptcha(width=160, height=60)
    x = np.array(image.generate_image(captcha_text)) / 255.0
    # Add a leading batch axis.
    X_test = np.expand_dims(x, axis=0)
    y_pred = model.predict(X_test)

    top_paths = 3
    # Decode once: a single beam-search call already returns every requested
    # path, so repeating it per path only repeated identical work.
    decoded_paths = K.ctc_decode(y_pred,
                                 input_length=np.ones(y_pred.shape[0]) *
                                 y_pred.shape[1],
                                 greedy=False,
                                 top_paths=top_paths)[0]
    # Row 0 of each path is the label sequence of the only sample.
    outs = [K.get_value(decoded_paths[i])[0] for i in range(top_paths)]

    for labels in outs:
        text = ''.join([characters[idx] for idx in labels])
        plt.imshow(X_test[0])
        plt.title('pred:' + text + '\ntrue: ' + captcha_text)
        plt.show()
def greedy(captcha_text):
    """Render a captcha, greedy-decode the prediction, and plot the result.

    Args:
        captcha_text: Text to render into a 160x60 captcha image; it is also
            shown as the ground truth in the plot title.

    Prints the prediction shape and the per-step argmax class (looked up in
    ``pre_characters``), then shows the image titled with the decoded text
    (looked up in ``characters``).  Relies on the module-level ``model``.
    """
    generator = ImageCaptcha(width=160, height=60)
    pixels = np.array(generator.generate_image(captcha_text)) / 255.0
    # Add a leading batch axis.
    X_test = np.expand_dims(pixels, axis=0)

    y_pred = model.predict(X_test)
    print("y_pred shape:", y_pred.shape)

    # Most probable class index at every position of the first sample.
    print('id', '\t', 'characters')
    for class_id in np.argmax(y_pred[0], axis=-1):
        print(class_id, '\t', pre_characters[class_id])

    # Best-path (greedy) CTC decoding.
    seq_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=seq_lengths, greedy=True)
    labels = K.get_value(decoded[0][0])
    out = ''.join([characters[idx] for idx in labels[0]])

    plt.imshow(X_test[0])
    plt.title('pred:' + out + '\ntrue: ' + captcha_text)
    plt.show()
    def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]:
        """Transcribe one image with greedy CTC decoding.

        Args:
            image: A single image; ``uint8`` input is divided by 255 and
                cast to ``float32`` before prediction.

        Returns:
            ``(pred, conf)``: the decoded labels mapped through
            ``self.data.mapping``, joined and stripped, and
            ``exp(-log_prob)`` for the first sample as its confidence.
        """
        if image.dtype == np.uint8:
            image = (image / 255).astype(np.float32)

        network = self.network
        # Backend function from the 'inputs' layer (plus the learning-phase
        # flag) to the 'softmax_output' layer.
        softmax_output_fn = K.function(
            [network.get_layer('inputs').input, K.learning_phase()],
            [network.get_layer('softmax_output').output])

        batch = np.expand_dims(image, 0)
        # The second element is the learning-phase value fed to the function.
        softmax_output = softmax_output_fn([batch, 0])[0]

        # Decode over every time step of the single sample.
        input_length = np.array([softmax_output.shape[1]])
        decoded, log_prob = K.ctc_decode(softmax_output,
                                         input_length,
                                         greedy=True)

        labels = K.eval(decoded[0])[0]
        pred = ''.join(self.data.mapping[label] for label in labels).strip()

        conf = np.exp(-K.eval(log_prob)[0][0])
        return pred, conf
예제 #5
0
    def evaluate(self):
        """Score the prediction model on one randomly chosen validation batch.

        Prints the decoded label row and the ``(prediction, truth)`` pair for
        every sample in the batch.

        Returns:
            ``(exact_match_ratio, matching_char_count)``: the fraction of
            samples whose decoded string equals the label, and the raw count
            of position-wise matching characters (not normalised).
        """
        generator = self.val_generator
        batch_index = np.random.randint(
            0, int(generator.nb_samples / generator.batch_size))
        x_val, y_val = generator[batch_index]

        y_pred = self.prediction_model.predict(x_val)

        # The first two time steps are dropped before decoding.
        trimmed = y_pred[:, 2:, :]
        seq_lengths = np.ones(trimmed.shape[0]) * trimmed.shape[1]
        decoded = K.ctc_decode(trimmed, input_length=seq_lengths)[0][0]
        ctc_out = K.get_value(decoded)[:, :self.label_len]

        exact_matches = 0
        char_matches = 0
        for i in range(generator.batch_size):
            label_row = ctc_out[i]
            print(label_row)
            text = ''.join([self.characters[c] for c in label_row])
            text = text.replace('-', '')
            truth = y_val[i]
            if text == truth:
                exact_matches += 1
            print(text, truth)

            char_matches += sum(1 for c1, c2 in zip(text, truth) if c1 == c2)

        return exact_matches / generator.batch_size, char_matches
예제 #6
0
def validate(model,
             x,
             y_true,
             input_len,
             label_len,
             y_strings,
             test=False,
             save_file=None):
    """Compute the mean CTC loss and the mean ``wer`` score for one batch.

    Args:
        model: Callable mapping ``x`` to per-time-step predictions.
        x: Model input batch.
        y_true: Target label sequences passed to ``ctc_batch_cost``.
        input_len: Per-sample prediction lengths.
        label_len: Per-sample label lengths.
        y_strings: Reference sentences, one per sample.
        test: When true, each reference/prediction pair is written to
            ``save_file``.
        save_file: Writable text file; required when ``test`` is true.

    Returns:
        ``(mean_loss, mean_score)`` where ``mean_score`` is the average of
        ``wer(predicted, reference)`` over ``y_strings``.
    """
    y_pred = model(x)
    # ctc_batch_cost receives the lengths with a trailing length-1 axis.
    loss = ctc_batch_cost(y_true, y_pred,
                          np.expand_dims(input_len, axis=1),
                          np.expand_dims(label_len, axis=1))

    # Flatten explicitly: np.squeeze() with no axis would turn a one-sample
    # batch into a 0-d array, which ctc_decode cannot treat as a batch.
    flat_input_len = np.reshape(input_len, (-1,))
    y_decode = ctc_decode(y_pred, flat_input_len)[0][0]

    accuracy = 0.0

    for i, reference in enumerate(y_strings):
        predicted_sentence = indices_to_string(y_decode[i].numpy())
        accuracy += wer(predicted_sentence, reference)

        if test:
            save_file.write("Correct Sentence:" + str(reference) + "\n")
            save_file.write("Predicted Sentence:" + predicted_sentence + "\n")

    return tf.reduce_mean(loss), accuracy / len(y_strings)
 def get_predictions_recorded(
     self,
     spectrogram=False,
     recordingpath='recordings/demo.wav',
 ):
     """Print the acoustic model's decoded transcription of a recording.

     Params:
         spectrogram (bool): Forwarded to ``AudioGenerator``.
         recordingpath (str): Path of the audio file to transcribe; also
             stored on ``self.audio_path``.
     """
     # The generator's training data is loaded before featurizing.
     generator = AudioGenerator(spectrogram=spectrogram)
     generator.load_train_data()
     self.audio_path = recordingpath

     # Featurize and normalize the recording.
     features = generator.normalize(generator.featurize(recordingpath))

     # Predict on a batch of one and decode the output.
     acoustic_model = self.input_to_softmax
     prediction = acoustic_model.predict(np.expand_dims(features, axis=0))
     output_length = [acoustic_model.output_length(features.shape[0])]
     decoded = K.eval(K.ctc_decode(prediction, output_length)[0][0])
     # Decoded ids are shifted by one before int_sequence_to_text.
     pred_ints = (decoded + 1).flatten().tolist()

     separator = '-' * 80
     print(separator)
     print('Predicted transcription:\n' + '\n' +
           ''.join(int_sequence_to_text(pred_ints)))
     print(separator)
    def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]:
        """Transcribe one image with greedy CTC decoding.

        Args:
            image: A single image; ``uint8`` input is divided by 255 and
                cast to ``float32`` before prediction.

        Returns:
            ``(pred, conf)``: the decoded labels mapped through
            ``self.data.mapping``, joined and stripped, and
            ``exp(-log_prob)`` for the first sample as its confidence.
        """
        if image.dtype == np.uint8:
            image = (image / 255).astype(np.float32)

        network = self.network
        # Sub-model from the "image" input to the "softmax_output" layer.
        softmax_model = KerasModel(
            inputs=[network.get_layer("image").input],
            outputs=[network.get_layer("softmax_output").output],
        )

        batch = np.expand_dims(image, 0)
        softmax_output = softmax_model.predict(batch)

        # Decode over every time step of the single sample.
        input_length = [softmax_output.shape[1]]
        decoded, log_prob = K.ctc_decode(softmax_output,
                                         input_length,
                                         greedy=True)

        labels = K.eval(decoded[0])[0]
        pred = "".join(self.data.mapping[label] for label in labels).strip()

        conf = np.exp(-K.eval(log_prob)[0][0])
        return pred, conf
예제 #9
0
def _decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    """Run ``K.ctc_decode`` on softmax output and return its paths as a list.

    # Arguments
        y_pred: tensor `(samples, time_steps, num_categories)` holding the
            softmax output to decode.
        input_length: tensor `(samples, )` with the sequence length of each
            batch item in `y_pred`.
        greedy: use the faster best-path search when `true`; otherwise a
            beam search decoder is used.
        beam_width: beam width for the beam search decoder (only relevant
            when `greedy` is `false`).
        top_paths: number of most probable paths to return when `greedy`
            is `false`.
    # Returns
        Tuple `(paths, logprobs)`:
            paths: list of decoded sequences -- one element when `greedy`
                is `true`, otherwise the `top_paths` most probable ones.
                Blank labels are returned as `-1`.
            logprobs: tensor with the log probability of each decoded
                sequence.
    """
    decoded_paths, logprobs = K.ctc_decode(y_pred=y_pred,
                                           input_length=input_length,
                                           greedy=greedy,
                                           beam_width=beam_width,
                                           top_paths=top_paths)
    return (list(decoded_paths), logprobs)
예제 #10
0
    def predict(self, uriImage):
        """Load an image, run the recognition model, and return its text.

        Args:
            uriImage: Location passed to ``self.loadImage``.

        Returns:
            The decoded string of the single image.  The list of all decoded
            strings is also printed.
        """
        img = self.preprocessImg(self.loadImage(uriImage))
        batch = np.array([img])
        prediction = self.act_model.predict(batch)

        # Every sample is decoded over the full time axis.
        seq_lengths = np.ones(prediction.shape[0]) * prediction.shape[1]
        decoded = K.ctc_decode(prediction,
                               input_length=seq_lengths,
                               greedy=True)[0][0]
        out = K.get_value(decoded)

        # Skip -1 entries and map the remaining ids through char_list.
        all_predictions = [
            "".join(self.char_list[int(p)] for p in row if int(p) != -1)
            for row in out
        ]
        print(all_predictions)
        return all_predictions[0]
예제 #11
0
def _decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    """Run ``K.ctc_decode`` and return its results as NumPy values.

    All arguments are forwarded unchanged to ``K.ctc_decode``.

    Returns:
        ``(paths, logprobs)``: a list with ``.numpy()`` of every decoded
        path, and ``.numpy()`` of the log-probability tensor.
    """
    decoded_paths, logprob_tensor = K.ctc_decode(y_pred=y_pred,
                                                 input_length=input_length,
                                                 greedy=greedy,
                                                 beam_width=beam_width,
                                                 top_paths=top_paths)

    paths = [path.numpy() for path in decoded_paths]
    return (paths, logprob_tensor.numpy())
예제 #12
0
def predict_text(model, img):
    """Return the CTC-decoded text the model predicts for one image.

    Args:
        model: Model whose ``predict`` returns per-time-step class scores.
        img: A single image; a leading batch axis is added before
            prediction.

    Returns:
        The decoded string with every ``'-'`` removed.
    """
    y_pred = model.predict(img[np.newaxis, :, :, :])

    # The first two time steps are dropped before decoding.
    trimmed = y_pred[:, 2:, :]
    seq_lengths = np.ones(trimmed.shape[0]) * trimmed.shape[1]
    decoded = K.ctc_decode(trimmed, input_length=seq_lengths)[0][0]
    ctc_out = K.get_value(decoded)[:, :cfg.label_len]

    text = ''.join([cfg.characters[c] for c in ctc_out[0]])
    return text.replace('-', '')
예제 #13
0
 def __call__(self, batch_logits: np.ndarray, input_length: int,
              **kwargs) -> List[np.ndarray]:
     """ Decode the best guess from logits using beam search algorithm.

     Only the most probable path is kept; it is flattened into one 1-D
     array and returned as a single-element list.
     """
     best_path = K.ctc_decode(batch_logits, [input_length],
                              greedy=False,
                              beam_width=self.beam_width,
                              top_paths=self.top_paths)[0][0]
     labels = K.eval(best_path).flatten().tolist()
     return [np.array(labels)]
예제 #14
0
def evaluate(model, batch_size=128, steps=20):
    """Return the mean exact-match accuracy over ``steps`` captcha batches.

    Args:
        model: Accepted but not used; predictions come from the
            module-level ``base_model``.
        batch_size: Samples per generated batch.
        steps: Number of batches; also the divisor of the final mean.

    Batches whose decoded output has fewer than four columns add nothing to
    the total but still count in the divisor.
    """
    total_acc = 0
    for (X_test, y_test, _, _), _ in CaptchaSequence(characters, batch_size,
                                                     steps):
        y_pred = base_model.predict(X_test)
        n_samples, n_steps = y_pred.shape[0], y_pred.shape[1]
        decoded = K.ctc_decode(y_pred,
                               input_length=np.ones(n_samples) * n_steps)
        out = K.get_value(decoded[0][0])[:, :4]
        if out.shape[1] != 4:
            continue
        # A sample is correct only when all four labels match.
        total_acc += (y_test == out).all(axis=1).mean()
    return total_acc / steps
예제 #15
0
def identify_captcha(base64_img):
    """Decode a base64-encoded captcha image into text.

    Args:
        base64_img: Base64 payload of an image whose pixels reshape to
            ``(1, 30, 91, 3)``.

    Returns:
        The characters for the first four decoded labels.  Relies on the
        module-level ``base_model`` and ``characters``.
    """
    picture = Image.open(io.BytesIO(base64.b64decode(base64_img)))
    x_test = (np.array(picture) / 255.0).reshape((1, 30, 91, 3))

    y_pred = base_model.predict(x_test)
    seq_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=seq_lengths)[0][0]
    labels = K.get_value(decoded)[:, :4]
    return ''.join([characters[m] for m in labels[0]])
예제 #16
0
 def decode_batch_predictions(pred):
     """Greedy-decode a batch of predictions into a list of UTF-8 strings.

     Only the first four decoded labels of each sample are kept; they are
     mapped through ``num_to_char`` and joined.
     """
     seq_lengths = np.ones(pred.shape[0]) * pred.shape[1]
     # Greedy search; beam search is the alternative for harder tasks.
     decoded = K.ctc_decode(pred, input_length=seq_lengths, greedy=True)
     results = decoded[0][0][:, :4]
     return [
         tf.strings.reduce_join(num_to_char(res)).numpy().decode('utf-8')
         for res in results
     ]
예제 #17
0
    def call(self, inputs, **kwargs):
        """CTC-decode ``inputs``, treating every sample as full length.

        Returns:
            ``[prediction, scores]`` exactly as produced by
            ``K.ctc_decode``.
        """
        dims = tf.shape(inputs)
        # dims[1, None] keeps a length-1 axis so tf.tile can repeat the
        # size of axis 1 once per batch item.
        input_length = tf.tile(dims[1, None], [dims[0]])

        # NOTE(review): `greedy` is left at ctc_decode's default.  Keras
        # documents `beam_width` as used only when `greedy` is False --
        # confirm whether beam search was intended here.
        prediction, scores = K.ctc_decode(inputs,
                                          input_length,
                                          beam_width=self.beam_width)
        return [prediction, scores]
def making_prediction(best_model, test_data, test_generator, test_labels):
    """Predict text for ``test_data`` and collect the matching ground truth.

    Returns:
        ``(pred, gt)``: decoded strings (each label row cut at
        ``fine_stop_element`` and passed to ``labels_to_text``), and the
        entries of ``test_labels`` for every key in
        ``test_generator.texts``.
    """
    y_pred = best_model.predict(test_data, batch_size=2)
    seq_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=seq_lengths, greedy=True)
    out = K.get_value(decoded[0][0])

    pred = [
        labels_to_text(element[:fine_stop_element(element)])
        for element in out
    ]
    gt = [test_labels[img] for img in test_generator.texts]
    return pred, gt
예제 #19
0
 def __call__(self, batch_logits: np.ndarray,
              input_length: int) -> List[np.ndarray]:
     """ Decode the best guess from logits using greedy algorithm.

     The decoded labels are flattened into one 1-D array and returned as a
     single-element list.
     """
     best_path = K.ctc_decode(batch_logits, [input_length],
                              greedy=True)[0][0]
     labels = K.eval(best_path).flatten().tolist()
     return [np.array(labels)]
예제 #20
0
파일: app.py 프로젝트: jimevansv/Data_606
def predict(filename, my_model):
  """Recognise the text in one image file.

  The image is preprocessed with ``preprocess_image(filename, 128, 64)``,
  transposed, reshaped to ``(1, 128, 64, 1)``, predicted on, greedy-decoded
  and finally passed to ``decode_text``.
  """
  transposed = preprocess_image(filename, 128, 64).T
  batch = np.array([transposed]).reshape(1, 128, 64, 1)
  prediction_trail = my_model.predict(x=batch)

  # Every sample is decoded over the full time axis.
  seq_lengths = np.ones(prediction_trail.shape[0]) * prediction_trail.shape[1]
  decoded = tf_keras_backend.ctc_decode(prediction_trail,
                                        input_length=seq_lengths,
                                        greedy=True)[0][0]
  return decode_text(tf_keras_backend.get_value(decoded))
    def Predict(self, data_input, input_len):
        """Run the acoustic model on one utterance and CTC-decode the output.

        The original note describes the result as the list of pinyin symbols
        produced by speech recognition.

        Args:
            data_input: Feature array of the utterance; it is copied into
                the start of a zero-padded buffer 1600 frames long.
            input_len: Sequence length handed to the CTC decoder.

        Returns:
            The decoded label sequence of the single sample.
        """
        batch_size = 1
        in_len = np.zeros((batch_size), dtype=np.int32)
        in_len[0] = input_len

        # np.float was removed in NumPy 1.24; it was an alias of the builtin
        # float, so np.float64 yields the same buffer dtype.
        x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1),
                        dtype=np.float64)
        x_in[0, 0:len(data_input)] = data_input

        base_pred = self.base_model.predict(x=x_in)

        r = K.ctc_decode(base_pred,
                         in_len,
                         greedy=True,
                         beam_width=100,
                         top_paths=1)

        # First path, first (and only) sample.
        return K.get_value(r[0][0])[0]
예제 #22
0
def get_decoder(output_tensor, alphabet):
    """Build a beam-search decoding callable for ``output_tensor``.

    Args:
        output_tensor: Symbolic tensor of network outputs (reduced over
            axis 2 and then summed over axis 1 to obtain lengths).
        alphabet: Forwarded to ``batch_tensorflow_decode``.

    Returns:
        ``batch_tensorflow_decode`` with ``alphabet`` and the compiled
        backend decoder function already bound.
    """
    # Counting ones along axis 1 gives the size of that axis per sample,
    # i.e. every sample is decoded over its full length.
    collapsed = tf.reduce_max(output_tensor, 2)
    sequence_length = tf.cast(
        tf.reduce_sum(tf.ones_like(collapsed), 1), tf.int32)

    top_k_decoded, _ = K.ctc_decode(output_tensor,
                                    sequence_length,
                                    greedy=False,
                                    beam_width=64)
    best_path = top_k_decoded[0]
    print(best_path)
    decoder = K.function([output_tensor], [best_path])
    return partial(batch_tensorflow_decode, alphabet=alphabet, decoder=decoder)
예제 #23
0
 def decode(self, pred):
     """Greedy-decode a batch of predictions into a list of UTF-8 strings.

     At most ``self.max_length`` labels per sample are kept; they are mapped
     through ``self.num_to_char`` and joined.
     """
     seq_lengths = np.ones(pred.shape[0]) * pred.shape[1]
     # Greedy search; beam search is the alternative for harder tasks.
     decoded = ctc_decode(pred, input_length=seq_lengths, greedy=True)
     results = decoded[0][0][:, :self.max_length]
     return [
         reduce_join(self.num_to_char(res)).numpy().decode("utf-8")
         for res in results
     ]
예제 #24
0
def decode_predict_ctc(out, chars = ArchitectureConfig.CHARS, top_paths=1):
    """Beam-search decode ``out`` and return the text of the best paths.

    Args:
        out: Network output batch; only the first sample is decoded to text.
        chars: Character set handed to ``labels_to_text``.
        top_paths: Number of most probable paths to return.

    Returns:
        A list of ``top_paths`` strings, most probable first.
    """
    # The beam must be at least as wide as the number of requested paths.
    beam_width = max(5, top_paths)

    # Decode once: a single call already yields every requested path, so
    # repeating it per path only repeated identical work.
    decoded_paths = backend.ctc_decode(
        out, input_length=np.ones(out.shape[0]) * out.shape[1],
        greedy=False, beam_width=beam_width, top_paths=top_paths
    )[0]

    results = []
    for i in range(top_paths):
        labels = backend.get_value(decoded_paths[i])[0]
        results.append(labels_to_text(chars, labels))
    return results
예제 #25
0
    def Predict(self, data_input, input_len):
        """Run the acoustic model on one utterance and CTC-decode the output.

        The original note describes the result as the list of pinyin symbols
        produced by speech recognition.

        Args:
            data_input: Feature array of the utterance; it is copied into
                the start of a zero-padded buffer 1600 frames long.
            input_len: Sequence length handed to the CTC decoder.

        Returns:
            The decoded label sequence of the single sample.
        """
        batch_size = 1
        in_len = np.zeros((batch_size), dtype=np.int32)
        in_len[0] = input_len

        # np.float was removed in NumPy 1.24; it was an alias of the builtin
        # float, so np.float64 yields the same buffer dtype.
        x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1),
                        dtype=np.float64)
        x_in[0, 0:len(data_input)] = data_input

        base_pred = self.base_model.predict(x=x_in)

        r = K.ctc_decode(base_pred,
                         in_len,
                         greedy=True,
                         beam_width=100,
                         top_paths=1)

        if tf.__version__.startswith('1.'):
            # TensorFlow 1.x: evaluate in a session and close it afterwards
            # so a new session is not leaked on every call.
            with tf.compat.v1.Session() as session:
                r1 = r[0][0].eval(session=session)
        else:
            r1 = r[0][0].numpy()
        return r1[0]
예제 #26
0
def predict_text(img):
    """Decode an encoded image buffer and return the recognised text.

    Args:
        img: Encoded image data (e.g. the bytes of a JPEG/PNG file) as
            ``bytes``/``bytearray`` or a 1-D ``uint8`` array.

    Returns:
        The text produced by ``num_to_label`` for the first decoded sample.

    Raises:
        ValueError: If the buffer cannot be decoded as an image.

    Side effects: writes the decoded image to ``./test_img.jpg`` and prints
    the prediction.
    """
    if isinstance(img, (bytes, bytearray)):
        # cv2.imdecode needs an array-like buffer, not a bytes object.
        img = np.frombuffer(img, dtype=np.uint8)
    # cv2.imdecode requires a read-mode flag; the one-argument call raised
    # TypeError.  NOTE(review): IMREAD_COLOR mirrors cv2.imread's default --
    # confirm this matches what preprocess() expects.
    image = cv2.imdecode(img, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("predict_text: could not decode image data")
    cv2.imwrite("./test_img.jpg", image)

    image = preprocess(image)
    pred = model.predict(image)

    # Every sample is decoded over the full time axis.
    seq_lengths = np.ones(pred.shape[0]) * pred.shape[1]
    decoded = K.get_value(K.ctc_decode(pred,
                                       input_length=seq_lengths,
                                       greedy=True)[0][0])
    predicted_text = num_to_label(decoded[0])
    print("======================")
    print(predicted_text)
    return predicted_text
 def Predict(self, data_input, input_len):
     """Run the acoustic model on one utterance and CTC-decode the output.

     The original note describes the result as the list of pinyin symbols
     produced by speech recognition.

     Args:
         data_input: Feature array of the utterance; it is copied into the
             start of a zero-padded buffer ``self.AUDIO_LENGTH`` frames long.
         input_len: Sequence length handed to the CTC decoder.

     Returns:
         The decoded label sequence of the single sample.
     """
     batch_size = 1
     in_len = np.zeros((batch_size), dtype=np.int32)
     in_len[0] = input_len

     # np.float was removed in NumPy 1.24; it was an alias of the builtin
     # float, so np.float64 yields the same buffer dtype.
     x_in = np.zeros(
         (batch_size, self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1),
         dtype=np.float64)
     x_in[0, 0:len(data_input)] = data_input

     base_pred = self.base_model.predict(x=x_in)
     r = K.ctc_decode(base_pred, in_len, greedy=True, beam_width=100,
                      top_paths=1)
     # First path, first (and only) sample.
     return K.get_value(r[0][0])[0]
예제 #28
0
def get_prediction(act_model, test_images):
    """Return the greedy CTC-decoded string for every image in the batch.

    Args:
        act_model: Model whose ``predict`` returns per-time-step scores.
        test_images: Batch of model inputs.

    Returns:
        One string per sample, built from the module-level ``letters`` with
        ``-1`` entries skipped.
    """
    scores = act_model.predict(test_images)

    # Every sample is decoded over the full time axis.
    seq_lengths = np.ones(scores.shape[0]) * scores.shape[1]
    decoded = K.ctc_decode(scores, input_length=seq_lengths,
                           greedy=True)[0][0]
    out = K.get_value(decoded)

    return [
        ''.join(letters[int(p)] for p in row if int(p) != -1)
        for row in out
    ]
예제 #29
0
def test(base_model):
    """Print the exact-match accuracy of ``base_model`` on generated data.

    Data and labels come from ``gen_data_label_data(False)``.  The first
    seven decoded labels of each sample are compared with the label row
    after dropping the value ``len(chars) + 1`` from both.
    """
    data, label = gen_data_label_data(False)
    y_pred = base_model.predict(data)

    # Every sample is decoded over the full time axis.
    seq_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=seq_lengths)[0][0]
    out = K.get_value(decoded)[:, :7]

    # Entries equal to this value are ignored on both sides.
    eco = len(chars) + 1

    def as_text(row):
        return ''.join([str(x) for x in row if x != eco])

    right_num = sum(
        1 for i in range(len(data)) if as_text(label[i]) == as_text(out[i]))
    acc = (right_num / len(data)) * 100
    print("test acc is :{}%".format(str(acc)))
예제 #30
0
    def predict(self,
                x,
                batch_size=None,
                steps=1,
                callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False,
                ctc_decode=True):
        """Predict with the wrapped model and optionally CTC-decode the output.

        Args:
            x: Input forwarded to ``self.model.predict``.
            batch_size, steps, callbacks, max_queue_size, workers,
            use_multiprocessing: Forwarded to ``self.model.predict``.
                ``steps`` also sets how many chunks the output is decoded in.
            ctc_decode: When false, skip decoding.

        Returns:
            If ``ctc_decode`` is false: ``(log_output, [])`` where
            ``log_output`` is the natural log of the model output clipped
            below at ``1e-8``.
            Otherwise ``(predicts, probabilities)``: ``predicts`` holds, per
            sample, a list of decoded label lists (one per returned path,
            ``-1`` entries removed); ``probabilities`` holds ``exp`` of the
            decoder's log-probability rows.
        """
        out = self.model.predict(x=x, batch_size=batch_size, verbose=0, steps=steps,
                                 callbacks=callbacks, max_queue_size=max_queue_size,
                                 workers=workers, use_multiprocessing=use_multiprocessing)

        if not ctc_decode:
            return np.log(out.clip(min=1e-8)), []

        steps_done = 0

        # Decode in `steps` chunks; every sample uses the longest length.
        batch_size = int(np.ceil(len(out) / steps))
        input_length = len(max(out, key=len))

        predicts, probabilities = [], []

        while steps_done < steps:
            index = steps_done * batch_size
            until = index + batch_size

            x_test = np.asarray(out[index:until])
            x_test_len = np.asarray([input_length for _ in range(len(x_test))])

            decode, log = K.ctc_decode(x_test,
                                       x_test_len,
                                       greedy=self.greedy,
                                       beam_width=self.beam_width,
                                       top_paths=self.top_paths)

            probabilities.extend([np.exp(x) for x in log])
            decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode]
            # Transpose (path, sample) -> (sample, path) with zip.  The
            # label lists differ in length after filtering, and np.swapaxes
            # on such ragged nested lists raises on current NumPy releases.
            predicts.extend([list(per_sample) for per_sample in zip(*decode)])

            steps_done += 1

        return (predicts, probabilities)