def classify_process():
    """Run the CTC recognition worker loop forever.

    Pins the process to CUDA device ``"1"``, builds the ``CTC`` model and loads
    its weights, then repeatedly pops JSON jobs from ``settings.QUEUE_NAME_2``.
    Each job carries an ``"id"`` and a base64-encoded float32 ``"audio_feature"``
    buffer that is reshaped to ``(122, 85)``. The CTC-decoded text is published
    on the channel named by the job id and on ``"PPT_COMMAND"``.

    This function never returns.
    """
    import time  # local import: only needed for the idle back-off below

    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    model = CTC((122, 85), 28)
    model.build()
    model.m.compile(loss=ctc, optimizer="adam", metrics=["accuracy"])
    model.tm.compile(loss=ctc, optimizer="adam")
    model.tm.load_weights(
        "/home/alien/webservice/src/webservice/nn_model/ctc.h5")

    while True:
        # Pop directly instead of checking llen() first: the length can change
        # between the two calls when several consumers share the queue.
        # NOTE(review): assumes a redis-style client whose lpop() returns None
        # on an empty list - confirm against the queue implementation.
        raw_job = message_queue2.lpop(settings.QUEUE_NAME_2)
        if raw_job is None:
            # Back off briefly so an empty queue does not spin a CPU core.
            time.sleep(0.01)
            continue

        job = ujson.loads(raw_job.decode("utf-8"))
        job_id = job["id"]
        encoded_feature = bytes(job["audio_feature"], encoding="utf-8")
        feature = np.frombuffer(base64.decodebytes(encoded_feature),
                                dtype=np.float32)

        # Add the batch axis expected by predict(); 28 is the sequence length
        # handed to the CTC decoder.
        batch = np.expand_dims(feature.reshape(122, 85), axis=0)
        k_ctc_out = K.ctc_decode(model.tm.predict(batch, verbose=0),
                                 np.array([28]))
        decoded_out = K.eval(k_ctc_out[0][0])

        # -1 entries are decoder padding and carry no character.
        str_decoded_out = [
            "".join([index_map[c] for c in row if not c == -1])
            for row in decoded_out
        ]

        message_queue2.publish(job_id, ujson.dumps({"res": str_decoded_out[0]}))
        message_queue2.publish("PPT_COMMAND", str_decoded_out[0])
def beam_search(captcha_text):
    """Render a captcha for ``captcha_text`` and plot the top beam-search guesses.

    Generates a 160x60 captcha image, runs the module-level ``model`` on it and
    decodes the output with a CTC beam search. One figure is shown for each of
    the three best paths, titled with the predicted and the true text.

    Args:
        captcha_text: The text to render into the captcha image.
    """
    # Generate the captcha and scale the pixel values by 1/255.
    generator = ImageCaptcha(width=160, height=60)
    pixels = np.array(generator.generate_image(captcha_text)) / 255.0
    # Add a leading batch axis before predicting.
    X_test = np.expand_dims(pixels, axis=0)
    y_pred = model.predict(X_test)

    # Number of best paths to keep.
    top_paths = 3
    # Run the beam search once; every path comes out of the same call, so
    # repeating the decode per path would only redo identical work.
    decoded_paths = K.ctc_decode(
        y_pred,
        input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1],
        greedy=False,
        top_paths=top_paths,
    )[0]
    outs = [K.get_value(decoded_paths[i])[0] for i in range(top_paths)]

    # Show each candidate in its own figure.
    for labels in outs:
        text = ''.join([characters[idx] for idx in labels])
        plt.imshow(X_test[0])
        plt.title('pred:' + text + '\ntrue: ' + captcha_text)
        plt.show()
def greedy(captcha_text):
    """Render a captcha for ``captcha_text`` and plot the greedy CTC prediction.

    Prints the prediction shape and the per-timestep argmax class ids with
    their ``pre_characters`` entries, then shows the image titled with the
    greedy-decoded text and the true text.

    Args:
        captcha_text: The text to render into the captcha image.
    """
    # Generate the captcha and scale the pixel values by 1/255.
    generator = ImageCaptcha(width=160, height=60)
    pixels = np.array(generator.generate_image(captcha_text)) / 255.0
    # Add a leading batch axis before predicting.
    X_test = np.expand_dims(pixels, axis=0)
    y_pred = model.predict(X_test)
    print("y_pred shape:", y_pred.shape)

    # Most probable class id at every timestep of the first sample.
    print('id', '\t', 'characters')
    for class_id in np.argmax(y_pred[0], axis=-1):
        print(class_id, '\t', pre_characters[class_id])

    # Greedy (best-path) CTC decoding.
    sequence_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=sequence_lengths, greedy=True)
    labels = K.get_value(decoded[0][0])
    out = ''.join([characters[idx] for idx in labels[0]])

    plt.imshow(X_test[0])
    plt.title('pred:' + out + '\ntrue: ' + captcha_text)
    plt.show()
def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]:
    """Predict the text in a single image and a confidence score.

    Args:
        image: Input image; ``uint8`` input is rescaled by 1/255 to float32.

    Returns:
        ``(pred, conf)``: the decoded string with surrounding whitespace
        stripped, and ``exp(-x)`` of the value the greedy decoder returns
        alongside the decoded sequence.
    """
    if image.dtype == np.uint8:
        image = (image / 255).astype(np.float32)

    # Backend function from the 'inputs' layer to the 'softmax_output' layer.
    network = self.network
    softmax_output_fn = K.function(
        [network.get_layer('inputs').input, K.learning_phase()],
        [network.get_layer('softmax_output').output])

    # Add the batch axis; the second feed value (0) goes to learning_phase.
    batch = np.expand_dims(image, 0)
    softmax_output = softmax_output_fn([batch, 0])[0]

    sequence_length = np.array([softmax_output.shape[1]])
    decoded, log_prob = K.ctc_decode(softmax_output, sequence_length, greedy=True)

    pred_raw = K.eval(decoded[0])[0]
    characters = (self.data.mapping[label] for label in pred_raw)
    pred = ''.join(characters).strip()

    neg_sum_logit = K.eval(log_prob)[0][0]
    conf = np.exp(-neg_sum_logit)
    return pred, conf
def evaluate(self):
    """Score the prediction model on one randomly chosen validation batch.

    Returns:
        ``(sequence_accuracy, correct_chars)``: the fraction of samples whose
        decoded string equals the label, and the count of position-wise
        matching characters summed over the batch.
    """
    generator = self.val_generator
    batch_index = np.random.randint(
        0, int(generator.nb_samples / generator.batch_size))
    x_val, y_val = generator[batch_index]

    # The first two timesteps are dropped before decoding.
    trimmed = self.prediction_model.predict(x_val)[:, 2:, :]
    n_samples, n_steps = trimmed.shape[0], trimmed.shape[1]
    decoded = K.ctc_decode(trimmed,
                           input_length=np.ones(n_samples) * n_steps)[0][0]
    ctc_out = K.get_value(decoded)[:, :self.label_len]

    correct_predictions = 0
    correct_char_predictions = 0
    for i in range(generator.batch_size):
        row = ctc_out[i]
        print(row)
        result_str = ''.join([self.characters[c] for c in row]).replace('-', '')
        target = y_val[i]
        if result_str == target:
            correct_predictions += 1
        print(result_str, target)
        # Position-wise character matches (zip stops at the shorter string).
        for predicted_char, true_char in zip(result_str, target):
            if predicted_char == true_char:
                correct_char_predictions += 1

    return correct_predictions / generator.batch_size, correct_char_predictions
def validate(model, x, y_true, input_len, label_len, y_strings, test=False, save_file=None):
    """Compute the mean CTC loss and the mean ``wer`` score for one batch.

    Args:
        model: Callable returning predictions for ``x``.
        x: Batch of model inputs.
        y_true: Label tensor passed to ``ctc_batch_cost``.
        input_len: Per-sample input lengths, one value per sample.
        label_len: Per-sample label lengths, one value per sample.
        y_strings: Reference sentences, one per sample.
        test: When true, write each reference/prediction pair to ``save_file``.
        save_file: Writable text file object; required when ``test`` is true.

    Returns:
        ``(mean_loss, mean_wer)``. The second value is the average of
        ``wer(prediction, reference)`` over ``y_strings``.
    """
    input_len = np.expand_dims(input_len, axis=1)
    label_len = np.expand_dims(label_len, axis=1)
    y_pred = model(x)
    loss = ctc_batch_cost(y_true, y_pred, input_len, label_len)

    # Remove only the axis added above. A bare squeeze() would also collapse
    # the batch axis of a single-sample batch and hand a 0-d array to the
    # decoder.
    input_len = np.squeeze(input_len, axis=1)
    y_decode = ctc_decode(y_pred, input_len)[0][0]

    accuracy = 0.0
    for i, reference in enumerate(y_strings):
        predicted_sentence = indices_to_string(y_decode[i].numpy())
        accuracy += wer(predicted_sentence, reference)
        if test:
            save_file.write("Correct Sentence:" + str(reference) + "\n")
            save_file.write("Predicted Sentence:" + predicted_sentence + "\n")
    return tf.reduce_mean(loss), accuracy / len(y_strings)
def get_predictions_recorded(
    self,
    spectrogram=False,
    recordingpath='recordings/demo.wav',
):
    """Print the model's decoded transcription of one recorded audio file.

    Params:
        spectrogram (bool): Forwarded to ``AudioGenerator``.
        recordingpath (str): Path of the recording to transcribe; also stored
            on ``self.audio_path``.
    """
    # Build the generator and load the training data before normalizing.
    generator = AudioGenerator(spectrogram=spectrogram)
    generator.load_train_data()
    self.audio_path = recordingpath

    # Featurize and normalize the recording.
    features = generator.featurize(recordingpath)
    data_point = generator.normalize(features)

    # Predict on a batch of one and CTC-decode the result.
    acoustic_model = self.input_to_softmax
    prediction = acoustic_model.predict(np.expand_dims(data_point, axis=0))
    output_length = [acoustic_model.output_length(data_point.shape[0])]
    decoded = K.eval(K.ctc_decode(prediction, output_length)[0][0])
    # Decoded ids are shifted by one before the text lookup.
    pred_ints = (decoded + 1).flatten().tolist()

    separator = '-' * 80
    print(separator)
    print('Predicted transcription:\n' + '\n' +
          ''.join(int_sequence_to_text(pred_ints)))
    print(separator)
def predict_on_image(self, image: np.ndarray) -> Tuple[str, float]:
    """Predict the text in a single image and a confidence score.

    Args:
        image: Input image; ``uint8`` input is rescaled by 1/255 to float32.

    Returns:
        ``(pred, conf)``: the decoded string with surrounding whitespace
        stripped, and ``exp(-x)`` of the value the greedy decoder returns
        alongside the decoded sequence.
    """
    if image.dtype == np.uint8:
        image = (image / 255).astype(np.float32)

    # Sub-model from the "image" input layer to the "softmax_output" layer.
    network = self.network
    softmax_output_fn = KerasModel(
        inputs=[network.get_layer("image").input],
        outputs=[network.get_layer("softmax_output").output],
    )

    # Add the batch axis and run the sub-model.
    batch = np.expand_dims(image, 0)
    softmax_output = softmax_output_fn.predict(batch)

    sequence_length = [softmax_output.shape[1]]
    decoded, log_prob = K.ctc_decode(softmax_output, sequence_length, greedy=True)

    pred_raw = K.eval(decoded[0])[0]
    characters = (self.data.mapping[label] for label in pred_raw)
    pred = "".join(characters).strip()

    neg_sum_logit = K.eval(log_prob)[0][0]
    conf = np.exp(-neg_sum_logit)
    return pred, conf
def _decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    """Decode softmax output with CTC greedy or beam search.

    Thin wrapper around ``K.ctc_decode`` that returns the decoded paths as a
    plain list.

    # Arguments
        y_pred: tensor `(samples, time_steps, num_categories)` with the
            softmax output.
        input_length: tensor `(samples, )` with the sequence length of each
            batch item in `y_pred`.
        greedy: use the faster best-path search when true.
        beam_width: beam width for the beam search used when `greedy` is
            false.
        top_paths: number of most probable paths returned when `greedy` is
            false.

    # Returns
        Tuple `(paths, logprobs)`:
            paths: list with one decoded sequence when `greedy` is true,
                otherwise the `top_paths` most probable sequences.
                Important: blank labels are returned as `-1`.
            logprobs: tensor with the log probability of each decoded
                sequence.
    """
    paths, logprobs = K.ctc_decode(
        y_pred=y_pred,
        input_length=input_length,
        greedy=greedy,
        beam_width=beam_width,
        top_paths=top_paths,
    )[:2]
    return (list(paths), logprobs)
def predict(self, uriImage):
    """Recognize the text in the image at ``uriImage``.

    Loads and preprocesses the image, runs ``self.act_model`` on a batch of
    one, greedy-decodes the CTC output and maps label ids to characters with
    ``self.char_list``. The list of decoded strings is printed.

    Returns:
        The decoded string for the (single) input image.
    """
    img = self.preprocessImg(self.loadImage(uriImage))
    batch = np.array([img])

    prediction = self.act_model.predict(batch)
    sequence_lengths = np.ones(prediction.shape[0]) * prediction.shape[1]
    decoded = K.ctc_decode(prediction,
                           input_length=sequence_lengths,
                           greedy=True)[0][0]
    out = K.get_value(decoded)

    # -1 entries are decoder padding and carry no character.
    all_predictions = [
        "".join(self.char_list[int(p)] for p in row if int(p) != -1)
        for row in out
    ]
    print(all_predictions)
    return all_predictions[0]
def _decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
    """Decode softmax output with ``K.ctc_decode`` and return NumPy results.

    All arguments are forwarded to ``K.ctc_decode`` unchanged.

    Returns:
        ``(paths, logprobs)``: a list holding ``.numpy()`` of every decoded
        path, and ``.numpy()`` of the second value returned by the decoder.
    """
    decoded_paths, decoded_logprobs = K.ctc_decode(
        y_pred=y_pred,
        input_length=input_length,
        greedy=greedy,
        beam_width=beam_width,
        top_paths=top_paths,
    )[:2]
    paths = []
    for path in decoded_paths:
        paths.append(path.numpy())
    return (paths, decoded_logprobs.numpy())
def predict_text(model, img):
    """Return the text ``model`` recognizes in a single image.

    The image gets a leading batch axis, the first two output timesteps are
    dropped, and the greedy CTC result is truncated to ``cfg.label_len``
    labels and mapped through ``cfg.characters``. Any ``'-'`` characters are
    removed from the result.
    """
    batch = img[np.newaxis, :, :, :]
    trimmed = model.predict(batch)[:, 2:, :]

    n_samples, n_steps = trimmed.shape[0], trimmed.shape[1]
    decoded = K.ctc_decode(trimmed,
                           input_length=np.ones(n_samples) * n_steps)[0][0]
    ctc_out = K.get_value(decoded)[:, :cfg.label_len]

    text = ''.join([cfg.characters[label] for label in ctc_out[0]])
    return text.replace('-', '')
def __call__(self, batch_logits: np.ndarray, input_length: int, **kwargs) -> List[np.ndarray]:
    """Decode the best guess from logits using the beam search algorithm.

    Runs ``K.ctc_decode`` with ``greedy=False`` and the instance's
    ``beam_width`` / ``top_paths``, keeps the first returned path and
    flattens it into a one-dimensional array.

    Returns:
        A single-element list holding the flattened label array.
    """
    paths = K.ctc_decode(batch_logits,
                         [input_length],
                         greedy=False,
                         beam_width=self.beam_width,
                         top_paths=self.top_paths)[0]
    best_path = K.eval(paths[0])
    decoded = np.array(best_path.flatten().tolist())
    return [decoded]
def evaluate(model, batch_size=128, steps=20):
    """Return the mean exact-match accuracy of ``model`` on generated captchas.

    Args:
        model: Model whose ``predict`` output is fed to the CTC decoder. It is
            now used for prediction; previously this argument was ignored and
            the module-level ``base_model`` was always used instead.
        batch_size: Number of samples per generated batch.
        steps: Number of batches to evaluate.

    Returns:
        The per-batch accuracy summed over all batches and divided by
        ``steps``. Batches whose decoded output is not four labels wide
        contribute nothing to the sum.
    """
    batch_acc = 0
    valid_data = CaptchaSequence(characters, batch_size, steps)
    for [X_test, y_test, _, _], _ in valid_data:
        y_pred = model.predict(X_test)
        n_samples, n_steps = y_pred.shape[0], y_pred.shape[1]
        decoded = K.ctc_decode(
            y_pred, input_length=np.ones(n_samples) * n_steps)[0][0]
        out = K.get_value(decoded)[:, :4]
        # A narrower result cannot be compared element-wise with the labels.
        if out.shape[1] == 4:
            batch_acc += (y_test == out).all(axis=1).mean()
    return batch_acc / steps
def identify_captcha(base64_img):
    """Decode a base64-encoded captcha image and return the recognized text.

    The image pixels are scaled by 1/255 and reshaped to ``(1, 30, 91, 3)``
    before being passed to the module-level ``base_model``. The CTC result is
    truncated to four labels and mapped through ``characters``.
    """
    raw_bytes = base64.b64decode(base64_img)
    picture = Image.open(io.BytesIO(raw_bytes))
    x_test = (np.array(picture) / 255.0).reshape((1, 30, 91, 3))

    y_pred = base_model.predict(x_test)
    sequence_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=sequence_lengths)[0][0]
    labels = K.get_value(decoded)[:, :4]
    return ''.join([characters[m] for m in labels[0]])
def decode_batch_predictions( pred ):
    """Greedy-decode a batch of CTC predictions into UTF-8 strings.

    Every sample is decoded over the full time axis of ``pred`` and the
    result is truncated to the first four labels before the ``num_to_char``
    lookup.

    Returns:
        One decoded string per sample.
    """
    n_samples, n_steps = pred.shape[0], pred.shape[1]
    input_len = np.ones(n_samples) * n_steps
    # Greedy search; beam search is the alternative for harder tasks.
    decoded = K.ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
    results = decoded[:, :4]
    return [
        tf.strings.reduce_join(num_to_char(row)).numpy().decode('utf-8')
        for row in results
    ]
def call(self, inputs, **kwargs):
    """CTC-decode ``inputs``, using the full time axis for every sample.

    Returns:
        ``[prediction, scores]`` exactly as returned by ``K.ctc_decode``.
    """
    dims = tf.shape(inputs)
    # One-element tensor holding the time dimension, repeated once per sample.
    time_steps = dims[1:2]
    input_length = tf.tile(time_steps, [dims[0]])
    # NOTE(review): `greedy` is left at K.ctc_decode's default here, so
    # `beam_width` may have no effect - confirm whether beam search was
    # intended.
    prediction, scores = K.ctc_decode(inputs,
                                      input_length,
                                      beam_width=self.beam_width)
    return [prediction, scores]
def making_prediction(best_model,test_data,test_generator,test_labels) :
    """Predict text for ``test_data`` and collect the matching ground truth.

    Predictions are greedy CTC decodings; each decoded row is cut at the index
    returned by ``fine_stop_element`` and converted with ``labels_to_text``.

    Returns:
        ``(pred, gt)``: the predicted strings, and ``test_labels[img]`` for
        every ``img`` in ``test_generator.texts``.
    """
    y_pred = best_model.predict(test_data, batch_size=2)
    sequence_lengths = np.ones(y_pred.shape[0]) * y_pred.shape[1]
    decoded = K.ctc_decode(y_pred, input_length=sequence_lengths, greedy=True)
    out = K.get_value(decoded[0][0])

    pred = [
        labels_to_text(row[:fine_stop_element(row)])
        for row in out
    ]
    gt = [test_labels[img] for img in test_generator.texts]
    return pred, gt
def __call__(self, batch_logits: np.ndarray, input_length: int) -> List[np.ndarray]:
    """Decode the best guess from logits using the greedy algorithm.

    Runs ``K.ctc_decode`` with ``greedy=True`` and flattens the first
    returned path into a one-dimensional array.

    Returns:
        A single-element list holding the flattened label array.
    """
    # A NumPy-only variant (argmax over classes, then merging repeated labels
    # with itertools.groupby) was sketched here earlier but is not used.
    paths = K.ctc_decode(batch_logits, [input_length], greedy=True)[0]
    best_path = K.eval(paths[0])
    decoded = np.array(best_path.flatten().tolist())
    return [decoded]
def predict(filename, my_model):
    """Recognize the text in the image file ``filename`` with ``my_model``.

    The image is preprocessed with ``preprocess_image(filename, 128, 64)``,
    transposed and reshaped to ``(1, 128, 64, 1)``. The greedy CTC decoding
    of the prediction is passed to ``decode_text``.

    Returns:
        Whatever ``decode_text`` returns for the decoded label array.
    """
    processed = preprocess_image(filename, 128, 64).T
    batch = np.array([processed]).reshape(1, 128, 64, 1)

    prediction = my_model.predict(x=batch)
    sequence_lengths = np.ones(prediction.shape[0]) * prediction.shape[1]
    decoded = tf_keras_backend.ctc_decode(prediction,
                                          input_length=sequence_lengths,
                                          greedy=True)[0][0]
    return decode_text(tf_keras_backend.get_value(decoded))
def Predict(self, data_input, input_len):
    '''
    Run the base model on one utterance and greedy-decode the CTC output.

    Args:
        data_input: Feature array for one utterance; copied into the first
            ``len(data_input)`` frames of a zero-padded 1600-frame buffer.
        input_len: Sequence length handed to the CTC decoder.

    Returns:
        The decoded label sequence of the single batch item.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 is the same dtype and works on every version.
    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1),
                    dtype=np.float64)
    x_in[0, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)

    r = K.ctc_decode(base_pred,
                     in_len,
                     greedy=True,
                     beam_width=100,
                     top_paths=1)
    r1 = K.get_value(r[0][0])
    return r1[0]
def get_decoder(output_tensor, alphabet):
    """Build a beam-search decoding callable for ``output_tensor``.

    Every sample is decoded over the full second axis of ``output_tensor``
    with a beam width of 64. The first decoded path tensor is printed, then
    wrapped in a backend function.

    Returns:
        ``batch_tensorflow_decode`` partially applied with ``alphabet`` and
        the backend decoder function.
    """
    # Per-sample length: count the entries along axis 1 after reducing axis 2.
    per_step = tf.reduce_max(output_tensor, 2)
    step_counts = tf.reduce_sum(tf.ones_like(per_step), 1)
    sequence_length = tf.cast(step_counts, tf.int32)

    top_k_decoded, _ = K.ctc_decode(output_tensor,
                                    sequence_length,
                                    greedy=False,
                                    beam_width=64)
    best_path = top_k_decoded[0]
    print(best_path)
    decoder = K.function([output_tensor], [best_path])
    return partial(batch_tensorflow_decode, alphabet=alphabet, decoder=decoder)
def decode(self, pred):
    """Greedy-decode a batch of CTC predictions into UTF-8 strings.

    Every sample is decoded over the full time axis of ``pred`` and the
    result is truncated to ``self.max_length`` labels before the
    ``self.num_to_char`` lookup.

    Returns:
        One decoded string per sample.
    """
    n_samples, n_steps = pred.shape[0], pred.shape[1]
    input_len = np.ones(n_samples) * n_steps
    # Greedy search; beam search is the alternative for harder tasks.
    decoded = ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
    results = decoded[:, :self.max_length]
    return [
        reduce_join(self.num_to_char(row)).numpy().decode("utf-8")
        for row in results
    ]
def decode_predict_ctc(out, chars = ArchitectureConfig.CHARS, top_paths=1):
    """Beam-search decode ``out`` and return the text of the best paths.

    Args:
        out: Model output for the batch; only the first sample of each
            decoded path is converted to text.
        chars: Character set handed to ``labels_to_text``.
        top_paths: Number of best paths to return.

    Returns:
        A list of ``top_paths`` strings, best path first.
    """
    # The beam must be at least as wide as the number of requested paths.
    beam_width = max(5, top_paths)

    # Decode once; every requested path comes out of the same beam search, so
    # repeating the call per path would only redo identical work.
    decoded_paths = backend.ctc_decode(
        out,
        input_length=np.ones(out.shape[0]) * out.shape[1],
        greedy=False,
        beam_width=beam_width,
        top_paths=top_paths,
    )[0]

    results = []
    for i in range(top_paths):
        labels = backend.get_value(decoded_paths[i])[0]
        results.append(labels_to_text(chars, labels))
    return results
def Predict(self, data_input, input_len):
    '''
    Run the base model on one utterance and greedy-decode the CTC output.

    Args:
        data_input: Feature array for one utterance; copied into the first
            ``len(data_input)`` frames of a zero-padded 1600-frame buffer.
        input_len: Sequence length handed to the CTC decoder.

    Returns:
        The decoded label sequence of the single batch item.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 is the same dtype and works on every version.
    x_in = np.zeros((batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1),
                    dtype=np.float64)
    x_in[0, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)

    r = K.ctc_decode(base_pred,
                     in_len,
                     greedy=True,
                     beam_width=100,
                     top_paths=1)

    # Under a 1.x TensorFlow the result is evaluated in a fresh session;
    # otherwise it is read directly with .numpy().
    if (tf.__version__[0:2] == '1.'):
        r1 = r[0][0].eval(session=tf.compat.v1.Session())
    else:
        r1 = r[0][0].numpy()
    return r1[0]
def predict_text(img):
    """Decode an encoded image buffer and return the recognized text.

    The decoded image is also written to ``./test_img.jpg`` and the predicted
    text is printed below a separator line.

    Args:
        img: Encoded image bytes in the buffer form ``cv2.imdecode`` accepts.

    Returns:
        The text produced by ``num_to_label`` for the first decoded sequence.

    Raises:
        ValueError: If the buffer cannot be decoded into an image.
    """
    # cv2.imdecode requires an explicit read flag; without it the call fails
    # before any decoding happens.
    # NOTE(review): IMREAD_COLOR is assumed - confirm against what
    # preprocess() expects.
    image = cv2.imdecode(img, cv2.IMREAD_COLOR)
    if image is None:
        raise ValueError("could not decode image buffer")
    cv2.imwrite("./test_img.jpg", image)

    image = preprocess(image)
    pred = model.predict(image)

    # Greedy CTC decoding over the full time axis of every sample.
    sequence_lengths = np.ones(pred.shape[0]) * pred.shape[1]
    decoded = K.get_value(
        K.ctc_decode(pred, input_length=sequence_lengths, greedy=True)[0][0])

    predicted_text = num_to_label(decoded[0])
    print("======================")
    print(predicted_text)
    return predicted_text
def Predict(self, data_input, input_len):
    '''
    Run the base model on one utterance and greedy-decode the CTC output.

    Args:
        data_input: Feature array for one utterance; copied into the first
            ``len(data_input)`` frames of a zero-padded buffer of
            ``self.AUDIO_LENGTH`` frames.
        input_len: Sequence length handed to the CTC decoder.

    Returns:
        The decoded label sequence of the single batch item.
    '''
    batch_size = 1
    in_len = np.zeros((batch_size), dtype=np.int32)
    in_len[0] = input_len

    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; np.float64 is the same dtype and works on every version.
    x_in = np.zeros(
        (batch_size, self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1),
        dtype=np.float64)
    x_in[0, 0:len(data_input)] = data_input

    base_pred = self.base_model.predict(x=x_in)

    r = K.ctc_decode(base_pred,
                     in_len,
                     greedy=True,
                     beam_width=100,
                     top_paths=1)
    r1 = K.get_value(r[0][0])
    return r1[0]
def get_prediction(act_model, test_images):
    """Return the greedy CTC-decoded text for every image in ``test_images``.

    Label ids are mapped to characters through the module-level ``letters``;
    ``-1`` entries in the decoded output are skipped.

    Returns:
        One decoded string per input image.
    """
    raw_output = act_model.predict(test_images)
    sequence_lengths = np.ones(raw_output.shape[0]) * raw_output.shape[1]
    decoded = K.ctc_decode(raw_output,
                           input_length=sequence_lengths,
                           greedy=True)[0][0]
    out = K.get_value(decoded)

    return [
        ''.join(letters[int(p)] for p in row if int(p) != -1)
        for row in out
    ]
def test(base_model):
    """Print the exact-match accuracy of ``base_model`` on generated data.

    Data and labels come from ``gen_data_label_data(False)``. Decoded output
    is truncated to seven labels. Entries equal to ``len(chars) + 1`` are
    dropped from both label and prediction before the two are compared as
    strings.
    """
    data, label = gen_data_label_data(False)
    y_pred = base_model.predict(data)
    n_samples, n_steps = y_pred.shape[0], y_pred.shape[1]
    decoded = K.ctc_decode(y_pred,
                           input_length=np.ones(n_samples) * n_steps)[0][0]
    out = K.get_value(decoded)[:, :7]

    total = len(data)
    right_num = 0
    for i in range(total):
        # Value excluded from both sequences before the comparison.
        eco = len(chars) + 1
        str_label = ''.join(str(v) for v in label[i] if v != eco)
        str_out = ''.join(str(v) for v in out[i] if v != eco)
        if str_label == str_out:
            right_num += 1

    acc = (right_num / total) * 100
    print("test acc is :{}%".format(str(acc)))
def predict(self, x, batch_size=None, steps=1, callbacks=None, max_queue_size=10, workers=1, use_multiprocessing=False, ctc_decode=True):
    """Run the model on ``x`` and optionally CTC-decode the output.

    Args:
        x: Model input, forwarded to ``self.model.predict``.
        batch_size, steps, callbacks, max_queue_size, workers,
        use_multiprocessing: Forwarded to ``self.model.predict``. ``steps``
            is also the number of chunks the output is decoded in.
        ctc_decode: When false, skip decoding.

    Returns:
        When ``ctc_decode`` is false: ``(log_output, [])`` where
        ``log_output`` is the log of the output clipped below at ``1e-8``.
        Otherwise ``(predicts, probabilities)``: decoded label lists with
        ``-1`` entries removed, and ``exp`` of the values returned next to
        the decoded paths.
    """
    out = self.model.predict(x=x,
                             batch_size=batch_size,
                             verbose=0,
                             steps=steps,
                             callbacks=callbacks,
                             max_queue_size=max_queue_size,
                             workers=workers,
                             use_multiprocessing=use_multiprocessing)

    if not ctc_decode:
        return np.log(out.clip(min=1e-8)), []

    # Decode in `steps` chunks; every sample uses the longest output length.
    batch_size = int(np.ceil(len(out) / steps))
    input_length = len(max(out, key=len))

    predicts, probabilities = [], []
    for step in range(steps):
        start = step * batch_size
        chunk = np.asarray(out[start:start + batch_size])
        chunk_len = np.asarray([input_length] * len(chunk))

        decode, log = K.ctc_decode(chunk,
                                   chunk_len,
                                   greedy=self.greedy,
                                   beam_width=self.beam_width,
                                   top_paths=self.top_paths)

        probabilities.extend([np.exp(value) for value in log])
        # Drop the -1 entries from every decoded sequence.
        decode = [[[int(p) for p in seq if p != -1] for seq in path]
                  for path in decode]
        # Swap the first two axes so results are grouped per sample.
        predicts.extend(np.swapaxes(decode, 0, 1))

    return (predicts, probabilities)