def batch_ctc_prefix_beam_search_cpu(self, batch_log_probs_seq,
                                     batch_log_probs_idx, batch_len,
                                     batch_root, batch_start, beam_size,
                                     blank_id, space_id, cutoff_prob,
                                     num_processes, scorer):
    """Run CTC prefix beam search on CPU for a padded batch.

    Return: Batch x Beam_size elements; each element is a tuple
    (score, list of token ids).
    """
    batch_len_list = batch_len
    batch_log_probs_seq_list = []
    batch_log_probs_idx_list = []
    for i in range(len(batch_len_list)):
        # Trim padded frames so the decoder only scores valid time steps.
        cur_len = int(batch_len_list[i])
        batch_log_probs_seq_list.append(
            batch_log_probs_seq[i][0:cur_len].tolist())
        batch_log_probs_idx_list.append(
            batch_log_probs_idx[i][0:cur_len].tolist())
    score_hyps = ctc_beam_search_decoder_batch(batch_log_probs_seq_list,
                                               batch_log_probs_idx_list,
                                               batch_root, batch_start,
                                               beam_size, num_processes,
                                               blank_id, space_id,
                                               cutoff_prob, scorer)
    return score_hyps
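# A minimal sketch (assuming NumPy; all shapes and values below are made up)
# of how the batch_log_probs_seq / batch_log_probs_idx / batch_len inputs can
# be derived from full per-frame CTC posteriors. In the real pipeline they
# come from the encoder's log-softmax output plus a top-k op; this only
# illustrates the expected layout.
import numpy as np

# Hypothetical sizes: B utterances, T (padded) frames, V tokens, top-`beam` kept.
B, T, V, beam = 2, 8, 30, 5
ctc_log_probs = np.log(np.random.dirichlet(np.ones(V), size=(B, T)))

# Keep only the `beam` most probable tokens per frame, highest first,
# which is the layout batch_ctc_prefix_beam_search_cpu expects.
batch_log_probs_idx = np.argsort(ctc_log_probs, axis=-1)[:, :, ::-1][:, :, :beam]
batch_log_probs_seq = np.take_along_axis(ctc_log_probs,
                                         batch_log_probs_idx, axis=-1)
batch_len = np.array([8, 6])  # true frame counts before padding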
def ctc_beam_search_decoder_batch(probs_split,
                                  vocabulary,
                                  beam_size,
                                  num_processes,
                                  cutoff_prob=1.0,
                                  cutoff_top_n=40,
                                  ext_scoring_func=None):
    """Wrapper for the batched CTC beam search decoder.

    :param probs_split: 3-D list with each element as an instance of 2-D list
                        of probabilities used by ctc_beam_search_decoder().
    :type probs_split: 3-D list
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param num_processes: Number of parallel processes.
    :type num_processes: int
    :param cutoff_prob: Cutoff probability in vocabulary pruning,
                        default 1.0, no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning: only the cutoff_top_n
                         characters with the highest probabilities in the
                         vocabulary are used in beam search, default 40.
    :type cutoff_top_n: int
    :param ext_scoring_func: External scoring function for partially decoded
                             sentences, e.g. word count or language model.
    :type ext_scoring_func: callable
    :return: List of tuples of log probability and sentence as decoding
             results, in descending order of the probability.
    :rtype: list
    """
    probs_split = [probs_seq.tolist() for probs_seq in probs_split]
    batch_beam_results = swig_decoders.ctc_beam_search_decoder_batch(
        probs_split, vocabulary, beam_size, num_processes, cutoff_prob,
        cutoff_top_n, ext_scoring_func)
    batch_beam_results = [[(res[0], res[1]) for res in beam_results]
                          for beam_results in batch_beam_results]
    return batch_beam_results
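# A hedged usage sketch for the wrapper above: it assumes the swig_decoders
# extension is built and importable, and uses random probabilities plus a toy
# vocabulary purely to show the calling convention and the shape of the
# result (beams come back sorted best first).
import numpy as np

vocabulary = ["a", "b", "c", "d"]               # toy vocabulary
probs_split = [np.random.dirichlet(np.ones(len(vocabulary)), size=20)
               for _ in range(4)]               # 4 utterances, 20 frames each

batch_results = ctc_beam_search_decoder_batch(
    probs_split, vocabulary, beam_size=10, num_processes=2)
for beam_results in batch_results:
    best_log_prob, best_sentence = beam_results[0]  # best-first ordering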
def ctc_beam_search_decoder_batch(probs_split,
                                  vocabulary,
                                  beam_size,
                                  num_processes,
                                  cutoff_prob=1.0,
                                  cutoff_top_n=40,
                                  blank_id=0,
                                  ext_scoring_func=None):
    """Wrapper for the batched CTC beam search decoder.

    :param probs_split: 3-D list with each element as an instance of 2-D list
                        of probabilities used by ctc_beam_search_decoder().
    :type probs_split: 3-D list
    :param vocabulary: Vocabulary list.
    :type vocabulary: list
    :param beam_size: Width for beam search.
    :type beam_size: int
    :param num_processes: Number of parallel decoding processes.
    :type num_processes: int
    :param cutoff_prob: Cutoff probability in pruning, default 1.0,
                        no pruning.
    :type cutoff_prob: float
    :param cutoff_top_n: Cutoff number in pruning: only the cutoff_top_n
                         characters with the highest probabilities in the
                         vocabulary are used in beam search, default 40.
    :type cutoff_top_n: int
    :param blank_id: Index of the blank token.
    :type blank_id: int
    :param ext_scoring_func: External scoring function for partially decoded
                             sentences, e.g. word count or language model.
    :type ext_scoring_func: callable
    :return: List of tuples of log probability and sentence as decoding
             results, in descending order of the probability.
    :rtype: list
    """
    probs_split = [probs_seq.tolist() for probs_seq in probs_split]
    batch_beam_results = swig_decoders.ctc_beam_search_decoder_batch(
        probs_split, vocabulary, beam_size, num_processes, cutoff_prob,
        cutoff_top_n, blank_id, ext_scoring_func)
    batch_beam_results = [[(res[0], res[1]) for res in beam_results]
                          for beam_results in batch_beam_results]
    return batch_beam_results
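# The only behavioral difference from the variant above is the explicit
# blank_id, which is threaded through to the SWIG call in the same position.
# A toy call (assuming the same probs_split/vocabulary as the sketch above):
batch_results = ctc_beam_search_decoder_batch(
    probs_split, vocabulary, beam_size=10, num_processes=2,
    cutoff_prob=0.99999, cutoff_top_n=40, blank_id=0)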
def execute(self, requests):
    """`execute` must be implemented in every Python model. `execute`
    function receives a list of pb_utils.InferenceRequest as the only
    argument. This function is called when an inference is requested
    for this model.

    Parameters
    ----------
    requests : list
        A list of pb_utils.InferenceRequest

    Returns
    -------
    list
        A list of pb_utils.InferenceResponse. The length of this list must
        be the same as `requests`
    """
    responses = []
    # Every Python backend must iterate through list of requests and create
    # an instance of pb_utils.InferenceResponse class for each of them. You
    # should avoid storing any of the input Tensors in the class attributes
    # as they will be overridden in subsequent inference requests. You can
    # make a copy of the underlying NumPy array and store it if it is
    # required.
    batch_encoder_out, batch_encoder_lens = [], []
    batch_log_probs, batch_log_probs_idx = [], []
    batch_count = []
    batch_root = TrieVector()
    batch_start = []
    root_dict = {}
    encoder_max_len = 0
    hyps_max_len = 0
    total = 0
    for request in requests:
        # Perform inference on the request and append it to responses list
        in_0 = pb_utils.get_input_tensor_by_name(request, "encoder_out")
        in_1 = pb_utils.get_input_tensor_by_name(request, "encoder_out_lens")
        in_2 = pb_utils.get_input_tensor_by_name(request, "batch_log_probs")
        in_3 = pb_utils.get_input_tensor_by_name(request,
                                                 "batch_log_probs_idx")
        batch_encoder_out.append(in_0.as_numpy())
        encoder_max_len = max(encoder_max_len,
                              batch_encoder_out[-1].shape[1])
        cur_b_lens = in_1.as_numpy()
        batch_encoder_lens.append(cur_b_lens)
        cur_batch = cur_b_lens.shape[0]
        batch_count.append(cur_batch)
        cur_b_log_probs = in_2.as_numpy()
        cur_b_log_probs_idx = in_3.as_numpy()
        for i in range(cur_batch):
            cur_len = cur_b_lens[i]
            cur_probs = cur_b_log_probs[i][0:cur_len, :].tolist()  # T x Beam
            cur_idx = cur_b_log_probs_idx[i][0:cur_len, :].tolist()  # T x Beam
            batch_log_probs.append(cur_probs)
            batch_log_probs_idx.append(cur_idx)
            root_dict[total] = PathTrie()
            batch_root.append(root_dict[total])
            batch_start.append(True)
            total += 1

    score_hyps = ctc_beam_search_decoder_batch(
        batch_log_probs,
        batch_log_probs_idx,
        batch_root,
        batch_start,
        self.beam_size,
        min(total, self.num_processes),
        blank_id=self.blank_id,
        space_id=-2,
        cutoff_prob=self.cutoff_prob,
        ext_scorer=self.lm)

    all_hyps = []
    all_ctc_score = []
    max_seq_len = 0
    for seq_cand in score_hyps:
        # if candidates less than beam size
        if len(seq_cand) != self.beam_size:
            seq_cand = list(seq_cand)
            seq_cand += (self.beam_size - len(seq_cand)) * [(-float("INF"),
                                                             (0, ))]
        for score, hyps in seq_cand:
            all_hyps.append(list(hyps))
            all_ctc_score.append(score)
            max_seq_len = max(len(hyps), max_seq_len)

    beam_size = self.beam_size
    feature_size = self.feature_size
    hyps_max_len = max_seq_len + 2
    in_ctc_score = np.zeros((total, beam_size), dtype=self.data_type)
    in_hyps_pad_sos_eos = np.ones(
        (total, beam_size, hyps_max_len), dtype=np.int64) * self.eos
    if self.bidecoder:
        in_r_hyps_pad_sos_eos = np.ones(
            (total, beam_size, hyps_max_len), dtype=np.int64) * self.eos
    in_hyps_lens_sos = np.ones((total, beam_size), dtype=np.int32)

    in_encoder_out = np.zeros((total, encoder_max_len, feature_size),
                              dtype=self.data_type)
    in_encoder_out_lens = np.zeros(total, dtype=np.int32)
    st = 0
    for b in batch_count:
        t = batch_encoder_out.pop(0)
        in_encoder_out[st:st + b, 0:t.shape[1]] = t
        in_encoder_out_lens[st:st + b] = batch_encoder_lens.pop(0)
        for i in range(b):
            for j in range(beam_size):
                cur_hyp = all_hyps.pop(0)
                cur_len = len(cur_hyp) + 2
                in_hyp = [self.sos] + cur_hyp + [self.eos]
                in_hyps_pad_sos_eos[st + i][j][0:cur_len] = in_hyp
                in_hyps_lens_sos[st + i][j] = cur_len - 1
                if self.bidecoder:
                    r_in_hyp = [self.sos] + cur_hyp[::-1] + [self.eos]
                    in_r_hyps_pad_sos_eos[st + i][j][0:cur_len] = r_in_hyp
                in_ctc_score[st + i][j] = all_ctc_score.pop(0)
        st += b
    in_encoder_out_lens = np.expand_dims(in_encoder_out_lens, axis=1)
    in_tensor_0 = pb_utils.Tensor("encoder_out", in_encoder_out)
    in_tensor_1 = pb_utils.Tensor("encoder_out_lens", in_encoder_out_lens)
    in_tensor_2 = pb_utils.Tensor("hyps_pad_sos_eos", in_hyps_pad_sos_eos)
    in_tensor_3 = pb_utils.Tensor("hyps_lens_sos", in_hyps_lens_sos)
    input_tensors = [in_tensor_0, in_tensor_1, in_tensor_2, in_tensor_3]
    if self.bidecoder:
        in_tensor_4 = pb_utils.Tensor("r_hyps_pad_sos_eos",
                                      in_r_hyps_pad_sos_eos)
        input_tensors.append(in_tensor_4)
    in_tensor_5 = pb_utils.Tensor("ctc_score", in_ctc_score)
    input_tensors.append(in_tensor_5)

    inference_request = pb_utils.InferenceRequest(
        model_name='decoder',
        requested_output_names=['best_index'],
        inputs=input_tensors)
    inference_response = inference_request.exec()
    if inference_response.has_error():
        raise pb_utils.TritonModelException(
            inference_response.error().message())
    else:
        # Extract the output tensors from the inference response.
        best_index = pb_utils.get_output_tensor_by_name(
            inference_response, 'best_index')
        best_index = best_index.as_numpy()
        hyps = []
        idx = 0
        for cands, cand_lens in zip(in_hyps_pad_sos_eos, in_hyps_lens_sos):
            best_idx = best_index[idx][0]
            best_cand_len = cand_lens[best_idx] - 1  # remove sos
            best_cand = cands[best_idx][1:1 + best_cand_len].tolist()
            hyps.append(best_cand)
            idx += 1

        hyps = map_batch(
            hyps, self.vocabulary,
            min(multiprocessing.cpu_count(), len(in_ctc_score)))
        st = 0
        for b in batch_count:
            sents = np.array(hyps[st:st + b])
            out0 = pb_utils.Tensor("OUTPUT0", sents.astype(self.out0_dtype))
            inference_response = pb_utils.InferenceResponse(
                output_tensors=[out0])
            responses.append(inference_response)
            st += b
    return responses
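# To make the padding layout built in execute() concrete, here is a toy
# illustration of one row of in_hyps_pad_sos_eos and its matching
# in_hyps_lens_sos entry. All ids are made up; sos and eos share one id,
# mirroring how this deployment uses len(char_dict) - 1 for both.
import numpy as np

sos = eos = 4232                 # hypothetical shared sos/eos id
cur_hyp = [23, 409, 12]          # token ids of one CTC beam candidate
hyps_max_len = len(cur_hyp) + 2  # longest candidate + sos + eos

row = np.full(hyps_max_len, eos, dtype=np.int64)   # eos-padded row
row[0:len(cur_hyp) + 2] = [sos] + cur_hyp + [eos]
# row == [4232, 23, 409, 12, 4232]
hyps_lens_sos = len(cur_hyp) + 1  # counts sos but not eos, as in execute()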
def main():
    args = get_args()
    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s')
    os.environ['CUDA_VISIBLE_DEVICES'] = str(args.gpu)
    with open(args.config, 'r') as fin:
        configs = yaml.load(fin, Loader=yaml.FullLoader)
    if len(args.override_config) > 0:
        configs = override_config(configs, args.override_config)

    symbol_table = read_symbol_table(args.dict)
    test_conf = copy.deepcopy(configs['dataset_conf'])
    test_conf['filter_conf']['max_length'] = 102400
    test_conf['filter_conf']['min_length'] = 0
    test_conf['filter_conf']['token_max_length'] = 102400
    test_conf['filter_conf']['token_min_length'] = 0
    test_conf['filter_conf']['max_output_input_ratio'] = 102400
    test_conf['filter_conf']['min_output_input_ratio'] = 0
    test_conf['speed_perturb'] = False
    test_conf['spec_aug'] = False
    test_conf['shuffle'] = False
    test_conf['sort'] = False
    test_conf['fbank_conf']['dither'] = 0.0
    test_conf['batch_conf']['batch_type'] = "static"
    test_conf['batch_conf']['batch_size'] = args.batch_size

    test_dataset = Dataset(args.data_type,
                           args.test_data,
                           symbol_table,
                           test_conf,
                           args.bpe_model,
                           partition=False)
    test_data_loader = DataLoader(test_dataset,
                                  batch_size=None,
                                  num_workers=0)

    # Init asr model from configs
    use_cuda = args.gpu >= 0 and torch.cuda.is_available()
    if use_cuda:
        EP_list = ['CUDAExecutionProvider', 'CPUExecutionProvider']
    else:
        EP_list = ['CPUExecutionProvider']

    encoder_ort_session = rt.InferenceSession(args.encoder_onnx,
                                              providers=EP_list)
    decoder_ort_session = None
    if args.mode == "attention_rescoring":
        decoder_ort_session = rt.InferenceSession(args.decoder_onnx,
                                                  providers=EP_list)

    # Load dict
    vocabulary = []
    char_dict = {}
    with open(args.dict, 'r') as fin:
        for line in fin:
            arr = line.strip().split()
            assert len(arr) == 2
            char_dict[int(arr[1])] = arr[0]
            vocabulary.append(arr[0])
    eos = sos = len(char_dict) - 1

    with torch.no_grad(), open(args.result_file, 'w') as fout:
        for _, batch in enumerate(test_data_loader):
            keys, feats, _, feats_lengths, _ = batch
            ort_inputs = {
                encoder_ort_session.get_inputs()[0].name: feats.numpy(),
                encoder_ort_session.get_inputs()[1].name:
                    feats_lengths.numpy()
            }
            ort_outs = encoder_ort_session.run(None, ort_inputs)
            encoder_out, encoder_out_lens, ctc_log_probs, \
                beam_log_probs, beam_log_probs_idx = ort_outs
            beam_size = beam_log_probs.shape[-1]
            batch_size = beam_log_probs.shape[0]
            num_processes = min(multiprocessing.cpu_count(), batch_size)
            if args.mode == 'ctc_greedy_search':
                # Take the top-1 token index at each frame. This also covers
                # beam_size == 1, where guarding the assignment with
                # `if beam_size != 1` would leave log_probs_idx undefined.
                log_probs_idx = beam_log_probs_idx[:, :, 0]
                batch_sents = []
                for idx, seq in enumerate(log_probs_idx):
                    batch_sents.append(seq[0:encoder_out_lens[idx]].tolist())
                hyps = map_batch(batch_sents, vocabulary, num_processes,
                                 True, 0)
            elif args.mode in ('ctc_prefix_beam_search',
                               "attention_rescoring"):
                batch_log_probs_seq_list = beam_log_probs.tolist()
                batch_log_probs_idx_list = beam_log_probs_idx.tolist()
                batch_len_list = encoder_out_lens.tolist()
                batch_log_probs_seq = []
                batch_log_probs_ids = []
                batch_start = []  # only effective in streaming deployment
                batch_root = TrieVector()
                root_dict = {}
                for i in range(len(batch_len_list)):
                    num_sent = batch_len_list[i]
                    batch_log_probs_seq.append(
                        batch_log_probs_seq_list[i][0:num_sent])
                    batch_log_probs_ids.append(
                        batch_log_probs_idx_list[i][0:num_sent])
                    root_dict[i] = PathTrie()
                    batch_root.append(root_dict[i])
                    batch_start.append(True)
                score_hyps = ctc_beam_search_decoder_batch(
                    batch_log_probs_seq, batch_log_probs_ids, batch_root,
                    batch_start, beam_size, num_processes, 0, -2, 0.99999)
                if args.mode == 'ctc_prefix_beam_search':
                    hyps = []
                    for cand_hyps in score_hyps:
                        hyps.append(cand_hyps[0][1])
                    hyps = map_batch(hyps, vocabulary, num_processes,
                                     False, 0)
            if args.mode == 'attention_rescoring':
                ctc_score, all_hyps = [], []
                max_len = 0
                for hyps in score_hyps:
                    cur_len = len(hyps)
                    if len(hyps) < beam_size:
                        hyps += (beam_size - cur_len) * [(-float("INF"),
                                                          (0, ))]
                    for hyp in hyps:
                        ctc_score.append(hyp[0])
                        all_hyps.append(list(hyp[1]))
                        if len(hyp[1]) + 1 > max_len:
                            max_len = len(hyp[1]) + 1
                assert len(ctc_score) == beam_size * batch_size
                hyps_pad_sos = np.ones(
                    (batch_size, beam_size, max_len),
                    dtype=np.int64) * IGNORE_ID
                r_hyps_pad_sos = np.ones(
                    (batch_size, beam_size, max_len),
                    dtype=np.int64) * IGNORE_ID
                hyps_lens_sos = np.ones((batch_size, beam_size),
                                        dtype=np.int32)
                k = 0
                for i in range(batch_size):
                    for j in range(beam_size):
                        cand = all_hyps[k]
                        hyps_pad_sos[i][j][0:len(cand) + 1] = [sos] + cand
                        r_hyps_pad_sos[i][j][0:len(cand) + 1] = \
                            [sos] + cand[::-1]
                        hyps_lens_sos[i][j] = len(cand) + 1
                        k += 1
                decoder_ort_inputs = {
                    decoder_ort_session.get_inputs()[0].name: encoder_out,
                    decoder_ort_session.get_inputs()[1].name:
                        encoder_out_lens,
                    decoder_ort_session.get_inputs()[2].name: hyps_pad_sos,
                    decoder_ort_session.get_inputs()[3].name: hyps_lens_sos,
                    decoder_ort_session.get_inputs()[4].name: r_hyps_pad_sos
                }
                decoder_out, r_decoder_out = decoder_ort_session.run(
                    None, decoder_ort_inputs)
                best_sents = []
                k = 0
                for d_o, r_d_o in zip(decoder_out, r_decoder_out):
                    # d_o & r_d_o: beam x T x V
                    cur_best_sent = []
                    cur_best_score = -float("inf")
                    for sent_d_o, sent_r_d_o in zip(d_o, r_d_o):
                        cand = all_hyps[k] + [eos]
                        r_cand = all_hyps[k][::-1] + [eos]
                        score, r_score = 0, 0
                        for i in range(len(cand)):
                            index, r_index = cand[i], r_cand[i]
                            score += sent_d_o[i][index]
                            r_score += sent_r_d_o[i][r_index]
                        if args.reverse_weight > 0:
                            score = score * (1 - args.reverse_weight) + \
                                args.reverse_weight * r_score
                        score = score + args.ctc_weight * ctc_score[k]
                        if score > cur_best_score:
                            cur_best_sent = all_hyps[k]
                            cur_best_score = score
                        k += 1
                    best_sents.append(cur_best_sent)
                hyps = map_batch(best_sents, vocabulary, num_processes)

            for i, key in enumerate(keys):
                content = hyps[i]
                logging.info('{} {}'.format(key, content))
                fout.write('{} {}\n'.format(key, content))
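# A toy numeric check of the rescoring arithmetic in main() (all scores are
# made up; the weights mirror args.reverse_weight and args.ctc_weight): the
# left-to-right and right-to-left attention scores are blended first, then
# the weighted CTC prefix score is added.
reverse_weight, ctc_weight = 0.3, 0.5
score, r_score, ctc = -4.2, -4.8, -6.0   # L2R attention, R2L attention, CTC

final = score * (1 - reverse_weight) + reverse_weight * r_score \
    + ctc_weight * ctc
# final == -4.2 * 0.7 + 0.3 * -4.8 + 0.5 * -6.0 == -7.38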