def get_denoised(prob, ctc_bs=False):
    """Decode ``prob`` to text and run that text through the denoiser.

    The text hypothesis is obtained with ``get_beam_search`` when ``ctc_bs``
    is true and with ``get_arg_max`` otherwise. It is then encoded with
    ``encode_char``, fed to ``denoiser.encode``, and the resulting decoder
    states are handed to ``generator.generate_sequences`` together with the
    hypothesis itself.

    Args:
        prob: Value forwarded unchanged to the selected decoding helper
            (presumably per-step character probabilities -- confirm against
            ``get_arg_max`` / ``get_beam_search``).
        ctc_bs: Use CTC beam search instead of arg-max decoding.

    Returns:
        The result of ``generator.generate_sequences`` with ``.strip()``
        applied.
    """
    # CTC beam search before denoising yields only limited improvements and
    # is very slow, so arg-max decoding is the default.
    noisy_text = get_beam_search(prob) if ctc_bs else get_arg_max(prob)

    char_ids, char_count = encode_char(noisy_text)
    # Wrap the ids in a list so the encoder input gets a leading axis of size 1.
    encoder_input = mx.nd.array([char_ids], ctx=ctx)
    encoder_length = mx.nd.array(char_count, ctx=ctx)

    encoded, _ = denoiser.encode(encoder_input, valid_length=encoder_length)
    decoder_states = denoiser.decoder.init_state_from_encoder(
        encoded, encoder_valid_length=encoder_length
    )

    # Single start token placed on the same device as the encoder input.
    start_token = mx.nd.full(
        shape=(1,), ctx=encoder_input.context, dtype=np.float32, val=BOS
    )
    denoised = generator.generate_sequences(start_token, decoder_states, noisy_text)
    return denoised.strip()
def get_sentence(net, sentence):
    """Beam-search decode ``sentence`` with ``net`` and return one hypothesis.

    A ``BeamSearchSampler`` (beam size 5, at most 150 steps) is built around
    ``net.decode_logprob``. ``sentence`` is encoded with ``encode_char``, run
    through ``net.encode``, and the sampler is started from a single ``BOS``
    token. The hypothesis at beam index 0 of the first batch entry is passed
    to ``decode_char``.

    Args:
        net: Model exposing ``encode``, ``decode_logprob`` and
            ``decoder.init_state_from_encoder``.
        sentence: Input forwarded to ``encode_char`` (presumably a string --
            confirm against ``encode_char``).

    Returns:
        Whatever ``decode_char`` returns for the selected hypothesis.
    """
    scorer = nlp.model.BeamSearchScorer(alpha=0, K=2, from_logits=False)
    beam_sampler = nlp.model.BeamSearchSampler(
        beam_size=5,
        decoder=net.decode_logprob,
        eos_id=EOS,
        scorer=scorer,
        max_length=150,
    )

    src_seq, src_valid_length = encode_char(sentence)
    # Wrap the ids in a list so the encoder input gets a leading axis of size 1.
    src_seq = mx.nd.array([src_seq], ctx=ctx)
    src_valid_length = mx.nd.array(src_valid_length, ctx=ctx)

    encoder_outputs, _ = net.encode(src_seq, valid_length=src_valid_length)
    states = net.decoder.init_state_from_encoder(
        encoder_outputs, encoder_valid_length=src_valid_length
    )
    # Single start token placed on the same device as the encoder input.
    inputs = mx.nd.full(shape=(1,), ctx=src_seq.context, dtype=np.float32, val=BOS)

    # Only the sampled sequences are needed; the scores and valid lengths the
    # sampler also returns are deliberately not copied to the host.
    samples, _scores, _valid_lengths = beam_sampler(inputs, states)

    # First batch entry, then beam index 0 (GluonNLP documents beams as
    # sorted by score in descending order -- confirm for the installed version).
    best_hypothesis = samples[0].asnumpy()[0]
    return decode_char(best_hypothesis)
src_seq = mx.nd.array([src_seq], ctx=ctx) src_valid_length = mx.nd.array(src_valid_length, ctx=ctx) encoder_outputs, _ = denoiser.encode(src_seq, valid_length=src_valid_length) states = denoiser.decoder.init_state_from_encoder( encoder_outputs, encoder_valid_length=src_valid_length) inputs = mx.nd.full(shape=(1, ), ctx=src_seq.context, dtype=np.float32, val=BOS) output = generator.generate_sequences(inputs, states, text) return output.strip() sentence = "This sentnce has an eror" src_seq, src_valid_length = encode_char(sentence) src_seq = mx.nd.array([src_seq], ctx=ctx) src_valid_length = mx.nd.array(src_valid_length, ctx=ctx) encoder_outputs, _ = denoiser.encode(src_seq, valid_length=src_valid_length) states = denoiser.decoder.init_state_from_encoder( encoder_outputs, encoder_valid_length=src_valid_length) inputs = mx.nd.full(shape=(1, ), ctx=src_seq.context, dtype=np.float32, val=BOS) print(sentence) print("Choice") print(generator.generate_sequences(inputs, states, sentence)) sentence = "This sentnce has an eror" src_seq, src_valid_length = encode_char(sentence)
def transform(data, label):
    """Encode a (data, label) pair and keep the raw values alongside.

    ``data`` is encoded with ``encode_char(..., src=True)`` and ``label``
    with ``encode_char(..., src=False)``; each call is expected to yield a
    two-item result that is unpacked into an encoded sequence and its valid
    length.

    Args:
        data: Source-side sample passed to ``encode_char``.
        label: Target-side sample passed to ``encode_char``.

    Returns:
        A 6-tuple ``(src, src_valid_length, tgt, tgt_valid_length, data,
        label)``.
    """
    source_ids, source_length = encode_char(data, src=True)
    target_ids, target_length = encode_char(label, src=False)
    return (
        source_ids,
        source_length,
        target_ids,
        target_length,
        data,
        label,
    )