def main():
    """Train the letters model with a TF Estimator and decode sample words.

    Loads the source/target letter files, builds an Estimator around
    ``tf_estimator_model_fn`` and then, ``args.num_epochs`` times, trains for
    1000 steps on shuffled batches and greedy-decodes three sample words.
    """
    loader = DataLoader(
        source_path='../temp/letters_source.txt',
        target_path='../temp/letters_target.txt')
    sources, targets = loader.load()

    estimator = tf.estimator.Estimator(
        tf_estimator_model_fn,
        params=prepare_params(loader),
        model_dir=args.model_dir)

    for _ in range(args.num_epochs):
        # num_epochs=None lets the input repeat; `steps` bounds each round.
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'source': sources, 'target': targets},
            batch_size=args.batch_size,
            num_epochs=None,
            shuffle=True)
        estimator.train(input_fn, steps=1000)
        greedy_decode(['apple', 'common', 'zhedong'], estimator, loader)
def main():
    """Train the dialog model for one pass and decode a few sample prompts.

    Loads the dialog source/target files, reports both vocabulary sizes,
    trains an Estimator built from ``tf_estimator_model_fn`` for a single
    pass over the shuffled data and greedy-decodes four sample sentences.
    """
    loader = DataLoader(source_path='../temp/dialog_source.txt',
                        target_path='../temp/dialog_target.txt')
    sources, targets = loader.load()
    print('Source Vocab Size:', len(loader.source_word2idx))
    print('Target Vocab Size:', len(loader.target_word2idx))

    estimator = tf.estimator.Estimator(tf_estimator_model_fn,
                                       params=prepare_params(loader))

    for _ in range(1):
        features = {'source': sources, 'target': targets}
        input_fn = tf.estimator.inputs.numpy_input_fn(
            x=features,
            batch_size=args.batch_size,
            num_epochs=1,
            shuffle=True)
        estimator.train(input_fn)
        greedy_decode(['你是谁', '你喜欢我吗', '给我唱一首歌', '我帅吗'], estimator, loader)
def acc_batch(preds, labels):
    """Count exact sequence matches between decoded predictions and labels.

    Args:
        preds: Raw model output for a batch; passed unchanged to
            ``greedy_decode`` with ``blank=train_parameters["class_dim"]``.
        labels: Iterable of label arrays in which ``-1`` marks padding.
            Everything from the first ``-1`` onward is dropped; a label
            without any ``-1`` is used in full.

    Returns:
        A ``(right, total)`` tuple: the number of samples whose decoded
        string equals the label string, and the number of samples compared.
    """
    trimmed = []
    for label in labels:
        pad_positions = np.where(label == -1)[0]
        # A label that fills its whole row has no padding marker; keep it
        # whole instead of failing on an empty index array.
        if len(pad_positions) > 0:
            label = label[:pad_positions[0]]
        trimmed.append(label)

    decoded = greedy_decode(preds, blank=train_parameters["class_dim"])
    index_to_char = train_parameters['r_label_dict']

    right = 0
    total = 0
    for truth, guess in zip(trimmed, decoded):
        truth_text = "".join(index_to_char[c] for c in truth)
        guess_text = "".join(index_to_char[c] for c in guess)
        if truth_text == guess_text:
            right += 1
        total += 1
    return right, total
def static_infer(files, save_static_path):
    """Recognise text in image files using a saved static-graph model.

    Args:
        files: Iterable of image file paths.
        save_static_path: Directory the inference model is loaded from.

    Returns:
        A list of ``"<file basename>\\t<decoded text>"`` strings, one per
        input file.
    """
    # A static graph is run through an executor on the previously defined
    # network.
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    print('train with {}'.format(place))
    executor = fluid.Executor(place)
    program, feed_vars, fetch_vars = fluid.io.load_inference_model(save_static_path, executor)

    # The executor's run() method performs the actual computation.
    lines = []
    for path in tqdm(files):
        image = precess_img(path)
        outputs = executor.run(program,
                               feed={feed_vars[0]: image},
                               fetch_list=fetch_vars)
        fetch, = outputs  # exactly one fetch target is expected
        decoded = utils.greedy_decode(fetch, blank=train_parameters["class_dim"])
        text = "".join(train_parameters['r_label_dict'][c] for c in decoded[0])
        lines.append('{0}\t{1}'.format(os.path.basename(path), text))
    return lines
def test(model: nn.Module,
         device: torch.device,
         test_loader: DataLoader,
         criterion: nn.Module,
         text_transform: Callable,
         log_every=40):
    """Evaluate ``model`` on ``test_loader`` and report loss, CER and WER.

    Args:
        model: Network to evaluate; switched to eval mode.
        device: Device the spectrograms and labels are moved to.
        test_loader: Yields ``(spectrograms, labels, input_lengths,
            label_lengths)`` batches.
        criterion: Loss called as ``criterion(output, labels,
            input_lengths, label_lengths)``.
        text_transform: Passed through to ``greedy_decode``.
        log_every: Print progress and the decoded batch every this many
            batches.

    Returns:
        ``Metrics`` holding the mean per-batch loss, CER and WER.
    """
    print('Evaluating...')
    model.eval()
    test_cer = []
    test_wer = []
    test_loss = []
    data_len = len(test_loader)

    with torch.no_grad():
        for i, (spectrograms, labels, input_lengths,
                label_lengths) in enumerate(test_loader):
            spectrograms = spectrograms.to(device)
            labels = labels.to(device)

            logits = model(spectrograms)  # (batch, time, n_class)
            log_probs = F.log_softmax(logits, dim=2)
            output = log_probs.transpose(0, 1)  # (time, batch, n_class)

            batch_loss = criterion(output, labels, input_lengths,
                                   label_lengths)
            test_loss.append(batch_loss.item())

            # Decoding takes the batch-major layout again.
            decoded_preds, decoded_targets = greedy_decode(
                output.transpose(0, 1), labels, label_lengths,
                text_transform)
            test_cer.append(
                word_error_rate(decoded_targets, decoded_preds,
                                use_cer=True))
            test_wer.append(word_error_rate(decoded_targets, decoded_preds))

            if i % log_every != 0:
                continue
            print(f'{i}/{data_len}')
            print(f'Test WER: {test_wer[-1]}; CER: {test_cer[-1]}')
            for p, t in zip(decoded_preds, decoded_targets):
                print(f'Prediction: [{p}]\t Ground Truth: [{t}]')

    avg_cer = np.mean(test_cer)
    avg_wer = np.mean(test_wer)
    avg_loss = np.mean(test_loss)
    print(
        f'Test set: Average loss: {avg_loss}, Average CER: {avg_cer} Average WER: {avg_wer}'
    )
    return Metrics(loss=avg_loss, cer=avg_cer, wer=avg_wer)
def infer(files, save_static_path=None):
    """Run the dygraph CRNN on the first file and optionally export it.

    Args:
        files: Iterable of image file paths. Only the first one is
            processed, because the loop breaks after one iteration.
        save_static_path: When not ``None``, the model is traced on the
            processed image and saved there as a static inference model.

    Returns:
        A list of ``"<file basename>\\t<decoded text>"`` strings (at most
        one entry).
    """
    results = []
    if fluid.is_compiled_with_cuda():
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    print('train with {}'.format(place))

    with fluid.dygraph.guard(place):
        # The checkpoint directory is hard-coded here rather than read from
        # train_parameters['save_model_dir'].
        checkpoint = '{}/crnn_best'.format('output/baidu_model')
        params, _ = fluid.load_dygraph(checkpoint)
        # The class count is fixed at 3828 instead of
        # train_parameters["class_dim"] + 1.
        crnn = CRNN(3828, 1)
        crnn.load_dict(params)
        crnn.eval()

        for path in tqdm(files):
            image = precess_img(path)
            image = fluid.dygraph.to_variable(image).astype('float32')
            if save_static_path is not None:
                out_dygraph, static_layer = TracedLayer.trace(crnn, inputs=[image])
                # Save the converted model.
                static_layer.save_inference_model(save_static_path, feed=[0], fetch=[0])
            pred = crnn(image)
            decoded = utils.greedy_decode(pred.numpy(), blank=train_parameters["class_dim"])
            text = "".join(train_parameters['r_label_dict'][c] for c in decoded[0])
            results.append('{0}\t{1}'.format(os.path.basename(path), text))
            # Stop after the first file.
            break
    return results
def decoding(model, src, batch, hand_regions=None, start_symbol=1, max_len=20, device='cuda', method='greedy', n_beam=8):
    """Translate ``src`` with greedy or beam-search decoding.

    Args:
        model: Model handed to the decoder function.
        src: Source input; wrapped in ``Variable`` before decoding.
        batch: Object providing ``rel_mask`` and ``src_mask``.
        hand_regions: Optional extra input forwarded to the decoder.
        start_symbol: Start-of-sequence symbol passed to the decoder.
        max_len: Maximum decoding length passed to the decoder.
        device: Accepted for interface compatibility; not used here.
        method: ``'greedy'`` or ``'beam'``.
        n_beam: Beam width, used only when ``method == 'beam'``.

    Returns:
        Whatever the selected decoder returns.

    Raises:
        SystemExit: With exit status 1 when ``method`` is not supported.
    """
    # Validate before doing any work. The interactive-only quit() helper is
    # avoided and a non-zero status is used so callers and shells can see
    # that the run failed.
    if method not in ('greedy', 'beam'):
        print("Decoding method is not supported !")
        raise SystemExit(1)

    src = Variable(src)

    if method == 'greedy':
        return greedy_decode(model, src, hand_regions, batch.rel_mask,
                             batch.src_mask, max_len=max_len,
                             start_symbol=start_symbol, n_devices=1)

    return beam_decode(model, src, hand_regions, batch.rel_mask,
                       batch.src_mask, max_len=max_len,
                       start_symbol=start_symbol, n_beam=n_beam)
def run_test(model, test_data, test_iter):
    """Greedy-decode the test set and report its raw corpus BLEU.

    Args:
        model: Model to decode with; put in eval mode for every batch.
        test_data: Iterable of examples whose ``trg`` attribute is the
            reference token sequence.
        test_iter: Iterable of batches, each passed through ``rebatch``.

    Returns:
        The BLEU score from ``sacrebleu.raw_corpus_bleu``.
    """
    decoded = []
    for raw_batch in test_iter:
        model.eval()
        with torch.no_grad():
            batch = rebatch(PAD_INDEX, raw_batch)
            pred, _attn = greedy_decode(
                model,
                batch.src,
                batch.src_mask,
                batch.src_lengths,
                max_len=25,
                sos_index=TRG.vocab.stoi[SOS_TOKEN],
                eos_index=TRG.vocab.stoi[EOS_TOKEN])
            decoded.append(pred)

    hypotheses = [" ".join(lookup_words(pred, TRG.vocab)) for pred in decoded]
    references = [" ".join(example.trg) for example in test_data]

    # A single reference stream, hence the one-element list.
    result = sacrebleu.raw_corpus_bleu(hypotheses, [references], .01)
    bleu = result.score
    print("BLEU score: ", bleu)
    return bleu
batch_size=hparams['batch_size'], shuffle=False, collate_fn=lambda x: data_processing( x, text_transform, audio_transforms), **kwargs) blank_id = len(text_transform) preds = [] print('Making prediction') data_len = len(loader) for i, batch in enumerate(loader): print(f'{i}/{data_len}') spectrograms, labels, input_lengths, label_lengths = batch spectrograms, labels = spectrograms.to(device), labels.to(device) output = model(spectrograms) # (batch, time, n_class) output = F.log_softmax(output, dim=2) output = output.transpose(0, 1) # (time, batch, n_class) decoded_preds, decoded_targets = greedy_decode(output.transpose(0, 1), labels, label_lengths, text_transform) preds.extend(decoded_preds) submission = pd.DataFrame({ 'number': preds, 'path': data_df['path'].tolist() }) submission.to_csv('submission.csv')
def run_epoch(model, data, is_train=False, device='cuda:0', n_devices=1):
    """Run one training or evaluation pass over ``data``.

    The function relies on names defined outside it: ``args``,
    ``dataset_sizes``, ``Batch``, ``vocab``, ``vocab_size``, ``loss_fn``,
    ``ctc_loss``, ``optimizer``, ``batch_size``, ``greedy_decode`` and the
    ``translation_corpus`` / ``reference_corpus`` lists it appends to.

    Args:
        model: Model to train or evaluate.
        data: Iterable yielding ``(x, x_lengths, y, y_lengths, gloss,
            gloss_lengths)`` minibatches; ``y`` and ``gloss`` are converted
            with ``torch.from_numpy``.
        is_train: When true, run backward passes and optimizer steps;
            otherwise also greedy-decode each batch for BLEU scoring.
        device: Device the batch tensors are moved to.
        n_devices: Accepted for interface compatibility; not used here.

    Returns:
        In training mode, the summed loss divided by the total token count.
        In evaluation mode, ``(translation_corpus, reference_corpus,
        loss per token, word accuracy)``.
    """
    if is_train:
        model.train()  # Set model to training mode
        print ("Training..")
        phase = 'train'
    else:
        model.eval()  # Set model to evaluate mode
        print ("Evaluating..")
        phase = 'valid'

    start_time = time.time()

    # Epoch-wide accumulators.
    total_loss = 0.0
    total_tokens = 0
    total_correct = 0.0
    # Accumulators for the current reporting window (reset every 100 steps).
    total_seqs = 0
    tokens = 0
    n_correct = 0.0

    # Progress bar
    bar = progressbar.ProgressBar(maxval=dataset_sizes[phase],
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    bar.start()
    j = 0

    # Loop over minibatches
    for step, (x, x_lengths, y, y_lengths, gloss, gloss_lengths) in enumerate(data):

        # Update progress bar with every iteration
        j += len(x)
        bar.update(j)

        if gloss is not None:
            gloss = torch.from_numpy(gloss).to(device)

        y = torch.from_numpy(y).to(device)
        x = x.to(device)

        # NOTE: clone y to avoid overriding it
        batch = Batch(x_lengths, y_lengths, None, y.clone(), emb_type=args.emb_type, DEVICE=device,
                      fixed_padding=args.fixed_padding, rel_window=args.rel_window)

        model.zero_grad()

        # In hybrid mode the model returns (output, encoder_output).
        # Per the original notes:
        #   output = (batch_size, tgt_seq_length, tgt_vocab_size)
        #   encoder_output = (batch_size, input_seq_length, trg_vocab_size)
        if args.hybrid:
            output, encoder_output = model.forward(x, batch.trg, batch.src_mask, batch.trg_mask, batch.rel_mask, None)
            # Swap the first two axes before the CTC loss.
            encoder_output = encoder_output.transpose(0, 1)
        else:
            output = model.forward(x, batch.trg, batch.src_mask, batch.trg_mask, batch.rel_mask, None)

        # Produce translations for the BLEU score when evaluating on dev.
        if not is_train:
            x = Variable(x)
            translations = greedy_decode(model, x, None, batch.rel_mask, batch.src_mask,
                                         max_len=20, start_symbol=1, device=device)

            # Loop over the batch to create sentences
            for i in range(len(y)):
                ys = y[i, :]
                ys = ys[ys != 0]  # drop zero (padding) entries
                # NOTE: keep eos, drop the leading token
                ys = ys[1:]

                translation = translations[i]
                hyp_trans = [vocab[tok.item()] for tok in translation]
                gt_trans = [vocab[tok.item()] for tok in ys]

                translation_corpus.append(hyp_trans)
                # NOTE: a list of lists is required (one reference per sentence)
                reference_corpus.append([gt_trans])

        x_lengths = torch.IntTensor(x_lengths)
        y_lengths = torch.IntTensor(y_lengths)
        if gloss_lengths is not None:
            gloss_lengths = torch.IntTensor(gloss_lengths)

        # CTC loss of the batch, without averaging
        if args.hybrid:
            loss_ctc = ctc_loss(encoder_output, gloss.cpu(), x_lengths.cpu(), gloss_lengths.cpu())

        # Remove sos tokens from y
        y = y[:, 1:]

        # Predicted words with highest prob
        _, pred = torch.max(output, dim=-1)

        # Count this batch's hits separately so the epoch total receives each
        # batch exactly once (the windowed counter keeps growing between
        # resets and must not be re-added every step).
        # NOTE: padding is not counted
        batch_correct = 0
        for i in range(y.shape[0]):
            batch_correct += (pred[i, :y_lengths[i]-1] == y[i, :y_lengths[i]-1]).sum()
        n_correct += batch_correct

        # NOTE: the transformer is auto-regressive: it predicts one part at a
        # time and uses its output so far to decide what to do next. Teacher
        # forcing passes the true output to the next time step regardless of
        # what the model predicted at the current step.
        #   Decoder input: with sos, without eos
        #   Target:        without sos, with eos

        # NOTE: pred must be the same shape as y
        y = y.contiguous().view(-1)
        pred = pred.contiguous().view(-1)
        output = output.view(-1, vocab_size)

        assert y.shape == pred.shape

        # Cross-entropy loss (from the decoder) of the batch, without averaging
        loss = loss_fn(output, y)

        if args.hybrid:
            # Joint CTC/decoder loss
            loss = loss + loss_ctc

        # Detach so the running sum does not keep autograd history alive.
        total_loss += loss.detach()
        total_seqs += batch.seq
        total_tokens += batch.ntokens
        tokens += batch.ntokens
        total_correct += batch_correct

        if is_train:
            loss.backward()

            # Gradient-norm clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1)

            optimizer.step()

        if step % 100 == 0:
            elapsed = time.time() - start_time
            print("Step: %d, Loss: %f, Frame per Sec: %f, Token per sec: %f, Word Accuracy: %f" %
                  (step, loss / batch.ntokens, total_seqs * batch_size / elapsed, tokens / elapsed, n_correct.item() / tokens.item()))

            # Start a new reporting window.
            start_time = time.time()
            total_seqs = 0
            tokens = 0
            n_correct = 0.0

        # Free some memory
        # NOTE: this helps a lot in avoiding CUDA out-of-memory errors
        del loss, output, y

    if is_train:
        print("Total word Accuracy: %f" % (total_correct.item() / total_tokens.item()))
        return total_loss.item() / total_tokens.item()
    else:
        return translation_corpus, reference_corpus, total_loss.item() / total_tokens.item(), total_correct.item() / total_tokens.item()
def traverse_data(data_iter, model, src_eos_index=None, trg_eos_index=None, src_vocab=None, trg_vocab=None, x_file='new_x.txt', y_file='new_y.txt'):
    """Decode every batch and write out the target words the model missed.

    For each sequence the set of target words is compared with the set of
    greedily decoded words. Sources that still have unpredicted target words
    are written to ``x_file`` and those remaining words to ``y_file``; all
    predictions are written to ``prediction.txt``. Source, target and
    prediction are also printed per sequence.

    Args:
        data_iter: Iterable of batches exposing ``src``, ``src_idx``,
            ``src_mask``, ``src_lengths``, ``trg_idx`` and ``nseqs``.
        model: Model to decode with; put in eval mode.
        src_eos_index: NOTE(review): always overwritten below (from the
            vocabulary, or with 1), so the passed value has no effect.
        trg_eos_index: Not used by this function.
        src_vocab: Source vocabulary with a ``stoi`` mapping, or ``None``.
        trg_vocab: Target vocabulary, or ``None``.
        x_file: Output path for the remaining source sentences.
        y_file: Output path for the remaining target words.
    """
    model.eval()
    BOS_TOKEN = "<s>"
    EOS_TOKEN = "</s>"
    UNK_TOKEN = "<unk>"
    if src_vocab is not None and trg_vocab is not None:
        src_bos_index = src_vocab.stoi[BOS_TOKEN]
        src_eos_index = src_vocab.stoi[EOS_TOKEN]
        src_unk_index = src_vocab.stoi[UNK_TOKEN]
    else:
        src_bos_index = 0
        src_eos_index = 1
        src_unk_index = 2

    # Collect output lines and join once at the end instead of growing
    # strings inside the loops.
    new_x_lines = []
    new_y_lines = []
    pred_lines = []

    for batch in data_iter:
        result = greedy_decode(model, batch.src_idx, batch.src_mask, batch.src_lengths)
        for seq in range(batch.nseqs):
            src = batch.src[seq].cpu().numpy()
            trg_idx = batch.trg_idx[seq].cpu().numpy()
            out = result[seq]

            # Cut the source at its first EOS.
            if src_eos_index is not None:
                eos_pos = np.where(src == src_eos_index)[0]
                if len(eos_pos) > 0:
                    src = src[:eos_pos[0]]

            # Strip a leading BOS / trailing EOS. The length checks keep an
            # empty sequence (e.g. EOS in first position) from raising.
            if len(src) > 0 and src[0] == src_bos_index:
                src = src[1:]
            if len(src) > 0 and src[-1] == src_eos_index:
                src = src[:-1]

            src = np.array([x for x in src if x != src_unk_index])

            target_words = set(lookup_words(trg_idx, vocab=trg_vocab))
            pred_words = set(lookup_words(out, vocab=trg_vocab))
            pred_lines.append(" ".join(pred_words))

            remain_words = " ".join(word for word in target_words if word not in pred_words)
            source_text = " ".join(lookup_words(src, vocab=src_vocab))

            print("=================================")
            print("Source: ")
            print(source_text)
            print("Target: ")
            print(target_words)
            print("Prediction: ")
            print(pred_words)
            print("=================================")

            if remain_words != "":
                new_y_lines.append(remain_words)
                new_x_lines.append(source_text)

    with open(x_file, 'w') as XFILE:
        XFILE.write("".join(line + "\n" for line in new_x_lines))
    with open(y_file, 'w') as YFILE:
        YFILE.write("".join(line + "\n" for line in new_y_lines))
    with open('prediction.txt', 'w') as OUT:
        OUT.write("".join(line + "\n" for line in pred_lines))