# Character-level Seq2Seq inference: decode test batches, restore the
# loader's original ordering, and write a Kaggle-style submission CSV.
# `dl`, `models`, `device`, `train`, and `train_mode` come from the
# project's own modules; the loop head of run_test is an assumption
# reconstructed from how it is called in the main block below.
import numpy as np
import pandas as pd
import torch


def run_test(model, test_loader):
    pred_final = []
    for inputs, order_list in test_loader:
        outputs, attention = model(inputs, None, None)
        outputs = torch.transpose(outputs, 0, 1)
        # Undo the length-based sort applied by the loader so predictions
        # line up with the original test ids.
        zip_list = zip(outputs.tolist(), order_list)
        sorted_list = sorted(zip_list, key=lambda x: x[1], reverse=False)
        output = torch.tensor([x[0] for x in sorted_list])
        for i in range(output.size(0)):
            # Truncate each prediction at the first '?' (stop character),
            # if one was generated.
            end_pos = output[i].tolist().index(dl.char2idx['?']) \
                if dl.char2idx['?'] in output[i].tolist() \
                else output[i].size(0)
            pred = "".join(dl.idx2char[o] for o in output[i].tolist()[:end_pos])
            pred_final.append(pred)
    result = pd.DataFrame(np.array(pred_final))
    result.columns = ['Predicted']
    result.to_csv('submission.csv', index_label='Id')


if __name__ == "__main__":
    train_loader, val_loader, test_loader = dl.get_loaders()
    model = models.Seq2Seq(40, 128, dl.get_char_length())
    model = model.to(device)
    if train_mode:
        train(model, train_loader, val_loader)
    with torch.no_grad():
        run_test(model, test_loader)
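A minimal, self-contained illustration of the stop-character truncation used in run_test above. The toy `char2idx`/`idx2char` mappings and the helper name are stand-ins for the project's `dl` module, not taken from the source:

# Illustration only: decode an index sequence, cutting at the first stop
# token if present (mirrors the end_pos logic in run_test).
def truncate_at_stop(indices, idx2char, stop_idx):
    end_pos = indices.index(stop_idx) if stop_idx in indices else len(indices)
    return "".join(idx2char[i] for i in indices[:end_pos])

# Toy vocabulary: '?' (index 2) acts as the stop character.
idx2char = {0: 'h', 1: 'i', 2: '?'}
assert truncate_at_stop([0, 1, 2, 0], idx2char, stop_idx=2) == "hi"
assert truncate_at_stop([0, 1], idx2char, stop_idx=2) == "hi"  # no stop token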
import math
import random
import time

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.data import BucketIterator
from tqdm import tqdm

# Project-local modules: data pipeline, model definitions, config/engine
# helpers, and the NL2Cmd metric. `args` is the argparse namespace parsed
# elsewhere in this file.
import config
import dataset
import engine
import metric_utils
import model


def run():
    # Fix all RNG seeds for reproducibility.
    SEED = 1234
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    torch.cuda.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    train, valid, test, SRC, TRG = dataset.create_dataset()
    train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
        (train, valid, test),
        sort_key=lambda x: len(x.source),
        batch_size=config.BATCH_SIZE,
        device=config.device)

    # Transformer hyperparameters.
    INPUT_DIM = len(SRC.vocab)
    OUTPUT_DIM = len(TRG.vocab)
    HID_DIM = 256
    ENC_LAYERS = 3
    DEC_LAYERS = 3
    ENC_HEADS = 8
    DEC_HEADS = 8
    ENC_PF_DIM = 512
    DEC_PF_DIM = 512
    ENC_DROPOUT = 0.1
    DEC_DROPOUT = 0.1

    enc = model.Encoder(INPUT_DIM, HID_DIM, ENC_LAYERS, ENC_HEADS,
                        ENC_PF_DIM, ENC_DROPOUT, config.device)
    dec = model.Decoder(OUTPUT_DIM, HID_DIM, DEC_LAYERS, DEC_HEADS,
                        DEC_PF_DIM, DEC_DROPOUT, config.device)

    SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
    TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

    model_tr = model.Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX,
                             config.device).to(config.device)

    def initialize_weights(m):
        if hasattr(m, 'weight') and m.weight.dim() > 1:
            nn.init.xavier_uniform_(m.weight.data)

    model_tr.apply(initialize_weights)

    optimizer = optim.Adam(model_tr.parameters(), lr=config.LEARNING_RATE)
    # Ignore padding positions when computing the loss.
    criterion = nn.CrossEntropyLoss(ignore_index=TRG_PAD_IDX)

    def epoch_time(start_time, end_time):
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    if args.action == 'train':
        best_valid_loss = float('inf')
        for epoch in tqdm(range(config.N_EPOCHS)):
            start_time = time.time()
            train_loss = config.train(model_tr, train_iterator, optimizer,
                                      criterion, config.CLIP)
            valid_loss = config.evaluate(model_tr, valid_iterator, criterion)
            end_time = time.time()
            epoch_mins, epoch_secs = epoch_time(start_time, end_time)
            # Keep only the checkpoint with the best validation loss.
            if valid_loss < best_valid_loss:
                best_valid_loss = valid_loss
                torch.save(model_tr.state_dict(), 'model.bin')
            with open(config.RESULTS_SAVE_FILE, 'a') as f:
                print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s',
                      file=f)
                print(f'\tTrain Loss: {train_loss:.3f} | '
                      f'Train PPL: {math.exp(train_loss):7.3f}', file=f)
                print(f'\t Val. Loss: {valid_loss:.3f} | '
                      f'Val. PPL: {math.exp(valid_loss):7.3f}', file=f)
    elif args.action == 'test':
        model_tr.load_state_dict(torch.load('model.bin'))
        test_loss, t, o = engine.test(model_tr, test_iterator, criterion, TRG)
        metric_val = 0
        for i in range(len(t)):
            metric_val += metric_utils.compute_metric(o[i], 1.0, t[i])
        print('Nl2Cmd Metric | ', metric_val / len(t))
        print(f'| Test Loss: {test_loss:.3f} | '
              f'Test PPL: {math.exp(test_loss):7.3f} |')
    elif args.action == 'save_vocab':
        print('Source Vocab Length', len(SRC.vocab))
        print('Target vocab length', len(TRG.vocab))
        s1 = '\n'.join(k for k in SRC.vocab.itos)
        s2 = '\n'.join(k for k in TRG.vocab.itos)
        with open('NL_vocabulary.txt', 'w') as f:
            f.write(s1)
        with open('Bash_vocabulary.txt', 'w') as f:
            f.write(s2)
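The per-epoch loops live in config.train and config.evaluate, which the source does not show. A minimal sketch of what such a teacher-forcing training epoch typically looks like for this kind of transformer, assuming batch-first tensors, batch fields named .source/.target (the sort_key above uses x.source), and a model that returns (logits, attention); the real project code may differ:

# Hedged sketch of a config.train-style epoch, not taken from the source.
def train_epoch(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0
    for batch in iterator:
        src, trg = batch.source, batch.target
        optimizer.zero_grad()
        # Feed all target tokens except the last; predict all except <sos>.
        output, _ = model(src, trg[:, :-1])
        output_dim = output.shape[-1]
        output = output.contiguous().view(-1, output_dim)
        gold = trg[:, 1:].contiguous().view(-1)
        loss = criterion(output, gold)
        loss.backward()
        # Clip gradients to stabilize transformer training.
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)

config.evaluate would follow the same shape under model.eval() and torch.no_grad(), without the backward pass.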
import os

import tensorflow as tf

import input_data
import model

EPOCHS = 200
BATCH_SIZE = 20
SHUFFLE_SIZE = 50
VOCAB_SIZE = 284
EMBEDDING_SIZE = 256
LEARNING_RATE = 0.001
SAVE_DIR = "./log/"

if __name__ == "__main__":
    # Build the QA model graph in training mode (TensorFlow 1.x API).
    QA_model = model.Seq2Seq(BATCH_SIZE, VOCAB_SIZE, EMBEDDING_SIZE,
                             LEARNING_RATE, True)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    tra_summary_writer = tf.summary.FileWriter(os.path.join(SAVE_DIR, "tra"),
                                               sess.graph)

    step = 0
    for e in range(EPOCHS):
        for encoder_input_data, decoder_input_data, decoder_input_label in \
                input_data.get_batch("data/text_list", BATCH_SIZE,
                                     SHUFFLE_SIZE):
            step += 1
            _, tra_loss, summary_str = sess.run(
                [QA_model.train_op, QA_model.loss, summary_op],
                feed_dict={
                    QA_model.encoder_input_data: encoder_input_data,
                    QA_model.decoder_input_data: decoder_input_data,
                    # Loss targets; this last entry completes the feed dict,
                    # which is cut off in the source (placeholder name is an
                    # assumption matching the unpacked batch variable).
                    QA_model.decoder_input_label: decoder_input_label,
                })
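The source is cut off inside the inner loop. A hedged sketch of how such a TF1 training loop typically continues, recording the summary it just computed and checkpointing with the saver created above; the reporting interval and checkpoint path are assumptions, not from the source:

            # Continuation sketch (hypothetical): log summaries each step,
            # print progress occasionally.
            tra_summary_writer.add_summary(summary_str, step)
            if step % 100 == 0:
                print("epoch %d, step %d: loss = %.4f" % (e, step, tra_loss))
        # Checkpoint once per epoch (path is an assumption).
        saver.save(sess, os.path.join(SAVE_DIR, "model.ckpt"), global_step=step)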