def main():
    model_config = config.get_model_config()
    num_input_words = model_config['dim_lang']
    world_state_size = model_config['dim_world']
    num_output_actions = model_config['dim_action']
    hidden_size = model_config['hidden_size']
    learning_rate = model_config['learning_rate']

    encoder = models.EncoderRNN(num_input_words, hidden_size, bidirectionality=True)
    attn_decoder = models.AttnDecoderRNN(hidden_size, world_state_size, num_output_actions)
    trainIters(encoder, attn_decoder, 1, learning_rate)

    # Tag the output directory with the process id and a timestamp
    id_process = os.getpid()
    time_current = datetime.datetime.now().isoformat()
    tag_model = '_PID=' + str(id_process) + '_TIME=' + time_current
    path_track = './tracks/track' + tag_model + '/'
    command_mkdir = 'mkdir -p ' + os.path.abspath(path_track)
    os.system(command_mkdir)

    # Persist the trained encoder and decoder
    ENCODER_PATH = path_track + 'encoder.pkl'
    DECODER_PATH = path_track + 'decoder.pkl'
    torch.save(encoder, ENCODER_PATH)
    torch.save(attn_decoder, DECODER_PATH)
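    # Hedged alternative, not in the original snippet: torch.save on a whole module
    # pickles the class path as well, so saving state_dicts alongside the pickled
    # modules is the more portable way to reload the weights later.
    torch.save(encoder.state_dict(), path_track + 'encoder_state.pkl')
    torch.save(attn_decoder.state_dict(), path_track + 'decoder_state.pkl')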
def main():
    input_lang, output_lang, pairs = utils.prepare_data(lang_name=target_language, _dir='data')

    encoder = model.EncoderRNN(input_lang.n_words, hidden_size, n_layers)
    decoder = model.AttentionDecoderRNN(attn_model, hidden_size, output_lang.n_words,
                                        n_layers, dropout_p=dropout_p)
    print("Encoder-Model: ", encoder)
    print("Decoder-Model: ", decoder)

    # Initialize optimizers and criterion
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0   # Reset every plot_every

    # Begin training
    for epoch in range(1, n_epochs + 1):
        training_pair = utils.variables_from_pair(random.choice(pairs), input_lang, output_lang)
        input_variable = training_pair[0]
        target_variable = training_pair[1]

        # Run the train step
        epoch_loss = train(input_variable, target_variable, encoder, decoder,
                           encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += epoch_loss
        plot_loss_total += epoch_loss

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            time_since = utils.time_since(start, epoch / n_epochs)
            print('%s (%d %d%%) %.4f' % (time_since, epoch, epoch / n_epochs * 100, print_loss_avg))

        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    save_model(encoder, 'data/encoder_state_' + target_language)
    save_model(decoder, 'data/decoder_state_' + target_language)
    save_model(decoder.attention, 'data/decoder_attention_state_' + target_language)
    utils.show_plot(plot_losses)
def main():
    model_config = config.get_model_config()
    num_input_words = model_config['dim_lang']
    world_state_size = model_config['dim_world']
    num_output_actions = model_config['dim_action']
    hidden_size = model_config['hidden_size']
    learning_rate = model_config['learning_rate']

    encoder = models.EncoderRNN(num_input_words, hidden_size, bidirectionality=True)
    attn_decoder = models.AttnDecoderRNN(hidden_size, world_state_size, num_output_actions)
    trainIters(encoder, attn_decoder, 3, learning_rate)
else:
    checkpoint = torch.load(loadFilename, map_location=torch.device("cpu"))
    encoder_sd = checkpoint["en"]
    decoder_sd = checkpoint["de"]
    encoder_optimizer_sd = checkpoint["en_opt"]
    decoder_optimizer_sd = checkpoint["de_opt"]
    embedding_sd = checkpoint["embedding"]
    voc = vocab.Voc(corpus_name)
    voc.__dict__ = checkpoint["voc_dict"]

    # Initialize word embeddings
    embedding = nn.Embedding(voc.num_words, hidden_size)
    embedding.load_state_dict(embedding_sd)

    # Initialize encoder & decoder models
    encoder = models.EncoderRNN(hidden_size, embedding, encoder_n_layers, dropout)
    decoder = models.LuongAttnDecoderRNN(attn_model, embedding, hidden_size,
                                         voc.num_words, decoder_n_layers, dropout)

    # Load trained model params
    encoder.load_state_dict(encoder_sd)
    decoder.load_state_dict(decoder_sd)

    # Use appropriate device
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Set dropout layers to eval mode
    encoder.eval()
    decoder.eval()

    searcher = models.GreedySearchDecoder(encoder, decoder)
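    # Hedged note, not in the original snippet: the searcher wraps the frozen
    # encoder/decoder pair and performs greedy decoding of a single input
    # sequence at inference time.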
layers_dec = 1
dropout_p = 0
num_hidden = 512
device = torch.device("cuda")
model_path = args.model_path
data_path = args.data_path

for detector in ['yolo']:
    for fold in [1, 2, 3]:
        print(detector + ' fold ' + str(fold))
        print('loading model')
        encoder = models.EncoderRNN(device, num_hidden, layers_enc)
        encoder = encoder.to(device)
        encoder = encoder.float()
        decoder = models.DecoderRNN(device, num_hidden, dropout_p, layers_dec)
        decoder = decoder.to(device)
        decoder = decoder.float()
        try:
            encoder_path = model_path + '/encoder_' + detector + str(fold) + '_gru.weights'
            decoder_path = model_path + '/decoder_' + detector + str(fold) + '_gru.weights'
            encoder.load_state_dict(torch.load(encoder_path))
            decoder.load_state_dict(torch.load(decoder_path))
        except Exception:
            print('Failed to load model from ' + str(model_path))
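        # Hedged addition, not in the original snippet: for inference the loaded
        # models would typically be switched to eval mode so dropout is disabled.
        encoder.eval()
        decoder.eval()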
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F

import load
import models
import config as cfg
import train
import communicate as comm

corpus_QA, pairs = load.prepareData('augmented_train')
load.updateData(corpus_QA, 'dict')

# Initialize models
encoder = models.EncoderRNN(corpus_QA.n_words, cfg.hidden_size, cfg.n_layers, dropout=cfg.dropout)
decoder = models.LuongAttnDecoderRNN(cfg.attn_model, cfg.hidden_size, corpus_QA.n_words,
                                     cfg.n_layers, dropout=cfg.dropout)

# Initialize optimizers and criterion
encoder_optimizer = optim.Adam(encoder.parameters(), lr=cfg.learning_rate)
decoder_optimizer = optim.Adam(decoder.parameters(), lr=cfg.learning_rate * cfg.decoder_learning_ratio)
criterion = nn.CrossEntropyLoss()

# Move models to GPU
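# Hedged sketch, not in the original snippet: one way to complete the
# "Move models to GPU" step above, assuming a CUDA device is available.
if torch.cuda.is_available():
    encoder = encoder.cuda()
    decoder = decoder.cuda()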
import torch

import utils
import models as model
from language import Language

# Parse argument for input sentence
language = 'ben'
input_lang, output_lang, pairs = utils.prepare_data(language, _dir='data')

attn_model = 'dot'
hidden_size = 500
n_layers = 4
dropout_p = 0.05

# Initialize models
encoder = model.EncoderRNN(input_lang.n_words, hidden_size, n_layers)
decoder = model.AttentionDecoderRNN(attn_model, hidden_size, output_lang.n_words,
                                    n_layers, dropout_p=dropout_p)

print("Load parameters")

# Load model parameters
encoder.load_state_dict(torch.load('data/encoder_state_{}'.format(language)))
decoder.load_state_dict(torch.load('data/decoder_state_{}'.format(language)))
decoder.attention.load_state_dict(torch.load('data/decoder_attention_state_{}'.format(language)))


def evaluate(sentence, max_length=10):
    input_variable = utils.variable_from_sentence(input_lang, sentence)

    # Run through encoder
    encoder_hidden = encoder.init_hidden()
def main():
    # Get arguments
    args = parse_args()

    # Set random seed
    torch.manual_seed(args.seed)

    # Cuda
    use_cuda = False
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you "
                  "should probably run with --cuda")
        else:
            use_cuda = True
            torch.cuda.manual_seed(args.seed)

    # Load data + text fields
    print('=' * 89)
    train_iter, val_iter, test_iter, SRC, TRG = utils.load_dataset(
        batch_size=args.batch_size,
        use_pretrained_emb=args.pretrained_emb,
        save_dir=SAVE_DIR)
    print('=' * 89)

    # Initialize model
    enc = models.EncoderRNN(
        input_size=len(SRC.vocab),
        emb_size=(SRC.vocab.vectors.size(1)
                  if args.pretrained_emb == 'fastText' else args.emb_size),
        embeddings=(SRC.vocab.vectors
                    if args.pretrained_emb == 'fastText' else None),
        max_norm=args.emb_maxnorm,
        padding_idx=SRC.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=args.bidirectional)
    decoder = models.AttnDecoderRNN if args.attention else models.DecoderRNN
    dec = decoder(
        enc_num_directions=enc.num_directions,
        enc_hidden_size=args.hidden_size,
        use_context=args.use_context,
        input_size=len(TRG.vocab),
        emb_size=(TRG.vocab.vectors.size(1)
                  if args.pretrained_emb else args.emb_size),
        embeddings=(TRG.vocab.vectors if args.pretrained_emb else None),
        max_norm=args.emb_maxnorm,
        padding_idx=TRG.vocab.stoi['<pad>'],
        hidden_size=args.hidden_size,
        num_layers=args.num_layers,
        dropout=args.dropout,
        bidirectional=False)  # args.bidirectional
    model = models.Seq2Seq(enc, dec, use_cuda=use_cuda)
    if use_cuda:
        model.cuda()
    print(model)

    # Initialize loss
    criterion = torch.nn.CrossEntropyLoss(ignore_index=TRG.vocab.stoi["<pad>"])

    # Create optimizer
    if args.optimizer == 'Adam':
        optim = torch.optim.Adam
    elif args.optimizer == 'Adadelta':
        optim = torch.optim.Adadelta
    elif args.optimizer == 'Adagrad':
        optim = torch.optim.Adagrad
    else:
        optim = torch.optim.SGD
    optimizer = optim(model.parameters(), lr=args.lr)

    # Create scheduler: LambdaLR multiplies the base lr by the lambda's value,
    # so this halves the learning rate once the epoch count passes 8
    lambda_lr = lambda epoch: 0.5 if epoch > 8 else 1
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda_lr)

    # Train
    best_val_loss = None
    fname = './{}/{}.pt'.format(SAVE_DIR, args.save)
    print('=' * 89)
    try:
        for epoch in range(1, args.epochs + 1):
            epoch_start_time = time.time()
            attns = train(epoch, model, train_iter, criterion, optimizer,
                          use_cuda, args, SRC, TRG)
            val_loss = evaluate(model, val_iter, criterion, use_cuda)

            # Log results
            print('-' * 89)
            print('| end of epoch {:3d} | time: {:5.2f}s '
                  '| valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                      epoch, (time.time() - epoch_start_time),
                      val_loss, math.exp(val_loss)))
            print('-' * 89)

            # Save the model if the validation loss is the best we've seen so far
            if not best_val_loss or val_loss < best_val_loss:
                if not os.path.isdir(SAVE_DIR):
                    os.makedirs(SAVE_DIR)
                torch.save(model, fname)
                best_val_loss = val_loss

            # Anneal learning rate
            scheduler.step()
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')

    # Load the best saved model
    with open(fname, 'rb') as f:
        model = torch.load(f)

    # Run on test data
    test_loss = evaluate(model, test_iter, criterion, use_cuda)

    # Log results
    print('=' * 89)
    print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
        test_loss, math.exp(test_loss)))
    print('=' * 89)
if __name__ == '__main__':
    # Hyper-parameters
    # (other parameters can be found in configs.py; it is better to keep their defaults)
    use_model = False    # True: use the trained model; otherwise train from scratch
    hidden_size = 256    # RNN hidden size
    in_embed_dim = 256   # input-language word embedding dimension
    out_embed_dim = 256  # output-language word embedding dimension
    lr = 0.01
    n_iters = 80000
    print_every = 1000
    plot_every = 100

    encoder1 = models.EncoderRNN(input_lang.n_words, hidden_size, in_embed_dim).to(device)
    attn_decoder1 = models.AttnDecoderRNN(hidden_size, output_lang.n_words, out_embed_dim,
                                          dropout_p=0.1).to(device)

    if use_model:
        encoder1.load_state_dict(torch.load('data/encoder_25.pt'))
        attn_decoder1.load_state_dict(torch.load('data/attn_decoder_25.pt'))
    else:
        trainIters(pairs, input_lang, output_lang, encoder1, attn_decoder1,
                   n_iters=n_iters, print_every=print_every, plot_every=plot_every,
                   learning_rate=lr)

    evaluateRandomly(pairs, input_lang, output_lang, encoder1, attn_decoder1)
    evaluateInput(input_lang, output_lang, encoder1, attn_decoder1)
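    # Hedged addition, not in the original snippet: after training from scratch,
    # the state dicts could be saved to the same paths that use_model=True reloads from.
    if not use_model:
        torch.save(encoder1.state_dict(), 'data/encoder_25.pt')
        torch.save(attn_decoder1.state_dict(), 'data/attn_decoder_25.pt')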
if __name__ == '__main__':
    # Build (tense, sentence) pairs for the 4 tenses
    preData(params.PRE_YET)

    # Open files to store training statistics
    file_loss = open('./%d/loss.txt' % params.NUM, 'w', encoding='UTF-8')
    file_KL = open('./%d/KL.txt' % params.NUM, 'w', encoding='UTF-8')
    file_bleu = open('./%d/bleu.txt' % params.NUM, 'w', encoding='UTF-8')
    file_bleuTest1 = open('./%d/bleuTest1.txt' % params.NUM, 'w', encoding='UTF-8')

    # Train
    encoder = models.EncoderRNN().cuda()
    decoder = models.DecoderRNN().cuda()
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=params.LR)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=params.LR)
    cond_embed = nn.Embedding(4, params.COND_EMBEDDING_SIZE).cuda()
    cond_embed_optimizer = optim.Adam(cond_embed.parameters(), lr=params.LR)

    for epoch_i in range(1, params.EPOCH + 1):
        print('\n#####Start epoch %d#####' % epoch_i,
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        print('Training...')

        # Load data
        train_data = tenseLoader('train')
        train_len = train_data.len()