    len(dataset.out_vocab[0]),
    args.n_layers,
    args.dropout,
)

# Initialize optimizers and criterion
# encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate)
# decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate * decoder_learning_ratio)
encoder_optimizer = optim.Adadelta(encoder.parameters())
decoder_optimizer = optim.Adadelta(decoder.parameters())
criterion = nn.CrossEntropyLoss()

# Move models to GPU
if args.USE_CUDA:
    encoder.cuda()
    decoder.cuda()

# train(dataset,
#       args.batch_size,
#       args.n_epochs,
#       encoder,
#       decoder,
#       encoder_optimizer,
#       decoder_optimizer,
#       criterion,
#       'checkpoints/pov',
#       lang)

# evaluate
# find the last encoder state
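# A minimal sketch of how the last encoder state could be pulled out. Hypothetical:
# it assumes EncoderRNN.forward(input_seqs, input_lengths, hidden=None) returns
# (outputs, hidden) as in the usual Luong-attention layout, and that a padded batch
# of input sequences plus their lengths is already available.
#
# encoder_outputs, encoder_hidden = encoder(input_batches, input_lengths, None)
# last_encoder_state = encoder_hidden[-1]  # top layer's final hidden state, shape (batch, hidden_size)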
def train(x, y,
          optimizer=optim.Adam,
          criterion=nn.MSELoss(),
          n_steps=100,
          attn_model="general",
          hidden_size=128,
          n_layers=1,
          dropout=0,
          batch_size=50,
          elr=0.001,
          dlr=0.005,
          clip=50.0,
          print_every=10,
          teacher_forcing_ratio=lambda step: 1 if step < 10 else 0):
    # Configure training/optimization
    encoder_learning_rate = elr
    decoder_learning_rate = dlr

    # Initialize models
    encoder = EncoderRNN(1, hidden_size, n_layers, dropout=dropout)
    decoder = LuongAttnDecoderRNN(attn_model, 1, hidden_size, n_layers, dropout=dropout)

    # Initialize optimizers and criterion
    encoder_optimizer = optimizer(encoder.parameters(), lr=encoder_learning_rate)
    decoder_optimizer = optimizer(decoder.parameters(), lr=decoder_learning_rate)

    # Move models to GPU
    if USE_CUDA:
        encoder.cuda()
        decoder.cuda()

    # Begin!
    print_loss_total = 0
    step = 0
    while step < n_steps:
        step += 1

        # Get training data for this cycle: sample a random batch of series columns
        batch_idx = np.random.randint(0, x.shape[1], batch_size)
        input_batches, target_batches = x[:, batch_idx], y[:, batch_idx]

        # Run the train function (one optimization step)
        loss, _ = _train(input_batches, target_batches,
                         encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion,
                         teacher_forcing_ratio=teacher_forcing_ratio(step),
                         clip=clip)
        # print(np.mean(np.square((output.data.cpu().numpy() - series[-20:, batch_idx]))))

        # Keep track of loss
        print_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print_summary = '(%d %d%%) %.4f' % (step, step / n_steps * 100, print_loss_avg)
            print(print_summary)

    return encoder, decoder
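# Example usage (a hypothetical sketch with synthetic data; `series`, the 80/20 split and
# the step counts below are illustrative only): predict the last 20 points of a batch of
# noisy sine waves from the first 80.
#
# series = np.sin(np.linspace(0, 10, 100))[:, None] + 0.1 * np.random.randn(100, 200)
# x, y = series[:80], series[80:]   # both shaped (seq_len, n_series)
# encoder, decoder = train(x, y, n_steps=50, batch_size=32, print_every=10)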