def optimize(lr, clip):
    print("Optimizing with " + str(lr) + " lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip")
    num_chans = [args.nhid] * (args.levels - 1) + [args.emsize]
    model = TCN(args, n_words, num_chans)
    if args.cuda:
        model.cuda()
    print("Parameters: " + str(sum(p.numel() for p in model.parameters())))
    torch.backends.cudnn.benchmark = True  # This makes dilated conv much faster for CuDNN 7.5
    optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)

    # Start training loop
    best_model_name = "model_" + args.experiment_name + ".pt"
    best_vloss = 1e8
    all_vloss = []
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        try:
            train(model, optimizer, lr, epoch, clip)
        except OverflowError:
            return {'status': 'fail'}
        print("Validating...")
        val_loss = evaluate(model, val_data)
        if np.isnan(val_loss) or val_loss > 100:
            return {'status': 'fail'}
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time),
                                         val_loss, math.exp(val_loss)))
        print('-' * 89)

        # Save the model if the validation loss is the best we've seen so far.
        if val_loss < best_vloss:
            with open(best_model_name, 'wb') as f:
                print('Save model!\n')
                torch.save(model, f)
            best_vloss = val_loss

        # Anneal the learning rate if the validation loss plateaus
        if epoch > 10 and val_loss >= max(all_vloss[-5:]):
            lr = lr / 2.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        all_vloss.append(val_loss)

    return {"status": "ok", "loss": best_vloss, "model_name": best_model_name}
def optimize(lr, clip):
    print("Optimizing with " + str(lr) + " lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip")

    # Set the random seed manually for reproducibility.
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    print(args)

    n_channels = [args.nhid] * args.levels
    model = TCN(args.model, input_size, input_size, n_channels, args.ksize, dropout=args.dropout)
    print('Parameter count: ', str(sum(p.numel() for p in model.parameters())))
    if args.cuda:
        model.cuda()
    # summary(model, (193, 88))

    optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)

    best_vloss = 1e8
    vloss_list = []
    model_name = "model_" + str(args.data) + "_" + str(args.experiment_name) + ".pt"
    for ep in range(1, args.epochs + 1):
        train(model, ep, lr, optimizer, clip)
        vloss = evaluate(model, X_valid, name='Validation')
        if np.isnan(vloss) or vloss > 1000:
            return {'status': 'fail'}

        # Save the model if the validation loss is the best we've seen so far.
        if vloss < best_vloss:
            with open(model_name, "wb") as f:
                torch.save(model, f)
                print("Saved model!\n")
            best_vloss = vloss

        # Anneal the learning rate if the validation loss has not improved
        # over the last 10 epochs.
        if ep > 10 and vloss > max(vloss_list[-10:]):
            lr /= 2
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        vloss_list.append(vloss)

    return {'status': 'ok', 'loss': best_vloss, 'model_name': model_name}
def optimize(lr, clip):
    print("Optimizing with " + str(lr) + " lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip")
    num_chans = [args.nhid] * (args.levels - 1) + [args.emsize]
    model = TCN(args, n_characters, num_chans)
    if args.cuda:
        model.cuda()
    print("Parameters: " + str(sum(p.numel() for p in model.parameters())))
    torch.backends.cudnn.benchmark = True  # This makes dilated conv much faster for CuDNN 7.5
    optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)

    # Start training loop
    all_losses = []
    best_vloss = 1e7
    for epoch in range(1, args.epochs + 1):
        try:
            train(model, optimizer, clip, lr, epoch)
        except OverflowError:
            return {'status': 'fail'}
        vloss = evaluate(model, val_data)
        if np.isnan(vloss) or vloss > 1000:
            return {'status': 'fail'}
        print('-' * 89)
        print('| End of epoch {:3d} | valid loss {:5.3f} | valid bpc {:8.3f}'.format(
            epoch, vloss, vloss / math.log(2)))

        if epoch > 10 and vloss > max(all_losses[-5:]):
            lr = lr / 2.
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        all_losses.append(vloss)

        if vloss < best_vloss:
            print("Saving...")
            with open("model_" + args.experiment_name + ".pt", "wb") as f:
                torch.save(model, f)
                print("Saved model!\n")
            best_vloss = vloss

    return {
        "status": "ok",
        "loss": best_vloss,
        "model_name": "model_" + args.experiment_name + ".pt"
    }
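# The {'status': 'ok'/'fail', 'loss': ...} dictionaries returned by the optimize()
# variants above follow the objective convention used by hyperopt-style searchers.
# A minimal sketch of how one of them could be driven by hyperopt's fmin, assuming
# hyperopt is the search library in use; the search-space bounds below are
# illustrative assumptions, not values taken from the source.
import numpy as np
from hyperopt import fmin, tpe, hp, Trials

space = {
    'lr': hp.loguniform('lr', np.log(1e-4), np.log(1e-1)),
    'clip': hp.uniform('clip', 0.1, 1.0),
}

def hyperopt_objective(params):
    # Each trial trains a fresh model and reports its best validation loss.
    return optimize(lr=params['lr'], clip=params['clip'])

trials = Trials()
best = fmin(fn=hyperopt_objective, space=space, algo=tpe.suggest,
            max_evals=20, trials=trials)
print("Best hyperparameters:", best)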
                                     num_workers=num_threds, drop_last=False)

channel_sizes = [args.nhid] * args.levels
kernel_size = args.ksize
model_T = TCN(input_channels_T, n_classes, channel_sizes, kernel_size=kernel_size, dropout=args.dropout)
model_E = TCN(input_channels_E, n_classes, channel_sizes, kernel_size=kernel_size, dropout=args.dropout)
model_G = TCN(input_channels_G, n_classes, channel_sizes, kernel_size=kernel_size, dropout=args.dropout)

if args.cuda:
    model_T.cuda()
    model_E.cuda()
    model_G.cuda()

# One optimizer with a separate parameter group (and learning rate) per model
optimizer = getattr(optim, args.optim)([
    {'params': model_T.parameters(), 'lr': args.lr_T},
    {'params': model_E.parameters(), 'lr': args.lr_E},
    {'params': model_G.parameters(), 'lr': args.lr_G},
])  # ,momentum=0.9)


def save_network(network, network_label, epoch_label):
    save_filename = 'net_epoch_%d_id_%s.pth' % (epoch_label, network_label)
    save_path = os.path.join(args.savedir, save_filename)
    torch.save(network.state_dict(), save_path)
    print('saved net: %s' % save_path)


def train(ep):
    global steps
    total_loss = 0
    model_T_loss = 0
    model_E_loss = 0
# Test set
trueStateTEST = trueStateTEST.cuda()
measuredStateTEST = measuredStateTEST.cuda()

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
### ~~~~~~~~~~~~~~~~~~~~~~ OPTIMIZER ~~~~~~~~~~~~~~~~~~~~~~~~ ###

if not testSession:
    # Create the optimizer
    optimizerParameters = {'optim': optimMethod, 'lr': lr}
else:
    # Loading the optimizer parameters to use
    optimMethod = modelContext['optimizer_parameters']['optim']
    lr = modelContext['optimizer_parameters']['lr']

# Initializing the optimizer
optimizer = getattr(optim, optimMethod)(model.parameters(), lr=lr)

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
### ~~~~~~~~~~~~~~~~~~~~~~ TRAINING ~~~~~~~~~~~~~~~~~~~~~~~~~ ###

def train(epoch):
    # Initialize training model and parameters
    model.train()
    total_loss = 0

    ################################
    # Training loop - run until we process every series of data
    for i in range(0, trainSeriesLength):
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


total_params = count_parameters(model)
print("Total params are ", total_params)

if args.cuda:
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()
    test_x = test_x.cuda()
    test_y = test_y.cuda()

criterion = nn.CrossEntropyLoss()
lr = args.lr
optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)


def evaluate():
    model.eval()
    out = model(test_x.unsqueeze(1).contiguous())
    loss = criterion(out.view(-1, n_classes), test_y.view(-1))
    pred = out.view(-1, n_classes).data.max(1, keepdim=True)[1]
    correct = pred.eq(test_y.data.view_as(pred)).cpu().sum()
    counter = out.view(-1, n_classes).size(0)
    print('\nTest set: Average loss: {:.8f} | Accuracy: {:.4f}\n'.format(
        loss.item(), 100. * float(correct) / counter))
    return loss.item()


def train(ep):
training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                  collate_fn=collate_fn_padd,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  drop_last=False)
test_dataset = TCNDataset(training=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              collate_fn=collate_fn_padd,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=False)

# One model/optimizer pair per variant under comparison
single_TCN = TCN()
single_TCN = single_TCN.to(device)
single_TCN_optimizer = torch.optim.Adam(single_TCN.parameters(), lr=0.001)

multi_stage_TCN = MultiStageTCN()
multi_stage_TCN = multi_stage_TCN.to(device)
multi_stage_TCN_optimizer = torch.optim.Adam(multi_stage_TCN.parameters(), lr=0.001)

multi_stage_TCN_video_loss = MultiStageTCN()
multi_stage_TCN_video_loss = multi_stage_TCN_video_loss.to(device)
multi_stage_TCN_video_loss_optimizer = torch.optim.Adam(
    multi_stage_TCN_video_loss.parameters(), lr=0.001)

parallel_TCNs = ParallelTCNs()
parallel_TCNs = parallel_TCNs.to(device)
parallel_TCNs_optimizer = torch.optim.Adam(parallel_TCNs.parameters(), lr=0.001)
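# `collate_fn_padd` is referenced above but not defined in this excerpt. A minimal
# sketch of what such a padding collate function might look like, assuming each
# dataset item is a (features, label) pair where features is a variable-length
# (T_i, C) tensor; this is an illustrative assumption, not the actual implementation.
import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn_padd_example(batch):
    feats, labels = zip(*batch)
    lengths = torch.tensor([f.shape[0] for f in feats])
    # Pad variable-length sequences to the longest in the batch: (B, T_max, C)
    padded = pad_sequence(feats, batch_first=True, padding_value=0.0)
    return padded, torch.stack(labels), lengths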
            pad_idx=symbols['<pad>'],
            dropout=dropout_rate,
            emb_dropout=emb_dropout_rate)
model = model.to(device)
print(model)

# folder to save model
save_path = 'model'
if not os.path.exists(save_path):
    os.makedirs(save_path)

# objective function
learning_rate = 4
criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=symbols['<pad>'])
optimizer = optim.SGD(model.parameters(), lr=learning_rate)  # Adam


# negative log likelihood
def NLL(logp, target, length):
    # Truncate targets and log-probabilities to the longest sequence in the batch,
    # then flatten for the cross-entropy criterion.
    target = target[:, :torch.max(length).item()].contiguous().view(-1)
    logp = logp[:, :torch.max(length).item(), :].contiguous().view(-1, logp.size(-1))
    # logp = logp.view(-1, logp.size(-1))
    return criterion(logp, target)


# training setting
epoch = 20
print_every = 50

# training interface
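# The training interface itself is not shown in this excerpt. A minimal sketch of
# how NLL, epoch, and print_every above might be wired together, assuming a
# `train_loader` that yields (inputs, targets, lengths) batches and a model whose
# forward pass returns per-token log-probabilities; these names are hypothetical
# and not taken from the source.
def train_sketch():
    model.train()
    running_loss = 0.0
    for ep in range(1, epoch + 1):
        for step, (inputs, targets, lengths) in enumerate(train_loader, 1):
            optimizer.zero_grad()
            logp = model(inputs.to(device))
            loss = NLL(logp, targets.to(device), lengths)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if step % print_every == 0:
                print('epoch {} step {} | avg loss {:.4f}'.format(
                    ep, step, running_loss / print_every))
                running_loss = 0.0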