try:
    model.load_state_dict(torch.load(model_file))
    print('Load Model')
except:
    print('No Model found')
    train_tcn = True

try:
    generator.load_state_dict(torch.load('generator.pt'))
    print("Load Generator Model")
except:
    print("Generator Model Not Found")

if train_tcn:
    model = model.cuda()
    optimizer_tcn = optim.Adam(model.parameters(), lr=lr_tcn)
    scheduler_tcn = optim.lr_scheduler.StepLR(optimizer_tcn, step_size=step_size_tcn, gamma=gamma_tcn)
    for j in range(epochs_tcn):
        avg_loss = 0
        optimizer_tcn.zero_grad()
        for i in range(nsample // batch_size):
            itrain = train[i]  # [batch_size, 1, length]
            if per_datapoint:
                for k in range(itrain.size()[2]):
                    ioutput = model(itrain).cuda()  # [batch_size, 3*ncomponent, length]
                    loss = LogMixGaussian(ioutput, itrain, batch_size, n_components, index=k)
                    loss.backward()
                    optimizer_tcn.step()
            else:
                ioutput = model(itrain).cuda()  # [batch_size, 3*ncomponent, length]
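# LogMixGaussian is a per-time-step mixture-density loss that is not defined in this
# listing. The sketch below is a hypothetical illustration of such a loss, assuming the
# 3*n_components output channels hold mixture logits, means, and log standard deviations,
# in that order; the name log_mix_gaussian_nll and this channel layout are assumptions,
# not the author's implementation (batch_size is kept only to mirror the call signature).
import math
import torch
import torch.nn.functional as F

def log_mix_gaussian_nll(output, target, batch_size, n_components, index):
    logits     = output[:, :n_components, index]                  # [batch_size, n_components]
    means      = output[:, n_components:2 * n_components, index]  # [batch_size, n_components]
    log_sigmas = output[:, 2 * n_components:, index]              # [batch_size, n_components]

    x = target[:, 0, index].unsqueeze(1)                          # observed value at this time step
    log_weights = F.log_softmax(logits, dim=1)                    # mixture weights in log space
    # log N(x | mu_k, sigma_k) for each mixture component
    log_probs = (-0.5 * ((x - means) / log_sigmas.exp()) ** 2
                 - log_sigmas - 0.5 * math.log(2 * math.pi))
    # negative log-likelihood of the mixture, averaged over the batch
    return -torch.logsumexp(log_weights + log_probs, dim=1).mean()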
# so we have to transpose the second and third dimensions of the vars
input = torch.transpose(input, 1, 2)
target = torch.transpose(target, 1, 2)
test_input = torch.transpose(test_input, 1, 2)
test_target = torch.transpose(test_target, 1, 2)
print(input.size(), target.size())

# build the model
input_size = 2   # dimension of each sequence element
num_hidden = 8   # num hidden units per layer
levels = 10      # num layers
channel_sizes = [num_hidden] * levels
kernel_size = 8

# Use the TCN specified in tcn.py
seq = TCN(input_size, input_size, channel_sizes, kernel_size, dropout=0.0)
if use_cuda:
    seq.cuda()
seq.double()
criterion = nn.MSELoss()

# use LBFGS as optimizer since we can load the whole data to train
optimizer = optim.LBFGS(seq.parameters(), lr=0.08)

# begin to train
best_loss = 1e8
EPOCHS = 100
for i in range(EPOCHS):
    print('EPOCH: ', i)

    def closure():
        optimizer.zero_grad()
        out = seq(input)
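        # LBFGS re-evaluates the model several times per parameter update, so the loss,
        # its backward pass, and the return value all live inside this closure; the
        # update itself is triggered later with optimizer.step(closure).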