import time
import random

import torch

# These routines assume module-level `criterion`, `optimizer`, and `get_batch`
# objects that are defined elsewhere in the project.


def train(args, model, train_dataset, epoch):
    with torch.enable_grad():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        hidden = model.init_hidden(args.batch_size)
        for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args, train_dataset, i)
            # inputSeq: [ seq_len * batch_size * feature_size ]
            # targetSeq: [ seq_len * batch_size * feature_size ]

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden = model.repackage_hidden(hidden)
            hidden_ = model.repackage_hidden(hidden)
            optimizer.zero_grad()

            '''Loss1: Free running loss'''
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            hids1 = []
            for i in range(inputSeq.size(0)):
                outVal, hidden_, hid = model.forward(outVal, hidden_, return_hiddens=True)
                outVals.append(outVal)
                hids1.append(hid)
            outSeq1 = torch.cat(outVals, dim=0)
            hids1 = torch.cat(hids1, dim=0)
            loss1 = criterion(outSeq1.contiguous().view(args.batch_size, -1),
                              targetSeq.contiguous().view(args.batch_size, -1))

            '''Loss2: Teacher forcing loss'''
            outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True)
            loss2 = criterion(outSeq2.contiguous().view(args.batch_size, -1),
                              targetSeq.contiguous().view(args.batch_size, -1))

            '''Loss3: Simplified Professor forcing loss'''
            loss3 = criterion(hids1.contiguous().view(args.batch_size, -1),
                              hids2.contiguous().view(args.batch_size, -1).detach())

            '''Total loss = Loss1 + Loss2 + Loss3'''
            loss = loss1 + loss2 + loss3
            loss.backward()

            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()

            total_loss += loss.item()

            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                      'loss {:5.5f} '.format(
                          epoch, batch, len(train_dataset) // args.bptt,
                          elapsed * 1000 / args.log_interval, cur_loss))
                total_loss = 0
                start_time = time.time()
def train(args, model, train_dataset, epoch):
    with torch.enable_grad():
        # Turn on training mode which enables dropout.
        model.train()
        total_loss = 0
        start_time = time.time()
        hidden = model.init_hidden(args.batch_size)
        for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args, train_dataset, i)
            # inputSeq: [ seq_len * batch_size * feature_size ]
            # targetSeq: [ seq_len * batch_size * feature_size ]

            # Starting each batch, we detach the hidden state from how it was previously produced.
            # If we didn't, the model would try backpropagating all the way to start of the dataset.
            hidden = model.repackage_hidden(hidden)
            hidden_ = model.repackage_hidden(hidden)
            optimizer.zero_grad()

            '''Loss1: Free running loss'''
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            hids1 = []
            for i in range(inputSeq.size(0)):
                outVal, hidden_, hid = model.forward(outVal, hidden_, return_hiddens=True)
                outVals.append(outVal)
                hids1.append(hid)
            outSeq1 = torch.cat(outVals, dim=0)
            hids1 = torch.cat(hids1, dim=0)
            loss1 = criterion(outSeq1.view(args.batch_size, -1),
                              targetSeq.view(args.batch_size, -1))

            '''Loss2: Teacher forcing loss'''
            outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True)
            loss2 = criterion(outSeq2.view(args.batch_size, -1),
                              targetSeq.view(args.batch_size, -1))

            '''Loss3: Simplified Professor forcing loss'''
            loss3 = criterion(hids1.view(args.batch_size, -1),
                              hids2.view(args.batch_size, -1).detach())

            '''Total loss = Loss1 + Loss2 + Loss3'''
            loss = loss1 + loss2 + loss3
            loss.backward()

            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
            optimizer.step()

            total_loss += loss.item()

            if batch % args.log_interval == 0 and batch > 0:
                cur_loss = total_loss / args.log_interval
                elapsed = time.time() - start_time
                print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                      'loss {:5.2f} '.format(
                          epoch, batch, len(train_dataset) // args.bptt,
                          elapsed * 1000 / args.log_interval, cur_loss))
                total_loss = 0
                start_time = time.time()
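
# ---------------------------------------------------------------------------
# Note (added sketch, not part of the original source): both train() variants
# above call model.repackage_hidden() to detach the recurrent state at batch
# boundaries, so backpropagation is truncated to the current BPTT window.
# A minimal, hypothetical version of such a helper, assuming the hidden state
# is a tensor or an LSTM-style tuple of tensors, might look like this; the
# project's actual implementation may differ.
def repackage_hidden_sketch(h):
    # Detach hidden-state tensors from the graph that produced them.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden_sketch(v) for v in h)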
def fit_norm_distribution_param(args, model, train_dataset, endPoint=10000):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    pasthidden = model.init_hidden(1)
    predictions = []
    organized = []
    errors = []
    # out = Variable(test_dataset[0].unsqueeze(0))
    with torch.no_grad():
        for t in range(endPoint):
            out, hidden = model.forward(train_dataset[t].unsqueeze(0), pasthidden)
            predictions.append([])
            organized.append([])
            errors.append([])
            predictions[t].append(out.data.cpu()[0][0][0])
            pasthidden = model.repackage_hidden(hidden)
            # Roll the model forward to predict the next prediction_window_size steps.
            for prediction_step in range(1, args.prediction_window_size):
                out, hidden = model.forward(out, hidden)
                predictions[t].append(out.data.cpu()[0][0][0])

            if t >= args.prediction_window_size:
                # Collect the predictions made for time t from the previous windows.
                for step in range(args.prediction_window_size):
                    organized[t].append(
                        predictions[step + t - args.prediction_window_size][args.prediction_window_size - 1 - step])
                errors[t] = torch.FloatTensor(organized[t]) - train_dataset[t][0][0]
                if args.cuda:
                    errors[t] = errors[t].cuda()
                errors[t] = errors[t].unsqueeze(0)

    errors_tensor = torch.cat(errors[args.prediction_window_size:], dim=0)
    mean = errors_tensor.mean(dim=0)
    cov = (errors_tensor.t().mm(errors_tensor) / errors_tensor.size(0)
           - mean.unsqueeze(1).mm(mean.unsqueeze(0)))
    # cov: positive-semidefinite and symmetric.
    return mean, cov
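
# ---------------------------------------------------------------------------
# Note (added sketch, not part of the original source): the covariance above is
# the biased moment estimate cov = E[e e^T] - mean mean^T over the windowed
# prediction errors. A tiny self-contained check of that identity on random
# data (illustrative names only; torch.cov requires PyTorch >= 1.10):
_errs = torch.randn(1000, 10)  # stand-in for errors_tensor: [N, window]
_mu = _errs.mean(dim=0)
_cov = _errs.t().mm(_errs) / _errs.size(0) - _mu.unsqueeze(1).mm(_mu.unsqueeze(0))
# Agrees with torch.cov up to the 1/N (biased) vs 1/(N-1) normalization.
assert torch.allclose(_cov, torch.cov(_errs.t(), correction=0), atol=1e-4)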
def train(args, model, train_dataset, epoch):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
        inputSeq, targetSeq = get_batch(train_dataset, i)
        # inputSeq: [ seq_len * batch_size * feature_size ]
        # targetSeq: [ seq_len * batch_size * feature_size ]

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = model.repackage_hidden(hidden)
        optimizer.zero_grad()

        USE_TEACHER_FORCING = random.random() < args.teacher_forcing_ratio
        if USE_TEACHER_FORCING:
            outSeq, hidden = model.forward(inputSeq, hidden)
        else:
            # Free running: feed the model's own prediction back as the next input.
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            for i in range(inputSeq.size(0)):
                outVal, hidden = model.forward(outVal, hidden)
                outVals.append(outVal)
            outSeq = torch.cat(outVals, dim=0)

        # print('outSeq:', outSeq.size())
        # print('targetSeq:', targetSeq.size())
        loss = criterion(outSeq.view(args.batch_size, -1),
                         targetSeq.view(args.batch_size, -1))
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()
        # for p in model2_for_timeDiff.parameters():
        #     p.data.add_(-lr, p.grad.data)

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                  'loss {:5.2f} '.format(epoch, batch, len(train_dataset) // args.bptt,
                                         elapsed * 1000 / args.log_interval, cur_loss))
            total_loss = 0
            start_time = time.time()
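
# ---------------------------------------------------------------------------
# Note (added sketch, not part of the original source): every routine here
# calls a get_batch() helper defined elsewhere in the project. A minimal,
# hypothetical version consistent with the [seq_len, batch_size, feature_size]
# layout and one-step-ahead targets used above (the real helper may differ):
def get_batch_sketch(args, source, i):
    # Take a bptt-length window as input and the one-step-shifted window as target.
    seq_len = min(args.bptt, source.size(0) - 1 - i)
    data = source[i:i + seq_len]            # [seq_len, batch_size, feature_size]
    target = source[i + 1:i + 1 + seq_len]  # next-step values the model should predict
    return data, target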
def evaluate_1step_pred(args, model, test_dataset):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    with torch.no_grad():
        hidden = model.init_hidden(args.eval_batch_size)
        for nbatch, i in enumerate(range(0, test_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args, test_dataset, i)
            outSeq, hidden = model.forward(inputSeq, hidden)
            loss = criterion(outSeq.view(args.batch_size, -1),
                             targetSeq.view(args.batch_size, -1))
            hidden = model.repackage_hidden(hidden)
            total_loss += loss.item()
    return total_loss / nbatch
def evaluate(args, model, test_dataset):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        total_loss = 0
        hidden = model.init_hidden(args.eval_batch_size)
        nbatch = 1
        for nbatch, i in enumerate(range(0, test_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args, test_dataset, i)
            # inputSeq: [ seq_len * batch_size * feature_size ]
            # targetSeq: [ seq_len * batch_size * feature_size ]
            hidden_ = model.repackage_hidden(hidden)

            '''Loss1: Free running loss'''
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            hids1 = []
            for i in range(inputSeq.size(0)):
                outVal, hidden_, hid = model.forward(outVal, hidden_, return_hiddens=True)
                outVals.append(outVal)
                hids1.append(hid)
            outSeq1 = torch.cat(outVals, dim=0)
            hids1 = torch.cat(hids1, dim=0)
            loss1 = criterion(outSeq1.contiguous().view(args.batch_size, -1),
                              targetSeq.contiguous().view(args.batch_size, -1))

            '''Loss2: Teacher forcing loss'''
            outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True)
            loss2 = criterion(outSeq2.contiguous().view(args.batch_size, -1),
                              targetSeq.contiguous().view(args.batch_size, -1))

            '''Loss3: Simplified Professor forcing loss'''
            loss3 = criterion(hids1.view(args.batch_size, -1),
                              hids2.view(args.batch_size, -1).detach())

            '''Total loss = Loss1 + Loss2 + Loss3'''
            loss = loss1 + loss2 + loss3
            total_loss += loss.item()

    return total_loss / (nbatch + 1)
def anomalyScore(args, model, test_dataset, mean, cov, endPoint=10000):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    pasthidden = model.init_hidden(1)
    predictions = []
    organized = []
    errors = []
    # out = Variable(test_dataset[0].unsqueeze(0))
    with torch.no_grad():
        for t in range(endPoint):
            out, hidden = model.forward(test_dataset[t].unsqueeze(0), pasthidden)
            predictions.append([])
            organized.append([])
            errors.append([])
            predictions[t].append(out.data.cpu()[0][0][0])
            pasthidden = model.repackage_hidden(hidden)
            # Roll the model forward to predict the next prediction_window_size steps.
            for prediction_step in range(1, args.prediction_window_size):
                out, hidden = model.forward(out, hidden)
                predictions[t].append(out.data.cpu()[0][0][0])

            if t >= args.prediction_window_size:
                # Collect the predictions made for time t from the previous windows.
                for step in range(args.prediction_window_size):
                    organized[t].append(
                        predictions[step + t - args.prediction_window_size][args.prediction_window_size - 1 - step])
                organized[t] = torch.FloatTensor(organized[t]).unsqueeze(0)
                errors[t] = organized[t] - test_dataset[t][0][0]
                if args.cuda:
                    errors[t] = errors[t].cuda()
            else:
                organized[t] = torch.zeros(1, args.prediction_window_size)
                errors[t] = torch.zeros(1, args.prediction_window_size)
                if args.cuda:
                    errors[t] = errors[t].cuda()

    scores = []
    for error in errors:
        mult1 = error - mean.unsqueeze(0)   # [ 1 * prediction_window_size ]
        mult2 = torch.inverse(cov)          # [ prediction_window_size * prediction_window_size ]
        mult3 = mult1.t()                   # [ prediction_window_size * 1 ]
        score = torch.mm(mult1, torch.mm(mult2, mult3))
        scores.append(score[0][0])
    return scores, organized, errors
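
# ---------------------------------------------------------------------------
# Note (added sketch, not part of the original source): the score computed
# above, (e - mean) cov^{-1} (e - mean)^T, is the squared Mahalanobis distance
# of the window of prediction errors under the Gaussian fitted by
# fit_norm_distribution_param(). An equivalent standalone form, with
# illustrative names and assuming cov is invertible, is:
def mahalanobis_sq_sketch(error, mean, cov):
    # error: [1, w], mean: [w], cov: [w, w]; returns a scalar anomaly score.
    diff = error - mean.unsqueeze(0)
    return torch.mm(diff, torch.mm(torch.inverse(cov), diff.t()))[0][0]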
def evaluate(args, model, test_dataset):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0
    hidden = model.init_hidden(args.eval_batch_size)
    with torch.no_grad():
        for nbatch, i in enumerate(range(0, test_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(test_dataset, i, evaluation=True)
            # outVal = inputSeq[0].unsqueeze(0)
            # outVals = []
            # for i in range(inputSeq.size(0)):
            #     outVal, hidden = model.forward(outVal, hidden)
            #     outVals.append(outVal)
            # outSeq = torch.cat(outVals, dim=0)
            outSeq, hidden = model.forward(inputSeq, hidden)
            loss = criterion(outSeq.view(args.batch_size, -1),
                             targetSeq.view(args.batch_size, -1))
            hidden = model.repackage_hidden(hidden)
            total_loss += loss.item()
    return total_loss / nbatch
def evaluate(args, model, test_dataset):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    with torch.no_grad():
        total_loss = 0
        hidden = model.init_hidden(args.eval_batch_size)
        nbatch = 1
        for nbatch, i in enumerate(range(0, test_dataset.size(0) - 1, args.bptt)):
            inputSeq, targetSeq = get_batch(args, test_dataset, i)
            # inputSeq: [ seq_len * batch_size * feature_size ]
            # targetSeq: [ seq_len * batch_size * feature_size ]
            hidden_ = model.repackage_hidden(hidden)

            '''Loss1: Free running loss'''
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            hids1 = []
            for i in range(inputSeq.size(0)):
                outVal, hidden_, hid = model.forward(outVal, hidden_, return_hiddens=True)
                outVals.append(outVal)
                hids1.append(hid)
            outSeq1 = torch.cat(outVals, dim=0)
            hids1 = torch.cat(hids1, dim=0)
            loss1 = criterion(outSeq1.view(args.batch_size, -1),
                              targetSeq.view(args.batch_size, -1))

            '''Loss2: Teacher forcing loss'''
            outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True)
            loss2 = criterion(outSeq2.view(args.batch_size, -1),
                              targetSeq.view(args.batch_size, -1))

            '''Loss3: Simplified Professor forcing loss'''
            loss3 = criterion(hids1.view(args.batch_size, -1),
                              hids2.view(args.batch_size, -1).detach())

            '''Total loss = Loss1 + Loss2 + Loss3'''
            loss = loss1 + loss2 + loss3
            total_loss += loss.item()

    return total_loss / (nbatch + 1)
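
# ---------------------------------------------------------------------------
# Note (added sketch, not part of the original source): a hypothetical driver
# tying the pieces together. It assumes the module-level `criterion` and
# `optimizer` used by the functions above, an `args.epochs` field, and datasets
# shaped [time, batch_size, feature_size]; the series passed to the scoring
# stage should use batch size 1, since fit_norm_distribution_param() and
# anomalyScore() call model.init_hidden(1). None of this comes from the
# original source.
def run_pipeline_sketch(args, model, train_dataset, test_dataset):
    for epoch in range(1, args.epochs + 1):
        train(args, model, train_dataset, epoch)
        val_loss = evaluate(args, model, test_dataset)
        print('| end of epoch {:3d} | valid loss {:5.4f} |'.format(epoch, val_loss))

    # Fit the Gaussian over multi-step prediction errors on the training
    # series, then score every time step of the test series.
    mean, cov = fit_norm_distribution_param(args, model, train_dataset,
                                            endPoint=train_dataset.size(0))
    scores, organized, errors = anomalyScore(args, model, test_dataset, mean, cov,
                                             endPoint=test_dataset.size(0))
    return scores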