def train(model):
    model = model.train()
    losses = []
    accurate_preds = 0
    all_targets = []
    all_predictions = []
    for d in train_loader:
        inputs = d['input_ids'].to(device)
        masks = d['attention_mask'].to(device)
        all_targets.extend(list(d['targets'].squeeze().numpy()))
        targets = d['targets'].to(device)
        outputs = model(input_ids=inputs, attention_mask=masks)
        _, preds = torch.max(outputs, dim=1)
        loss = criterion(outputs, targets)
        all_predictions.extend(list(preds.cpu().squeeze().numpy()))
        accurate_preds += torch.sum(preds == targets)
        losses.append(loss.item())
        # Reset gradients for every batch; the original zeroed them only once
        # before the loop, so gradients accumulated across batches.
        optimizer.zero_grad()
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()
    return accurate_preds.double() / train_len, np.mean(losses), all_targets, all_predictions
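# A validation pass is typically run alongside train() each epoch. The function
# below is a minimal sketch, not part of the original snippet: it mirrors the
# loop above without backprop and assumes a validation DataLoader and example
# count analogous to train_loader / train_len (e.g. the valid_loader / valid_len
# returned by load_data() in the setup snippet elsewhere in this collection).
def evaluate(model, data_loader, data_len):
    model = model.eval()
    losses = []
    accurate_preds = 0
    with torch.no_grad():
        for d in data_loader:
            inputs = d['input_ids'].to(device)
            masks = d['attention_mask'].to(device)
            targets = d['targets'].to(device)
            outputs = model(input_ids=inputs, attention_mask=masks)
            _, preds = torch.max(outputs, dim=1)
            losses.append(criterion(outputs, targets).item())
            accurate_preds += torch.sum(preds == targets)
    return accurate_preds.double() / data_len, np.mean(losses)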
def train(args, data):
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
    for epoch in range(args.epochs):
        running_loss = 0.0
        for i, d in enumerate(data.trainloader, 0):
            inputs, labels = d
            inputs, labels = inputs.cuda(), labels.cuda()
            # Set the gradients to 0
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_function(outputs, labels)
            # Compute the gradients for all parameters
            loss.backward()
            # Update all the parameters based on the gradients
            optimizer.step()
            running_loss += loss.item()
            if i % 2000 == 1999:  # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 2000))
                running_loss = 0.0
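# The loop above only reports running loss. A held-out accuracy check is usually
# paired with it; the sketch below is an assumption, not part of the original
# snippet, and presumes a data.testloader yielding (inputs, labels) batches like
# data.trainloader, with the model already on the GPU.
def evaluate(data):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in data.testloader:
            inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            _, predicted = torch.max(outputs, dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total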
def train(args, model, train_dataset, epoch): with torch.enable_grad(): # Turn on training mode which enables dropout. model.train() total_loss = 0 start_time = time.time() hidden = model.init_hidden(args.batch_size) for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)): inputSeq, targetSeq = get_batch(args,train_dataset, i) # inputSeq: [ seq_len * batch_size * feature_size ] # targetSeq: [ seq_len * batch_size * feature_size ] # Starting each batch, we detach the hidden state from how it was previously produced. # If we didn't, the model would try backpropagating all the way to start of the dataset. hidden = model.repackage_hidden(hidden) hidden_ = model.repackage_hidden(hidden) optimizer.zero_grad() '''Loss1: Free running loss''' outVal = inputSeq[0].unsqueeze(0) outVals=[] hids1 = [] for i in range(inputSeq.size(0)): outVal, hidden_, hid = model.forward(outVal, hidden_,return_hiddens=True) outVals.append(outVal) hids1.append(hid) outSeq1 = torch.cat(outVals,dim=0) hids1 = torch.cat(hids1,dim=0) loss1 = criterion(outSeq1.contiguous().view(args.batch_size,-1), targetSeq.contiguous().view(args.batch_size,-1)) '''Loss2: Teacher forcing loss''' outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True) loss2 = criterion(outSeq2.contiguous().view(args.batch_size, -1), targetSeq.contiguous().view(args.batch_size, -1)) '''Loss3: Simplified Professor forcing loss''' loss3 = criterion(hids1.contiguous().view(args.batch_size,-1), hids2.contiguous().view(args.batch_size,-1).detach()) '''Total loss = Loss1+Loss2+Loss3''' loss = loss1+loss2+loss3 loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) optimizer.step() total_loss += loss.item() if batch % args.log_interval == 0 and batch > 0: cur_loss = total_loss / args.log_interval elapsed = time.time() - start_time print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | ' 'loss {:5.5f} '.format( epoch, batch, len(train_dataset) // args.bptt, elapsed * 1000 / args.log_interval, cur_loss)) total_loss = 0 start_time = time.time()
def train(args, model, train_dataset,epoch): with torch.enable_grad(): # Turn on training mode which enables dropout. model.train() total_loss = 0 start_time = time.time() hidden = model.init_hidden(args.batch_size) for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)): inputSeq, targetSeq = get_batch(args,train_dataset, i) # inputSeq: [ seq_len * batch_size * feature_size ] # targetSeq: [ seq_len * batch_size * feature_size ] # Starting each batch, we detach the hidden state from how it was previously produced. # If we didn't, the model would try backpropagating all the way to start of the dataset. hidden = model.repackage_hidden(hidden) hidden_ = model.repackage_hidden(hidden) optimizer.zero_grad() '''Loss1: Free running loss''' outVal = inputSeq[0].unsqueeze(0) outVals=[] hids1 = [] for i in range(inputSeq.size(0)): outVal, hidden_, hid = model.forward(outVal, hidden_,return_hiddens=True) outVals.append(outVal) hids1.append(hid) outSeq1 = torch.cat(outVals,dim=0) hids1 = torch.cat(hids1,dim=0) loss1 = criterion(outSeq1.view(args.batch_size,-1), targetSeq.view(args.batch_size,-1)) '''Loss2: Teacher forcing loss''' outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True) loss2 = criterion(outSeq2.view(args.batch_size, -1), targetSeq.view(args.batch_size, -1)) '''Loss3: Simplified Professor forcing loss''' loss3 = criterion(hids1.view(args.batch_size,-1), hids2.view(args.batch_size,-1).detach()) '''Total loss = Loss1+Loss2+Loss3''' loss = loss1+loss2+loss3 loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) optimizer.step() total_loss += loss.item() if batch % args.log_interval == 0 and batch > 0: cur_loss = total_loss / args.log_interval elapsed = time.time() - start_time print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | ' 'loss {:5.2f} '.format( epoch, batch, len(train_dataset) // args.bptt, elapsed * 1000 / args.log_interval, cur_loss)) total_loss = 0 start_time = time.time()
def train(args, model, train_dataset):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_dataset.size(0) - 1, args.bptt)):
        inputSeq, targetSeq = get_batch(train_dataset, i)
        # inputSeq:  [ seq_len * batch_size * feature_size ]
        # targetSeq: [ seq_len * batch_size * feature_size ]

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = model.repackage_hidden(hidden)
        optimizer.zero_grad()

        USE_TEACHER_FORCING = random.random() < args.teacher_forcing_ratio
        if USE_TEACHER_FORCING:
            outSeq, hidden = model.forward(inputSeq, hidden)
        else:
            # Free running: feed the model's own prediction back in at each step.
            outVal = inputSeq[0].unsqueeze(0)
            outVals = []
            for i in range(inputSeq.size(0)):
                outVal, hidden = model.forward(outVal, hidden)
                outVals.append(outVal)
            outSeq = torch.cat(outVals, dim=0)

        loss = criterion(outSeq.view(args.batch_size, -1),
                         targetSeq.view(args.batch_size, -1))
        loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | '
                  'loss {:5.2f} '.format(epoch, batch, len(train_dataset) // args.bptt,
                                         elapsed * 1000 / args.log_interval, cur_loss))
            total_loss = 0
            start_time = time.time()
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        # Clip before optimizer.step(); clipping after the step (as in the original) has no effect.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        optimizer.step()

        total_loss += loss.item()

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss / args.log_interval
            elapsed = time.time() - start_time
            # Guard against overflow when the loss is still large early in training.
            try:
                ppl = math.exp(cur_loss)
            except OverflowError:
                ppl = float('inf')
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // args.bptt, lr,
                      elapsed * 1000 / args.log_interval, cur_loss, ppl))
            total_loss = 0
            start_time = time.time()
        # .to() is not in-place; reassign if the intent is to move the batch off the GPU.
        data = data.to("cpu")
        targets = targets.to("cpu")
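# The snippet above calls repackage_hidden() but does not define it. A typical
# definition (as in the PyTorch word_language_model example) detaches the hidden
# state so backprop stops at the current batch; shown here as a reference sketch,
# not taken from this snippet.
def repackage_hidden(h):
    """Wrap hidden states in new Tensors to detach them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    # An LSTM hidden state is a (h, c) tuple; recurse over it.
    return tuple(repackage_hidden(v) for v in h)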
def train(model, iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0
    len_iterator = 0
    for i, batch in enumerate(iterator):
        src = batch.src
        trg = batch.trg
        optimizer.zero_grad()
        output = model(src, trg)
        loss = criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
        len_iterator += 1
    return epoch_loss / len_iterator
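# A matching evaluation loop is usually paired with the train() above. The sketch
# below is an assumption, not part of the original snippet; it presumes the same
# batch.src / batch.trg layout and criterion.
def evaluate(model, iterator, criterion):
    model.eval()
    epoch_loss = 0
    len_iterator = 0
    with torch.no_grad():
        for batch in iterator:
            output = model(batch.src, batch.trg)
            epoch_loss += criterion(output, batch.trg).item()
            len_iterator += 1
    return epoch_loss / max(len_iterator, 1)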
def main():
    model = Net()
    if torch.cuda.is_available():
        model.cuda()
    model.apply(weights_init)
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}'".format(args.resume))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Data handling is done directly inside train()
    # dataParser = DataParser(batch_size)

    loss_function = nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    # train_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=settings.MILESTONES, gamma=0.2)  # learning rate decay
    scheduler = lr_scheduler.StepLR(optimizer, step_size=args.stepsize, gamma=args.gamma)

    log = Logger(join(TMP_DIR, '%s-%d-log.txt' % ('Adam', args.lr)))
    sys.stdout = log

    train_loss = []
    train_loss_detail = []
    for epoch in range(args.start_epoch, args.maxepoch):
        if epoch == 0:
            print("Performing initial testing...")
            # left empty for now
        tr_avg_loss, tr_detail_loss = train(model=model, optimizer=optimizer, epoch=epoch,
                                            save_dir=join(TMP_DIR, 'epoch-%d-training-record' % epoch))
        test()
        log.flush()
        # Save checkpoint
        save_file = os.path.join(TMP_DIR, 'checkpoint_epoch{}.pth'.format(epoch))
        save_checkpoint({'epoch': epoch,
                         'state_dict': model.state_dict(),
                         'optimizer': optimizer.state_dict()})
        scheduler.step()  # automatically adjust the learning rate
        train_loss.append(tr_avg_loss)
        train_loss_detail += tr_detail_loss
def train():
    model.train()  # Turn on the train mode
    total_loss = 0.0
    start_time = time.time()
    batches = np.random.permutation(range(0, train_data.size(0) - seq_len, seq_len))
    for batch_counter, i in enumerate(batches):
        data, targets = get_batch(train_data, i)
        optimizer.zero_grad()
        src_mask = model.generate_square_subsequent_mask(data.size(0)).to(device)
        output = model(data, src_mask=src_mask)
        loss = criterion(output, targets)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        log_interval = 100
        if batch_counter % log_interval == 0 and batch_counter > 0:
            cur_loss = total_loss / log_interval
            elapsed = time.time() - start_time
            print("| epoch {:3d} | {:5d}/{:5d} batches | {:5.6f}"
                  "| ms/batch {:5.4f} | "
                  "loss {:5.4f}".format(
                      epoch, batch_counter, len(train_data) // seq_len,
                      scheduler.get_last_lr()[0],
                      elapsed * 1000 / log_interval, cur_loss))
            total_loss = 0
            start_time = time.time()
def main(config): print('loading dataset') if config.dataset_path == None: if config.model == 'cbow': nlp_dataset = dataset.cbow_dataset.CBOWDataset(config) elif config.model == 'skip-gram': nlp_dataset = dataset.skipgram_dataset.SkipGramDataset(config) elif config.model == 'neg-sampling': nlp_dataset = dataset.negsampling_dataset.NegSamplingDataset( config) elif config.model == 'fast-text': nlp_dataset = dataset.fasttext_dataset.FastTextDataset(config) else: raise AssertionError('dataset should be one of w2v models.') else: with open(config.dataset_path, 'rb') as f: nlp_dataset = pickle.load(f) dataloader = DataLoader( nlp_dataset, batch_size=config.batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn, ) print('dataloader made') if config.model == 'neg-sampling': trainer = NegSamplingTrainer(dataloader, config) else: model = EmbeddingModule(len(nlp_dataset), config).to(config.device) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) trainer = Trainer(dataloader, model, criterion, optimizer, config) print('start training') trainer.train() model = trainer.model with open('./checkpoints/w2v' + config.model + '_model.pkl', 'wb') as f: pickle.dump(model, f)
train_dataset = batchify(TimeseriesData.trainData, 1)[:10000] test_dataset = batchify(TimeseriesData.testData, 1) ############################################################################### # Build the model ############################################################################### model = model.RNNPredictor(rnn_type = args.model, enc_inp_size=3, rnn_inp_size = args.emsize, rnn_hid_size = args.nhid, dec_out_size=3, nlayers = args.nlayers,) if args.cuda: model.cuda() optimizer = optim.Adam(model.parameters(), lr= 0.0001) criterion = nn.MSELoss() ############################################################################### # Training code ############################################################################### def fit_norm_distribution_param(args, model, train_dataset, endPoint=10000): # Turn on evaluation mode which disables dropout. model.eval() pasthidden = model.init_hidden(1) predictions = [] organized = [] errors = [] #out = Variable(test_dataset[0].unsqueeze(0)) for t in range(endPoint):
def count_parameters(model): return sum(p.numel() for p in model.parameters() if p.requires_grad)
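# Example use of count_parameters(): report the trainable parameter count once a
# model has been built (the `model` name here is illustrative, not taken from the
# snippet above).
print(f'The model has {count_parameters(model):,} trainable parameters')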
def main(): """Run training""" from model import model parser = argparse.ArgumentParser( description='PyTorch RNN Prediction Model on Time-series Dataset') parser.add_argument( '--data', type=str, default='ecg', help= 'type of the dataset (ecg, gesture, power_demand, space_shuttle, respiration, nyc_taxi' ) parser.add_argument('--filename', type=str, default='chfdb_chf13_45590.pkl', help='filename of the dataset') parser.add_argument( '--model', type=str, default='LSTM', help='type of recurrent net (RNN_TANH, RNN_RELU, LSTM, GRU, SRU)') parser.add_argument('--augment', type=bool, default=True, help='augment') parser.add_argument('--emsize', type=int, default=32, help='size of rnn input features') parser.add_argument('--nhid', type=int, default=32, help='number of hidden units per layer') parser.add_argument('--nlayers', type=int, default=2, help='number of layers') parser.add_argument('--res_connection', action='store_true', help='residual connection') parser.add_argument('--lr', type=float, default=0.0002, help='initial learning rate') parser.add_argument('--weight_decay', type=float, default=1e-4, help='weight decay') parser.add_argument('--clip', type=float, default=10, help='gradient clipping') parser.add_argument('--epochs', type=int, default=400, help='upper epoch limit') parser.add_argument('--batch_size', type=int, default=64, metavar='N', help='batch size') parser.add_argument('--eval_batch_size', type=int, default=64, metavar='N', help='eval_batch size') parser.add_argument('--bptt', type=int, default=50, help='sequence length') parser.add_argument('--teacher_forcing_ratio', type=float, default=0.7, help='teacher forcing ratio (deprecated)') parser.add_argument('--dropout', type=float, default=0.2, help='dropout applied to layers (0 = no dropout)') parser.add_argument( '--tied', action='store_true', help='tie the word embedding and softmax weights (deprecated)') parser.add_argument('--seed', type=int, default=1111, help='random seed') parser.add_argument('--device', type=str, default='cuda', help='cuda or cpu') parser.add_argument('--log_interval', type=int, default=10, metavar='N', help='report interval') parser.add_argument('--save_interval', type=int, default=10, metavar='N', help='save interval') parser.add_argument('--save_fig', action='store_true', help='save figure') parser.add_argument( '--resume', '-r', help= 'use checkpoint model parameters as initial parameters (default: False)', action="store_true") parser.add_argument( '--pretrained', '-p', help= 'use checkpoint model parameters and do not train anymore (default: False)', action="store_true") parser.add_argument('--prediction_window_size', type=int, default=10, help='prediction_window_size') args = parser.parse_args() # Set the random seed manually for reproducibility. 
torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) ############################################################################### # Load data ############################################################################### TimeseriesData = preprocess_data.PickleDataLoad( data_type=args.data, filename=args.filename, augment_test_data=args.augment) train_dataset = TimeseriesData.batchify(args, TimeseriesData.trainData, args.batch_size) test_dataset = TimeseriesData.batchify(args, TimeseriesData.testData, args.eval_batch_size) gen_dataset = TimeseriesData.batchify(args, TimeseriesData.testData, 1) ############################################################################### # Build the model ############################################################################### feature_dim = TimeseriesData.trainData.size(1) model = model.RNNPredictor(rnn_type=args.model, enc_inp_size=feature_dim, rnn_inp_size=args.emsize, rnn_hid_size=args.nhid, dec_out_size=feature_dim, nlayers=args.nlayers, dropout=args.dropout, tie_weights=args.tied, res_connection=args.res_connection).to( args.device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) criterion = nn.MSELoss() ############################################################################### # Training code ############################################################################### def get_batch(args, source, i): seq_len = min(args.bptt, len(source) - 1 - i) data = source[i:i + seq_len] # [ seq_len * batch_size * feature_size ] target = source[i + 1:i + 1 + seq_len] # [ (seq_len x batch_size x feature_size) ] return data, target def generate_output(args, epoch, model, gen_dataset, disp_uncertainty=True, startPoint=500, endPoint=3500): if args.save_fig: # Turn on evaluation mode which disables dropout. 
model.eval() hidden = model.init_hidden(1) outSeq = [] upperlim95 = [] lowerlim95 = [] with torch.no_grad(): for i in range(endPoint): if i >= startPoint: # if disp_uncertainty and epoch > 40: # outs = [] # model.train() # for i in range(20): # out_, hidden_ = model.forward(out+0.01*Variable(torch.randn(out.size())).cuda(),hidden,noise=True) # outs.append(out_) # model.eval() # outs = torch.cat(outs,dim=0) # out_mean = torch.mean(outs,dim=0) # [bsz * feature_dim] # out_std = torch.std(outs,dim=0) # [bsz * feature_dim] # upperlim95.append(out_mean + 2.58*out_std/np.sqrt(20)) # lowerlim95.append(out_mean - 2.58*out_std/np.sqrt(20)) out, hidden = model.forward(out, hidden) #print(out_mean,out) else: out, hidden = model.forward( gen_dataset[i].unsqueeze(0), hidden) outSeq.append(out.data.cpu()[0][0].unsqueeze(0)) outSeq = torch.cat(outSeq, dim=0) # [seqLength * feature_dim] target = preprocess_data.reconstruct(gen_dataset.cpu(), TimeseriesData.mean, TimeseriesData.std) outSeq = preprocess_data.reconstruct(outSeq, TimeseriesData.mean, TimeseriesData.std) # if epoch>40: # upperlim95 = torch.cat(upperlim95, dim=0) # lowerlim95 = torch.cat(lowerlim95, dim=0) # upperlim95 = preprocess_data.reconstruct(upperlim95.data.cpu().numpy(),TimeseriesData.mean,TimeseriesData.std) # lowerlim95 = preprocess_data.reconstruct(lowerlim95.data.cpu().numpy(),TimeseriesData.mean,TimeseriesData.std) plt.figure(figsize=(15, 5)) for i in range(target.size(-1)): plt.plot(target[:, :, i].numpy(), label='Target' + str(i), color='black', marker='.', linestyle='--', markersize=1, linewidth=0.5) plt.plot(range(startPoint), outSeq[:startPoint, i].numpy(), label='1-step predictions for target' + str(i), color='green', marker='.', linestyle='--', markersize=1.5, linewidth=1) # if epoch>40: # plt.plot(range(startPoint, endPoint), upperlim95[:,i].numpy(), label='upperlim'+str(i), # color='skyblue', marker='.', linestyle='--', markersize=1.5, linewidth=1) # plt.plot(range(startPoint, endPoint), lowerlim95[:,i].numpy(), label='lowerlim'+str(i), # color='skyblue', marker='.', linestyle='--', markersize=1.5, linewidth=1) plt.plot(range(startPoint, endPoint), outSeq[startPoint:, i].numpy(), label='Recursive predictions for target' + str(i), color='blue', marker='.', linestyle='--', markersize=1.5, linewidth=1) plt.xlim([startPoint - 500, endPoint]) plt.xlabel('Index', fontsize=15) plt.ylabel('Value', fontsize=15) plt.title('Time-series Prediction on ' + args.data + ' Dataset', fontsize=18, fontweight='bold') plt.legend() plt.tight_layout() plt.text(startPoint - 500 + 10, target.min(), 'Epoch: ' + str(epoch), fontsize=15) save_dir = Path( 'result', args.data, args.filename).with_suffix('').joinpath('fig_prediction') save_dir.mkdir(parents=True, exist_ok=True) plt.savefig( save_dir.joinpath('fig_epoch' + str(epoch)).with_suffix('.png')) #plt.show() plt.close() return outSeq else: pass def evaluate_1step_pred(args, model, test_dataset): # Turn on evaluation mode which disables dropout. 
model.eval() total_loss = 0 with torch.no_grad(): hidden = model.init_hidden(args.eval_batch_size) for nbatch, i in enumerate( range(0, test_dataset.size(0) - 1, args.bptt)): inputSeq, targetSeq = get_batch(args, test_dataset, i) outSeq, hidden = model.forward(inputSeq, hidden) loss = criterion(outSeq.view(args.batch_size, -1), targetSeq.view(args.batch_size, -1)) hidden = model.repackage_hidden(hidden) total_loss += loss.item() return total_loss / nbatch def train(args, model, train_dataset, epoch): with torch.enable_grad(): # Turn on training mode which enables dropout. model.train() total_loss = 0 start_time = time.time() hidden = model.init_hidden(args.batch_size) for batch, i in enumerate( range(0, train_dataset.size(0) - 1, args.bptt)): inputSeq, targetSeq = get_batch(args, train_dataset, i) # inputSeq: [ seq_len * batch_size * feature_size ] # targetSeq: [ seq_len * batch_size * feature_size ] # Starting each batch, we detach the hidden state from how it was previously produced. # If we didn't, the model would try backpropagating all the way to start of the dataset. hidden = model.repackage_hidden(hidden) hidden_ = model.repackage_hidden(hidden) optimizer.zero_grad() '''Loss1: Free running loss''' outVal = inputSeq[0].unsqueeze(0) outVals = [] hids1 = [] for i in range(inputSeq.size(0)): outVal, hidden_, hid = model.forward(outVal, hidden_, return_hiddens=True) outVals.append(outVal) hids1.append(hid) outSeq1 = torch.cat(outVals, dim=0) hids1 = torch.cat(hids1, dim=0) loss1 = criterion( outSeq1.contiguous().view(args.batch_size, -1), targetSeq.contiguous().view(args.batch_size, -1)) '''Loss2: Teacher forcing loss''' outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True) loss2 = criterion( outSeq2.contiguous().view(args.batch_size, -1), targetSeq.contiguous().view(args.batch_size, -1)) '''Loss3: Simplified Professor forcing loss''' loss3 = criterion(hids1.view(args.batch_size, -1), hids2.view(args.batch_size, -1).detach()) '''Total loss = Loss1+Loss2+Loss3''' loss = loss1 + loss2 + loss3 loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip) optimizer.step() total_loss += loss.item() if batch % args.log_interval == 0 and batch > 0: cur_loss = total_loss / args.log_interval elapsed = time.time() - start_time print( '| epoch {:3d} | {:5d}/{:5d} batches | ms/batch {:5.4f} | ' 'loss {:5.2f} '.format( epoch, batch, len(train_dataset) // args.bptt, elapsed * 1000 / args.log_interval, cur_loss)) total_loss = 0 start_time = time.time() def evaluate(args, model, test_dataset): # Turn on evaluation mode which disables dropout. 
model.eval() with torch.no_grad(): total_loss = 0 hidden = model.init_hidden(args.eval_batch_size) nbatch = 1 for nbatch, i in enumerate( range(0, test_dataset.size(0) - 1, args.bptt)): inputSeq, targetSeq = get_batch(args, test_dataset, i) # inputSeq: [ seq_len * batch_size * feature_size ] # targetSeq: [ seq_len * batch_size * feature_size ] hidden_ = model.repackage_hidden(hidden) '''Loss1: Free running loss''' outVal = inputSeq[0].unsqueeze(0) outVals = [] hids1 = [] for i in range(inputSeq.size(0)): outVal, hidden_, hid = model.forward(outVal, hidden_, return_hiddens=True) outVals.append(outVal) hids1.append(hid) outSeq1 = torch.cat(outVals, dim=0) hids1 = torch.cat(hids1, dim=0) loss1 = criterion( outSeq1.contiguous().view(args.batch_size, -1), targetSeq.contiguous().view(args.batch_size, -1)) '''Loss2: Teacher forcing loss''' outSeq2, hidden, hids2 = model.forward(inputSeq, hidden, return_hiddens=True) loss2 = criterion( outSeq2.contiguous().view(args.batch_size, -1), targetSeq.contiguous().view(args.batch_size, -1)) '''Loss3: Simplified Professor forcing loss''' loss3 = criterion(hids1.view(args.batch_size, -1), hids2.view(args.batch_size, -1).detach()) '''Total loss = Loss1+Loss2+Loss3''' loss = loss1 + loss2 + loss3 total_loss += loss.item() return total_loss / (nbatch + 1) # Loop over epochs. if args.resume or args.pretrained: print("=> loading checkpoint ") checkpoint = torch.load( Path('save', args.data, 'checkpoint', args.filename).with_suffix('.pth')) args, start_epoch, best_val_loss = model.load_checkpoint( args, checkpoint, feature_dim) optimizer.load_state_dict((checkpoint['optimizer'])) del checkpoint epoch = start_epoch print("=> loaded checkpoint") else: epoch = 1 start_epoch = 1 best_val_loss = float('inf') print("=> Start training from scratch") print('-' * 89) print(args) print('-' * 89) if not args.pretrained: # At any point you can hit Ctrl + C to break out of training early. try: for epoch in range(start_epoch, args.epochs + 1): epoch_start_time = time.time() train(args, model, train_dataset, epoch) val_loss = evaluate(args, model, test_dataset) print('-' * 89) print( '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.4f} | ' .format(epoch, (time.time() - epoch_start_time), val_loss)) print('-' * 89) generate_output(args, epoch, model, gen_dataset, startPoint=1500) if epoch % args.save_interval == 0: # Save the model if the validation loss is the best we've seen so far. is_best = val_loss < best_val_loss best_val_loss = min(val_loss, best_val_loss) model_dictionary = { 'epoch': epoch, 'best_loss': best_val_loss, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'args': args } model.save_checkpoint(model_dictionary, is_best) except KeyboardInterrupt: print('-' * 89) print('Exiting from training early') # Calculate mean and covariance for each channel's prediction errors, and save them with the trained model print('=> calculating mean and covariance') means, covs = list(), list() train_dataset = TimeseriesData.batchify(args, TimeseriesData.trainData, bsz=1) for channel_idx in range(model.enc_input_size): mean, cov = fit_norm_distribution_param( args, model, train_dataset[:TimeseriesData.length], channel_idx) means.append(mean), covs.append(cov) model_dictionary = { 'epoch': max(epoch, start_epoch), 'best_loss': best_val_loss, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'args': args, 'means': means, 'covs': covs } model.save_checkpoint(model_dictionary, True) print('-' * 89)
epoch_acc = running_corrects.double() / dataset_size[phase] print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc)) if phase=='test' and epoch_acc > best_acc: best_acc = epoch_acc best_model_wts = copy.deepcopy(model.state_dict()) torch.save(best_model_wts, osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth')) print('Model saved at: {}'.format(osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth'))) f.create_dataset('acc', data = np.array(learnning_curve)) time_elapsed = time.time() - since print('Time taken to complete training: {:0f}m {:0f}s'.format(time_elapsed // 60, time_elapsed % 60)) print('Best acc: {:.4f}'.format(best_acc)) if __name__=='__main__': dataloaders, classes, dataset_size = get_dataloader(debug=Config['debug'], batch_size=Config['batch_size'], num_workers=Config['num_workers']) num_ftrs = model.fc.in_features model.fc = nn.Linear(num_ftrs, classes) criterion = nn.CrossEntropyLoss() optimizer = optim.RMSprop(model.parameters(), lr=Config['learning_rate']) device = torch.device('cuda:0' if torch.cuda.is_available() and Config['use_cuda'] else 'cpu') train_model(dataloaders, model, criterion, optimizer, device, num_epochs=Config['num_epochs'], dataset_size=dataset_size)
model = model.Backbone(num_layers=50, drop_ratio=0.6, mode='ir') model_dict = model.state_dict() pretrained_dict = { k: v for k, v in pretrained_dict.items() if k in model_dict } model_dict.update(pretrained_dict) # update parameter model.load_state_dict(pretrained_dict) model = torch.nn.DataParallel(model).to(args.device) # 4.5 set loss_function loss_function_A = torch.nn.MarginRankingLoss().to(args.device) loss_function_B = torch.nn.MSELoss().to(args.device) # 4.6 choose optimizer optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 100], gamma=0.1, last_epoch=-1) # # 4.6.1 fixed the convolution layers # weight_p, bais_p = [], [] # count = 0 # for k in model.children(): # count += 1 # if count == 3: # for name, p in k.named_parameters():
import torch
from data import loader
from model import model, device
from loss import get_loss
import os

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.1)

current_dir = os.path.dirname(__file__)
weights_dir = os.path.join(current_dir, '..', 'weights')


def train(epoch):
    print('#' * 15)
    print('Epoch {}, Latent Size {}'.format(epoch, model.latent_size))
    print('#' * 15)
    model.train()
    for index, (x, _) in enumerate(loader):
        x = x.mean(dim=1, keepdim=True).to(device)
        optimizer.zero_grad()
        x_generated, mu, logvar = model(x)
        loss = get_loss(x_generated, x, mu, logvar)
        loss.backward()
        optimizer.step()
        if index % 100 == 0:
            print('Loss at iteration {0}: {1:.4f}'.format(index, loss.item()))
    if epoch == 4:
        filename = 'epoch{}_ls{}.pkl'.format(epoch, model.latent_size)
        torch.save(model.state_dict(), os.path.join(weights_dir, filename))
    if epoch < 4:
        scheduler.step()
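# The checkpoint save at epoch == 4 and the scheduler step for epoch < 4 assume
# train() is driven by an outer epoch loop. A minimal sketch of that loop (the
# epoch count of 5 matches the save condition but is otherwise an assumption):
if __name__ == '__main__':
    for epoch in range(5):
        train(epoch)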
test_loss_list.append(epoch_loss) print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc)) if phase=='test' and epoch_acc > best_acc: best_acc = epoch_acc best_model_wts = copy.deepcopy(model.state_dict()) torch.save(best_model_wts, osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth')) print('Model saved at: {}'.format(osp.join(Config['root_path'], Config['checkpoint_path'], 'model.pth'))) time_elapsed = time.time() - since print('Time taken to complete training: {:0f}m {:0f}s'.format(time_elapsed // 60, time_elapsed % 60)) print('Best acc: {:.4f}'.format(best_acc)) np.savetxt('acc_list.txt',acc_list) np.savetxt('test_acc_list.txt',test_acc_list) np.savetxt('loss_list.txt',loss_list) np.savetxt('test_loss_list.txt',test_loss_list) if __name__=='__main__': dataloaders, dataset_size = get_dataloader_compat(debug=False, batch_size=Config['batch_size'], num_workers=Config['num_workers']) criterion = nn.BCEWithLogitsLoss() optimizer = optim.Adam(model.parameters(), lr=Config['learning_rate']) device = torch.device('cuda:0' if torch.cuda.is_available() and Config['use_cuda'] else 'cpu') train_model(dataloaders, model, criterion, optimizer, device, num_epochs=Config['num_epochs'], dataset_size=dataset_size)
train_data = batchify(train_data, batch_size)
val_data = batchify(val_data, eval_batch_size)
test_data = batchify(test_data, eval_batch_size)


def get_batch(source, batch_index):
    data = source[batch_index:batch_index + seq_len]
    # Shift target by one step.
    target = source[batch_index + 1:batch_index + 1 + seq_len]
    return data, target


criterion = nn.MSELoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=gamma)


def train():
    model.train()  # Turn on the train mode
    total_loss = 0.0
    start_time = time.time()
    batches = np.random.permutation(range(0, train_data.size(0) - seq_len, seq_len))
    for batch_counter, i in enumerate(batches):
        data, targets = get_batch(train_data, i)
        optimizer.zero_grad()
from model import model from trainer import trainer from dataset import dataset from config import config import torch.nn as nn import torch.optim as optim import time import os device = config.device feature_extract = config.feature_extract if __name__ == "__main__": model = model.LeNet(config.train_class_num).to(device) params_to_update = model.parameters() print("Params to learn:") if feature_extract: params_to_update = [] for name, param in model.named_parameters(): if param.requires_grad: params_to_update.append(param) print("\t", name) else: for name, param in model.named_parameters(): if param.requires_grad: print("\t", name) optimizer_ft = optim.Adam(params_to_update, lr=0.001) criterion = nn.CrossEntropyLoss()
total_loss = 0. ntokens = len(corpus.dictionary) hidden = model.init_hidden(eval_batch_size) with torch.no_grad(): for i in range(0, data_source.size(0) - 1, args.bptt): data, targets = get_batch(data_source, i) output, hidden = model(data, hidden) output_flat = output.view(-1, ntokens) total_loss += len(data) * criterion(output_flat, targets).item() hidden = repackage_hidden(hidden) data.to("cpu") targets.to("cpu") return total_loss / len(data_source) optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr, lr_decay=1e-4, weight_decay=1e-5) def train(): # Turn on training mode which enables dropout. model.train() total_loss = 0. start_time = time.time() ntokens = len(corpus.dictionary) hidden = model.init_hidden(args.batch_size) for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)): data, targets = get_batch(train_data, i) # Starting each batch, we detach the hidden state from how it was previously produced. # If we didn't, the model would try backpropagating all the way to start of the dataset. hidden = repackage_hidden(hidden)
target_transform=TransformVOCDetectionAnnotation(class_to_ind, False)) def collate_fn(batch): imgs, gt = zip(*batch) return imgs[0].unsqueeze(0), gt[0] train_loader = torch.utils.data.DataLoader( train_data, batch_size=1, shuffle=True, num_workers=0, collate_fn=collate_fn) val_loader = torch.utils.data.DataLoader( val_data, batch_size=1, shuffle=False, num_workers=0, collate_fn=collate_fn) optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) def train(train_loader, model, optimizer, epoch): batch_time = AverageMeter() data_time = AverageMeter() losses = AverageMeter() model.train() end = time.time() for i, (im, gt) in (enumerate(train_loader)): adjust_learning_rate(optimizer, epoch) # measure data loading time data_time.update(time.time() - end)
temp = [] temp_box_x = int(i[4]) - int(i[2]) temp_box_y = int(i[5]) - int(i[3]) temp.append(i[0]) temp.append(min(temp_box_x, temp_box_y)) data_outputs.append(i[1:2]) data_inputs.append(temp) return data_inputs, data_outputs ## add more data: velocity , omega, past position if __name__ == '__main__': train_inputs, train_outputs = read_input() m = m() m.load_state_dict(torch.load('model_1.pt')) optimizer = optim.Adam(m.parameters(), lr=0.005) minibatch_size = 3 num_minibatches = len(train_inputs) // minibatch_size for epoch in (range(30)): # Training print("Training") # Put the model in training mode m.train() start_train = time.time() for group in tqdm(range(num_minibatches)): total_loss = None optimizer.zero_grad() for i in range(group * minibatch_size, (group + 1) * minibatch_size):
total_loss = 0. ntokens = len(corpus.dictionary) hidden = model.init_hidden(eval_batch_size) with torch.no_grad(): for i in range(0, data_source.size(0) - 1, args.bptt): data, targets = get_batch(data_source, i) output, hidden = model(data, hidden) output_flat = output.view(-1, ntokens) total_loss += len(data) * criterion(output_flat, targets).item() hidden = repackage_hidden(hidden) data.to("cpu") targets.to("cpu") return total_loss / len(data_source) optimizer = torch.optim.Adagrad(model.parameters(), lr=args.lr, lr_decay=1e-4, weight_decay=1e-5) def train(): # Turn on training mode which enables dropout. model.train() total_loss = 0. start_time = time.time() ntokens = len(corpus.dictionary) hidden = model.init_hidden(args.batch_size) for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)): data, targets = get_batch(train_data, i) # Starting each batch, we detach the hidden state from how it was previously produced.
gen_dataset = TimeseriesData.batchify(args, TimeseriesData.testData, 1) ############################################################################### # Build the model ############################################################################### feature_dim = TimeseriesData.trainData.size(1) model = model.RNNPredictor(rnn_type=args.model, enc_inp_size=feature_dim, rnn_inp_size=args.emsize, rnn_hid_size=args.nhid, dec_out_size=feature_dim, nlayers=args.nlayers, dropout=args.dropout, tie_weights=args.tied, res_connection=args.res_connection).to(args.device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) criterion = nn.MSELoss() ############################################################################### # Training code ############################################################################### def get_batch(args, source, i): seq_len = min(args.bptt, len(source) - 1 - i) data = source[i:i + seq_len] # [ seq_len * batch_size * feature_size ] target = source[i + 1:i + 1 + seq_len] # [ (seq_len x batch_size x feature_size) ] return data, target
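# Illustration of the one-step shift in get_batch() above: with args.bptt = 3 and
# a source of length 6, get_batch(args, source, 0) returns data = source[0:3] and
# target = source[1:4], so the target at each time step is the next value of the
# series. (Toy indices for illustration only, not from the original script.)
#
#   source index: 0 1 2 3 4 5
#   data          [0 1 2]
#   target          [1 2 3]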
############################################################################### # Build the model ############################################################################### feature_dim = TimeseriesData.trainData.size(1) model = model.RNNPredictor(rnn_type = args.model, enc_inp_size=feature_dim, rnn_inp_size = args.emsize, rnn_hid_size = args.nhid, dec_out_size=feature_dim, nlayers = args.nlayers, dropout = args.dropout, tie_weights= args.tied, res_connection=args.res_connection).to(args.device) optimizer = optim.Adam(model.parameters(), lr= args.lr,weight_decay=args.weight_decay) criterion = nn.MSELoss() ############################################################################### # Training code ############################################################################### def get_batch(args,source, i): seq_len = min(args.bptt, len(source) - 1 - i) data = source[i:i+seq_len] # [ seq_len * batch_size * feature_size ] target = source[i+1:i+1+seq_len] # [ (seq_len x batch_size x feature_size) ] return data, target def generate_output(args,epoch, model, gen_dataset, disp_uncertainty=True,startPoint=500, endPoint=3500): if args.save_fig: # Turn on evaluation mode which disables dropout. model.eval() hidden = model.init_hidden(1)
args.data + '/val_images', transform=data_transforms['val']), batch_size=args.batch_size, shuffle=False, num_workers=1) # Neural network and optimizer # We define neural net in model.py so that it can be reused by the evaluate.py script from model import model if use_cuda: print('Using GPU') model.cuda() else: print('Using CPU') optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum) def train(epoch): model.train() for batch_idx, (data, target) in enumerate(train_loader): if use_cuda: data, target = data.cuda(), target.cuda() optimizer.zero_grad() output = model(data) criterion = torch.nn.CrossEntropyLoss(reduction='mean') loss = criterion(output, target) loss.backward() optimizer.step() if batch_idx % args.log_interval == 0: print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
############################################################################### # Build the model ############################################################################### feature_dim = TimeseriesData.trainData.size(1) model = model.RNNPredictor(rnn_type = args.model, enc_inp_size=feature_dim, rnn_inp_size = args.emsize, rnn_hid_size = args.nhid, dec_out_size=feature_dim, nlayers = args.nlayers, dropout = args.dropout, tie_weights= args.tied, res_connection=args.res_connection).to(args.device) optimizer = optim.Adam(model.parameters(), lr= args.lr,weight_decay=args.weight_decay) criterion = nn.MSELoss() ############################################################################### # Training code ############################################################################### def get_batch(args,source, i): seq_len = min(args.bptt, len(source) - 1 - i) data = source[i:i+seq_len] # [ seq_len * batch_size * feature_size ] target = source[i+1:i+1+seq_len] # [ (seq_len x batch_size x feature_size) ] return data, target def generate_output(args,epoch, model, gen_dataset, disp_uncertainty=True,startPoint=1, endPoint=300): if args.save_fig: # Turn on evaluation mode which disables dropout. model.eval() hidden = model.init_hidden(1)
from model import model, nn
from data import load_data
from sklearn.metrics import precision_score, recall_score
from constants import *
from transformers import AdamW, get_linear_schedule_with_warmup
import numpy as np

train_loader, valid_loader, test_loader, train_len, valid_len, test_len = load_data()

model = model.to(device)
criterion = nn.CrossEntropyLoss()
# As per the original BERT paper, fine-tuning is done with the Adam optimizer with weight decay.
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_loader) * EPOCHS
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps=0,
                                            num_training_steps=total_steps)


def train(model):
    model = model.train()
    optimizer.zero_grad()
    losses = []
    accurate_preds = 0
    all_targets = []
    all_predictions = []
    for d in train_loader:
        inputs = d['input_ids'].to(device)
print(model)

# Define the training and validation datasets and dataloaders
eval_dataset = dsets.ImageFolder(EVAL_DATA_PATH, transform=preprocessing.transform)
train_dataset = dsets.ImageFolder(TRAIN_DATA_PATH, transform=preprocessing.transform)
eval_dataloader = DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=True)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Use cross-entropy as the loss and SGD as the optimizer
criterion = NN.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Load the checkpoint from the previous run
model.load_state_dict(torch.load("./chkpoint_res.bin"))

# Start training
for epoch in range(0, 100):
    model.train()
    with tqdm(train_dataloader, unit="batch") as tepoch:  # progress bar
        correct = 0
        batch = 0
        for data, target in tepoch:
            batch += 1
            tepoch.set_description(f"Epoch {epoch}")
            data, target = data.cuda(), target.cuda()  # move the batch to the GPU
print('train dataset length : ', len(train_dataset))
print('test dataset length : ', len(test_dataset))

train_dataloader = data_loader.load_pathloss_dataset(train_dataset, shuffle=True, num_workers=12,
                                                     batch_size=batch_size, type='RNN')
test_dataloader = data_loader.load_pathloss_dataset(test_dataset, shuffle=True, batch_size=batch_size,
                                                    num_workers=12, type='RNN')

model = model.VanillaLSTMNetwork(input_size=input_sequence).cuda()
criterion = nn.MSELoss().cuda()
optimizer = torch.optim.Adadelta(model.parameters(), lr=learning_rate)

writer = set_tensorboard_writer('../runs_rnn/model01-vanilla-sample12')

for epoch in range(num_epochs):
    for i, data in enumerate(train_dataloader):
        y_pred = model(data[:][0].cuda()).reshape(-1)
        y_data = data[:][1].cuda()
        loss = criterion(y_pred, y_data)
        # Reset gradients each iteration; the original omitted this, so gradients accumulated.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # ...record the running loss during training
        writer.add_scalar('mseloss training loss', loss / 1000,
                          epoch * len(train_dataloader) + i)
pretrained_dict = { k: v for k, v in new_state_dcit.items() if k in model_dict } for k, v in model_dict.items(): if k not in pretrained_dict: print(k) model.load_state_dict(pretrained_dict, strict=True) else: print("===> no models found at '{}'".format(args.pretrained)) print("===> Setting Optimizer") optimizer = optim.Adam(model.parameters(), lr=args.lr) def train(epoch): model.train() utils.adjust_learning_rate(optimizer, epoch, args.step_size, args.lr, args.gamma) print('epoch =', epoch, 'lr = ', optimizer.param_groups[0]['lr']) for iteration, (lr_tensor, hr_tensor) in enumerate(training_data_loader, 1): if args.cuda: lr_tensor = lr_tensor.to(device) # ranges from [0, 1] hr_tensor = hr_tensor.to(device) # ranges from [0, 1] optimizer.zero_grad()
test_dataset = preprocess_data.batchify(args, TimeseriesData.testData, args.eval_batch_size) gen_dataset = preprocess_data.batchify(args, TimeseriesData.testData, 1) ############################################################################### # Build the model ############################################################################### model = model.RNNPredictor(rnn_type=args.model, enc_inp_size=3, rnn_inp_size=args.emsize, rnn_hid_size=args.nhid, dec_out_size=3, nlayers=args.nlayers, dropout=args.dropout, tie_weights=args.tied) print(list(model.parameters())) if args.cuda: model.cuda() optimizer = optim.Adam(model.parameters(), lr=args.lr) criterion = nn.MSELoss() ############################################################################### # Training code ############################################################################### def get_batch(source, i, evaluation=False): seq_len = min(args.bptt, len(source) - 1 - i) data = Variable( source[i:i + seq_len], volatile=evaluation) # [ seq_len * batch_size * feature_size ] target = Variable(