def train_epoch(model, optim, criterion, loader, lbda=None, cbns=None, maps=None, constraint=None):
    model.train()
    total = 0
    top1 = 0
    for i, (batch, label) in enumerate(loader):
        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)
        out = model(batch)
        _, pred = out.max(dim=1)
        top1 += pred.eq(label).sum()
        if constraint:
            reg = lbda * regularizer(model, constraint, cbns, maps)
            loss = criterion(out, label) + reg
        else:
            loss = criterion(out, label)
        loss.backward()
        optim.step()
        if (i % 100 == 0) or (i == len(loader) - 1):
            print('Train | Batch ({}/{}) | Top-1: {:.2f} ({}/{})'.format(
                i + 1, len(loader), float(top1) / total * 100, top1, total))
        if constraint:
            truncate_smallbeta(model, cbns)
def training(model, optim, criterion_cls, train_iter, epoch):
    model.train()
    losses = []
    label = []
    preds = []
    softmax = nn.Softmax(dim=-1)
    print('\nTrain_Epoch:', epoch)
    for batch in tqdm.tqdm(train_iter):
        optim.zero_grad()
        input_ids = batch['input_ids'].cuda()
        attn_mask = batch['attention_mask'].cuda()
        token_type_ids = batch['token_type_ids'].cuda()
        truelabel_cls = batch['cls_label'].cuda()
        logits_cls = model(input_ids, attn_mask, token_type_ids)
        # if the output dim is (bs x seqlen x numclass) -> (total_words_batch x numclass)
        # if the true label is (bs x seqlen) -> (total_words_batch)
        loss_cls = criterion_cls(logits_cls.view(-1, 3), truelabel_cls.view(-1))
        loss = loss_cls
        losses.append(loss.item())
        # for now we are only interested in accuracy and F1 of the classification task
        label.extend(truelabel_cls.view(-1).cpu().detach().numpy())
        preds_cls = softmax(logits_cls).argmax(dim=-1)  # argmax over the class dimension
        preds.extend(preds_cls.view(-1).cpu().detach().numpy())
        loss.backward()
        optim.step()
    return losses, label, preds
def test_param_update(self):
    # TODO: there should be some quantifiable test condition at least...
    for i in range(2500):
        optim.zero_grad()
        x = torch.randn(self.batch_size, inp_size)
        y = test_net(x, jac=False)[0]
        loss = torch.mean((y - x)**2)
        loss.backward()
        for name, p in test_net.named_parameters():
            if 'weights' in name:
                # project the gradient onto the skew-symmetric (tangent) part,
                # so the update keeps the weight matrix close to orthogonal
                gp = torch.mm(p.grad, p.data.t())
                p.grad = torch.mm(gp - gp.t(), p.data)
                weights = p.data
        optim.step()
        if i % 25 == 0:
            # measure how far W W^T has drifted from the identity
            WWt = torch.mm(weights, weights.t())
            WWt -= torch.eye(weights.shape[0])
            if VERBOSE:
                print(loss.item(), end='\t')
                print(torch.max(torch.abs(WWt)).item(), end='\t')
                print(torch.mean(WWt**2).item(), end='\t')
                print()
def train_epoch(self, optim, criterion):
    self.model.train()
    total = 0
    top1 = 0
    data_t = 0
    train_t = 0
    total_loss = 0
    s = time.time()
    for i, (batch, label) in enumerate(self.train_loader):
        data_t += time.time() - s
        s = time.time()
        optim.zero_grad()
        batch, label = batch.to('cuda'), label.to('cuda')
        total += batch.size(0)
        out = self.model(batch)
        loss = criterion(out, label)
        loss.backward()
        total_loss += loss.item()
        optim.step()
        train_t += time.time() - s
        if (i % 100 == 0) or (i == len(self.train_loader) - 1):
            print('Batch ({}/{}) | Loss: {:.3f} | (PerBatch) Data: {:.3f}s, Network: {:.3f}s'
                  .format(i + 1, len(self.train_loader), total_loss / (i + 1),
                          data_t / (i + 1), train_t / (i + 1)))
        s = time.time()
def train(j):
    # begin to train
    correct = 0
    total = 0
    for epoch in range(EPOCH):
        for step, (x, y) in enumerate(train_loader):
            if use_cuda:
                x, y = x.cuda(), y.cuda()
            # wrap as autograd Variables
            x = Variable(x)
            y = Variable(y)
            # forward pass
            output, x1 = cnn(x.float())
            loss = loss_func(output, y.squeeze())
            # zero the gradients and take a gradient-descent step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            _, predict = t.max(output, dim=1)
            # print('Predict:{}'.format(predict))
            correct += predict.eq(y.data.squeeze()).cpu().sum()
            total += y.size(0)
        # calculate the accuracy
        print('Loss name->{}:{}'.format(j, loss.item()))
        # Accuracy
        # print('Accuracy:{}'.format(100.*predict.eq(y.data.squeeze()).cpu().sum()/y.size(0)))
        # acc = 100. * correct/total
        # print('Accuracy:{}'.format(acc))
    accuracy = test(model=cnn, name=j)
    return accuracy, loss
def train_network(tr_loader, criterion, optim, device='cuda', net=SimpleCNN(), n_epoch=5):
    net = net.to(device)
    for epoch in range(n_epoch):
        running_loss = 0.0
        for i, data in enumerate(tr_loader, 0):
            # getting inputs and labels for batch
            inputs, labels = data[0].to(device), data[1].to(device)
            optim.zero_grad()
            # forward pass
            outputs = net.forward(inputs)
            _, out = torch.max(outputs.data, 1)
            if DEBUG:
                print('outputs shape {}'.format(outputs.shape))
                print('labels shape {}'.format(labels.shape))
                print(outputs[0])
                raise Exception()
            loss = criterion(outputs, labels)
            # backward pass
            loss.backward()
            optim.step()
            # print what we've got
            running_loss += loss.item()
            if i % 1000 == 999:
                print('[%d,%5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 1000))
                running_loss = 0.0
def train_model(model, train_iter, epoch, batch_size, learning_rate):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if text.size()[0] != batch_size:
            # One of the batches returned by BucketIterator has a length different from batch_size.
            continue
        optim.zero_grad()
        prediction = model(text)
        loss = F.cross_entropy(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
    return total_epoch_loss / len(train_iter), total_epoch_acc / len(train_iter)
def train_model(model, train_iter, mode, prox_epsilon=1, epsilon=0.01):
    total_epoch_loss = 0
    total_epoch_acc = 0
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        input = batch[0]
        input.requires_grad = True
        target = batch[1]
        target = torch.autograd.Variable(target).long()
        r = 0
        optim.zero_grad()
        prediction = model(input, r, batch_size=input.size()[0], mode=mode, prox_epsilon=prox_epsilon)
        # print("prediction ", prediction.shape)
        # print("target ", target.shape)
        loss = loss_fn(prediction, target)
        if mode == 'AdvLSTM':
            # Add the adversarial training term to the loss
            r = compute_perturbation(loss, model)
            adv_prediction = model(input, r, batch_size=input.size()[0], mode=mode,
                                   prox_epsilon=prox_epsilon, epsilon=epsilon)
            loss = loss_fn(adv_prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / input.size()[0]
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
    return total_epoch_loss / len(train_iter), total_epoch_acc / len(train_iter)
def train_step2(model, optim, nmsdps, device="cpu", CUDA_FLAG=False, use_NMSDP_to_sparse2=False):
    optim.zero_grad()
    Gs = []
    core_var_masks = []
    var_lemma_countss = []

    def maybe_non_blocking(tsr):
        if CUDA_FLAG:
            return tsr.cuda(non_blocking=True)
        else:
            return tsr

    for nmsdp in nmsdps:
        if not use_NMSDP_to_sparse2:
            G = NMSDP_to_sparse(nmsdp)
            Gs.append(maybe_non_blocking(G))
            core_var_masks.append(maybe_non_blocking(
                torch.from_numpy(nmsdp.core_var_mask).type(torch.bool).squeeze()).to(device))
            var_lemma_countss.append(maybe_non_blocking(
                torch.from_numpy(nmsdp.var_lemma_counts).type(torch.float32).squeeze()).to(device))
        else:
            G = NMSDP_to_sparse2(nmsdp)
            Gs.append(maybe_non_blocking(G))
            core_var_masks.append(maybe_non_blocking(
                nmsdp.core_var_mask.type(torch.bool).squeeze()).to(device))
            var_lemma_countss.append(maybe_non_blocking(
                nmsdp.var_lemma_counts.type(torch.float32).squeeze()).to(device))
    V_drat_logitss, V_core_logitss = model(Gs)
    drat_loss = compute_softmax_kldiv_loss_from_logits(V_drat_logitss, var_lemma_countss)
    core_loss = compute_mask_loss(V_core_logitss, core_var_masks)
    loss = core_loss + drat_loss
    loss.backward()
    optim.step()
    return drat_loss, core_loss, loss
def train(epoch, model, train_loader, optim):
    reconstruction_loss = 0
    kld_loss = 0
    total_loss = 0
    for i, (x, y) in enumerate(train_loader):
        try:
            optim.zero_grad()
            pred, mu, logvar = model(x.to(device), y.to(device))
            recon_loss, kld = loss_function(x.to(device), pred, mu, logvar)
            loss = recon_loss + kld
            loss.backward()
            optim.step()
            total_loss += loss.cpu().data.numpy() * x.shape[0]
            reconstruction_loss += recon_loss.cpu().data.numpy() * x.shape[0]
            kld_loss += kld.cpu().data.numpy() * x.shape[0]
            if i == 0:
                print("Gradients")
                for name, param in model.named_parameters():
                    if "bias" in name:
                        print(name, param.grad[0], end=" ")
                    else:
                        print(name, param.grad[0, 0], end=" ")
                print()
        except Exception as e:
            traceback.print_exc()
            torch.cuda.empty_cache()
            continue
    reconstruction_loss /= len(train_loader.dataset)
    kld_loss /= len(train_loader.dataset)
    total_loss /= len(train_loader.dataset)
    return total_loss, kld_loss, reconstruction_loss
def train_model(model, train_iter, mode):
    total_epoch_loss = 0
    total_epoch_acc = 0
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        input = batch[0]
        target = batch[1]
        target = torch.autograd.Variable(target).long()
        r = 0
        optim.zero_grad()
        prediction = model(input, r, batch_size=input.size()[0], mode=mode)
        loss = loss_fn(prediction, target)
        if mode == 'AdvLSTM':
            ''' Add adversarial training term to loss'''
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / input.size()[0]
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
    return total_epoch_loss / len(train_iter), total_epoch_acc / len(train_iter)
def train_model(model, train_dataloader, device, num_epoch, logger):
    ''' train model '''
    logger.info("***** Initial optimizer *****")
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    logger.info("***** Running train *****")
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.to(device)
    model.train()
    for _ in trange(int(num_epoch), desc="Epoch"):
        tr_loss = 0
        nb_tr_example, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            optim.zero_grad()
            prediction = model(input_ids, input_mask, segment_ids)  # assumes a BERT-style forward signature
            loss = loss_fn(prediction, label_ids)
            num_corrects = (torch.max(prediction, 1)[1].view(label_ids.size()).data == label_ids.data).float().sum()
            acc = 100.0 * num_corrects / input_ids.size(0)
            loss.backward()
            clip_gradient(model, 1e-1)
            optim.step()
            nb_tr_steps += 1
            total_epoch_loss += loss.item()
            total_epoch_acc += acc.item()
    return model
def fewshot_test(self, epoch):
    A = pickle.loads(pickle.dumps(self.A_net))
    optim = torch.optim.SGD(A.parameters(), self.opt.lr * 1e-2, momentum=0.9, weight_decay=5e-4)
    for i in range(self.opt.fewshots):
        optim.zero_grad()
        inter = A(self.input[i].unsqueeze(0).to(self.device))
        inter_grad = self.Grad_net(inter)
        grad = torch.autograd.grad(outputs=inter, inputs=A.parameters(),
                                   grad_outputs=inter_grad, create_graph=False, retain_graph=False)
        torch.autograd.backward(A.parameters(), grad_tensors=grad, retain_graph=False, create_graph=False)
        optim.step()
    for i in range(self.opt.fewshots):
        optim.zero_grad()
        inter = A(self.input[i].unsqueeze(0).to(self.device))
        loss = self.criterion1(inter, self.prototype.expand(1, 60, 120))
        loss.backward()
        optim.step()
    with torch.no_grad():
        tmp_h = self.B_net.h
        tmp_c = self.B_net.c
        # if self.opt.lstm_hc_usage:
        self.B_net.feed_hc([self.h, self.c])
        data = self.input[self.opt.fewshots:]
        inter = A(data.to(self.device))
        self.decision, self.predict = self.B_net(inter)
        self.B_net.feed_hc([tmp_h, tmp_c])
        self.t_ordloss = self.criterion2(self.predict[0].unsqueeze(0),
                                         self.true_rPPG[0].unsqueeze(0).to(self.device))
def _backprop(self, optim, loss, params):
    # learn
    optim.zero_grad()  # clear gradients left over from the previous step
    loss.backward()  # propagate gradients
    torch.nn.utils.clip_grad_norm_(params, self.clip_norm)  # clip to avoid inf/nan gradients
    optim.step()  # apply the parameter update
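# A minimal usage sketch for the _backprop helper above. The names `trainer`,
# `model`, `criterion`, `batch`, and `target` are illustrative assumptions, not
# part of the original code: compute a loss, then delegate the clipped update.
#
#   out = model(batch)
#   loss = criterion(out, target)
#   trainer._backprop(optim, loss, model.parameters())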
def fit_to_training(self, training, optimizer, lr=0.1, normalize=True):
    """
    Fits the (torch) neural network self to the data contained in training,
    with a progress bar.

    Parameters:
        training : list of pairs (input : tensor, expected : tensor)
            the data used to train the nn
        optimizer : an optimizer class from torch.optim, e.g. torch.optim.Adam or torch.optim.SGD
        lr : float
            the learning rate
        normalize : bool
            if True, normalizes each learning step. This option is useful to
            prevent overflow during training.
    """
    self.train()
    for batch_idx, (data, target) in tqdm(enumerate(training), total=len(training)):
        optim = optimizer(self.parameters(), lr=lr)
        optim.zero_grad()
        output = self(data)
        loss = torch.nn.MSELoss()
        error = loss(output, target)
        error.backward(retain_graph=True)
        if normalize:
            with torch.no_grad():
                norm = self.norm_coefficients([param.grad.data for param in self.parameters()])
                if norm > 1:
                    for param in self.parameters():
                        param.grad.data /= norm
        optim.step()
def train(images, labels, model, criterion, optim):
    batch_size = 60
    iters = len(images) // batch_size
    loss = 0
    all_loss = 0
    loss_buf = []
    for it in range(iters):
        optim.zero_grad()
        data = images[it * batch_size:batch_size * (it + 1)]
        batch_data = torch.randn((batch_size, 1, 28, 28))
        for i in range(batch_size):
            batch_data[i] = torch.tensor(data[i], dtype=torch.float32).view(1, 28, 28)
        label = labels[it * batch_size:batch_size * (it + 1)]
        label = torch.tensor(label, dtype=torch.long)
        output = model(batch_data).squeeze(1)
        loss = criterion(output, label)
        loss.backward()
        optim.step()
        all_loss += loss
        if it % 10 == 0:
            loss_buf.append(all_loss.item() / 600)
            all_loss = 0
        if it % 100 == 0:
            print('finished {}%'.format((it // 100 + 1) * 10))
    return loss_buf
def train(args, model, device, train_loader, optim, epoch):
    loss_classify_list = []
    loss_regression_list = []
    model.train()
    for idx, (data, box, label) in enumerate(train_loader):
        data, box, label = data.to(device), box.to(device), label.to(device)
        optim.zero_grad()
        c, r = model(data)
        loss_classify = CELoss(c, label.long())  # cross entropy loss for classify path
        loss_regression = SmoothL1(r, box)
        loss = loss_classify + loss_regression
        loss.backward()
        optim.step()
        if idx % 5 == 0:
            print('epoch: ' + str(epoch) + '\ttrain iter: ' + str(idx * len(data)) +
                  '\tclassify loss: ' + str(loss_classify.item()) +
                  '\tregression loss: ' + str(loss_regression.item()))
            loss_classify_list.append(loss_classify.item())
            loss_regression_list.append(loss_regression.item())
    return loss_classify_list, loss_regression_list
def train_classifier(model, optim, dataset, epochs, path, test, start=0):
    model.train()
    for epoch in range(start, epochs):
        losses = []
        kld_fs = []
        kld_zs = []
        cross_entropies = []
        print("Running Epoch: {}".format(epoch + 1))
        for i, item in tqdm(enumerate(dataset, 1)):
            features, target, subject = item
            target = torch.argmax(target, dim=1)  # one hot back to int
            optim.zero_grad()
            f_mean, f_logvar, f, z_post_mean, z_post_logvar, z, z_prior_mean, \
                z_prior_logvar, pred_target = model(features)
            loss, kld_f, kld_z, cross_entropy = loss_fn(
                target, pred_target, f_mean, f_logvar, z_post_mean, z_post_logvar,
                z_prior_mean, z_prior_logvar)
            loss.backward()
            optim.step()
            losses.append(loss.item())
            kld_fs.append(kld_f.item())
            kld_zs.append(kld_z.item())
            cross_entropies.append(cross_entropy.item())
        # training_accuracy = check_accuracy(model, dataset)
        test_accuracy = check_accuracy(model, test)
        meanloss = np.mean(losses)
        meanf = np.mean(kld_fs)
        meanz = np.mean(kld_zs)
        mean_cross_entropies = np.mean(cross_entropies)
        print("Epoch {} : Average Loss: {} KL of f : {} KL of z : {} "
              "Cross Entropy: {} Test Accuracy: {}".format(
                  epoch + 1, meanloss, meanf, meanz, mean_cross_entropies, test_accuracy))
        save_model(model, optim, epoch, path)
def train(x, y, validation=False):
    optim.zero_grad()
    bs = x.shape[1]
    h1_tm1 = Variable(torch.zeros((bs, hidden_size))).to(DEVICE)
    c1_tm1 = Variable(torch.zeros((bs, hidden_size))).to(DEVICE)
    h2_tm1 = Variable(torch.zeros((bs, hidden_size))).to(DEVICE)
    c2_tm1 = Variable(torch.zeros((bs, hidden_size))).to(DEVICE)
    outputs = []
    x = x.to(DEVICE)
    y = y.to(DEVICE)
    # one batch of x
    for i in np.arange(0, x.shape[0]):
        xin = x[i]
        output, h1_tm1, c1_tm1, h2_tm1, c2_tm1 = lstm(xin, h1_tm1, c1_tm1, h2_tm1, c2_tm1)
        outputs += [output]
    y_pred = torch.stack(outputs, 0)
    y_pred_flat = y_pred.reshape(y_pred.shape[0] * y_pred.shape[1], y_pred.shape[2])
    y1_flat = y[:, :, 0]
    y2_flat = y[:, :, 1]
    y1_flat = y1_flat.reshape(y1_flat.shape[0] * y1_flat.shape[1])[:, None]
    y2_flat = y2_flat.reshape(y2_flat.shape[0] * y2_flat.shape[1])[:, None]
    out_pi, out_mu1, out_mu2, out_sigma1, out_sigma2, out_corr = lstm.get_mixture_coef(y_pred_flat)
    loss = lstm.get_lossfunc(out_pi, out_mu1, out_mu2, out_sigma1, out_sigma2, out_corr,
                             y1_flat, y2_flat)
    if not validation:
        loss.backward()
        for p in lstm.parameters():
            p.grad.data.clamp_(min=-grad_clip, max=grad_clip)
        optim.step()
    rloss = loss.cpu().data.numpy()
    return y_pred, rloss
def train_epoch(model, train_data, optim, device, opt):
    model.train()
    total_loss = 0
    for batch in tqdm(train_data, mininterval=2, desc=' - (Training)', leave=False):
        mini_batch_loss = 0
        for src_seq, src_len, trg_seq in batch:
            # zero the gradients
            optim.zero_grad()
            # model forward
            output = model(src_seq.to(device), src_len, trg_seq.to(device))
            loss = custom_loss(output, trg_seq.to(device), opt.alpha, opt.beta)
            # backward pass
            loss.backward()
            # gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), opt.max_grad_norm)
            # optimizer step
            optim.step()
            # accumulate the mini-batch loss
            mini_batch_loss += loss.item()
        total_loss += mini_batch_loss
    return total_loss / len(train_data)
def animate(i, config, net, optim, data):
    print(i, '/', iternum)
    net.train()
    out = net(data)
    loss = config.loss(data, out)
    optim.zero_grad()
    loss['overall'].backward()
    optim.step()
    print(loss['overall'].data.cpu().numpy())
    net.eval()
    with torch.no_grad():
        out = net(data)
    box2d_src = data[1].data.cpu().numpy()
    box3d_src = data[2].data.cpu().numpy()
    box2d_tgt = data[3].data.cpu().numpy()
    box3d_tgt = data[4].data.cpu().numpy()
    r = data[5].data.cpu().numpy()
    gts = data[6].data.cpu().numpy()
    y = out['y'].data.cpu().numpy()
    num = box3d_src.shape[0]
    col = 8
    row = num // col
    for ri in range(row):
        for cj in range(col):
            ni = ri * col + cj
            ymap = y[ni, ...]
            ymap *= np.pi
            ymap[1] *= 2
            c3d = recon(box3d_src[ni, ...], r[ni, ...], ymap)
            pv[ni].set_data(c3d[:, 0], c3d[:, 1])
            pv[ni].set_3d_properties(c3d[:, 2])
    if i == iternum - 1:
        exit()
    return pv
def loss_pass(self, net, loss_func, loader, epoch, optim, op='train'):
    """ Performs one epoch & continually updates the model """
    if op == 'valid':
        torch.set_grad_enabled(False)
    print(f"STARTING {op} EPOCH{epoch}")
    t0 = time.time()
    total_epoch_loss = 0
    for i, input_dict in enumerate(loader):
        ts_imgbatch, ts_anglebatch = input_dict.get("img"), input_dict.get("angle")
        ts_imgbatch, ts_anglebatch = ts_imgbatch.to(device), ts_anglebatch.to(device)
        input_dict["img"] = ts_imgbatch
        input_dict["angle"] = ts_anglebatch
        # Classic train loop
        optim.zero_grad()
        out_dict = net(input_dict)
        ts_predanglebatch = out_dict["angle"]
        ts_loss = loss_func(ts_predanglebatch, ts_anglebatch)
        if op == 'train':
            ts_loss.backward()
            optim.step()
        print("loss:{}".format(ts_loss.item()))
        total_epoch_loss += ts_loss.item()
        if i % 20 == 0:
            self.vis.visualize_batch(ts_imgbatch, ts_anglebatch, ts_predanglebatch, global_step=epoch)
    if op == 'valid':
        torch.set_grad_enabled(True)
    print(f"FINISHED {op} EPOCH{epoch}")
    print(f"----{time.time() - t0} seconds----")
    return total_epoch_loss
def performUpdates(self, lossF, optim, batch_size, Qnet, gamma):
    miniBatch = self.sample(batch_size)
    # pdb.set_trace()
    for i in range(0, batch_size):
        sarsd = miniBatch[i]
        state = sarsd[0]
        action = sarsd[1]
        ri = sarsd[2]
        ns = sarsd[3]
        done = sarsd[4]
        QvalsForState = Qnet(get_variable_from_input(state))
        targetValForState = torch.FloatTensor()
        targetValForState = QvalsForState.data.clone()
        if done:
            targetValForState[action] = ri
        else:
            QvalForNextState = Qnet(get_variable_from_input(ns))
            maxQAction = torch.max(QvalForNextState)
            # pdb.set_trace()
            targetValForState[action] = (ri + gamma * maxQAction).data[0]
        optim.zero_grad()
        loss = lossF(QvalsForState, get_variable_from_input(targetValForState, False))
        loss.backward()
        optim.step()
def train_epoch(model, optim, train_loader, epoch, device, log_interval):
    model.train()
    epoch_loss = 0
    for batch_idx, data in enumerate(train_loader):
        data_size = len(data[0]) if isinstance(data, list) else len(data)
        optim.zero_grad()
        loss = _eval(model, data, device)
        loss.backward()
        optim.step()
        epoch_loss += loss.item()
        if (batch_idx + 1) % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, (batch_idx + 1) * data_size, len(train_loader.dataset),
                100. * (batch_idx + 1) / len(train_loader), epoch_loss / log_interval))
            epoch_loss = 0
    # recompute BatchNormFlow statistics over the full training set
    for module in model.modules():
        if isinstance(module, BatchNormFlow):
            module.momentum = 0
    with torch.no_grad():
        model(train_loader.dataset.tensors[0].to(device))
    for module in model.modules():
        if isinstance(module, BatchNormFlow):
            module.momentum = 1
def optimize_model():
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    states = np.vstack([x.state for x in transitions])
    actions = np.array([x.action for x in transitions])
    rewards = np.array([x.reward for x in transitions])
    next_states = np.vstack([x.next_state for x in transitions])
    done = np.array([x.done for x in transitions])
    Q_predict = get_Q(policy_net, states)
    Q_target = Q_predict.clone().data.cpu().numpy()
    # For DQN
    # Q_target[np.arange(len(Q_target)), actions] = rewards + GAMMA * np.max(get_Q(target_net, next_states).data.cpu().numpy(), axis=1) * ~done
    # For Double DQN: actions are selected by the policy net and evaluated by the target net
    Q_next_state = np.argmax(get_Q(policy_net, next_states).data.cpu().numpy(), axis=1).reshape(-1)
    Q_target[np.arange(len(Q_target)), actions] = rewards + GAMMA * np.choose(
        Q_next_state, get_Q(target_net, next_states).data.cpu().numpy().T) * ~done
    Q_target = to_variable(Q_target, type=torch.float)
    policy_net.train(mode=True)
    optim.zero_grad()
    loss = loss_fn(Q_predict, Q_target)
    loss.backward()
    optim.step()
def train(net, trainloader, optim, criterion, epoch, device):
    print("Training")
    net.train()
    train_loss = 0
    total = 0
    total_correct = 0
    iterator = tqdm(trainloader)
    for inputs, targets in iterator:
        inputs, targets = inputs.to(device), targets.to(device)
        optim.zero_grad()
        outputs, _ = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optim.step()
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == targets).sum().item()
        total += targets.size(0)
    print("Epoch: [{}] loss: [{:.2f}] Accuracy [{:.2f}] ".format(
        epoch + 1, train_loss / len(trainloader), total_correct * 100 / total))
def train(model, criterion, optim, loader, epoch, verbose=True):
    losses = AverageMeter()
    triplets = AverageMeter()
    max_iter = loader.__len__()
    model.train()
    f_log = logging.getLogger("file-log")
    print('train', end='')
    for iter, ((fp0, v0), (fp1, v1), cls) in enumerate(loader):
        optim.zero_grad()
        feature = torch.cat([fp0, fp1])
        target = torch.cat([cls, cls])
        out = model(feature.cuda())
        loss, n_triplet = criterion(out, target)
        losses.update(loss.item())
        triplets.update(n_triplet)
        loss.backward()
        optim.step()
        out_str = '[Train - epoch: {}, iter: {}/{}] loss: {}({}) triplet: {}({})' \
            .format(epoch, iter + 1, max_iter, round(losses.val, 4), round(losses.avg, 4),
                    triplets.val, round(triplets.avg))
        f_log.info(out_str)
        if iter % 10 == 0:
            print('\r[{} {}]'.format(get_time(), os.path.basename(__file__)) + out_str, end='')
    print("")
    return losses, triplets
def optimize(agent, target, optim, memory):
    if len(memory) < BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))
    non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)),
                                  device=device, dtype=torch.uint8)
    non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
    state_batch = torch.cat(batch.state).to(device)
    action_batch = torch.cat(batch.action).to(device)
    reward_batch = torch.cat(batch.reward).to(device)
    state_action_values = agent(state_batch).gather(1, action_batch)
    next_state_values = torch.zeros(BATCH_SIZE, device=device)
    next_state_values[non_final_mask] = target(non_final_next_states.to(device)).max(1)[0].detach()
    expected_state_action_values = (next_state_values * GAMMA) + reward_batch
    loss = F.smooth_l1_loss(state_action_values, expected_state_action_values.unsqueeze(1))
    optim.zero_grad()
    loss.backward()
    optim.step()
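# A minimal driver sketch for optimize() above, under assumed names: `env`,
# `select_action`, and `memory.push` are not part of the original code and
# follow the usual Gym-style replay-buffer loop.
#
#   state = env.reset()
#   done = False
#   while not done:
#       action = select_action(state)
#       next_state, reward, done, _ = env.step(action.item())
#       memory.push(state, action, None if done else next_state, reward)
#       state = next_state
#       optimize(agent, target, optim, memory)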
def train_model(model, train_iter, epoch):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.cuda()
    optim = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()))
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label
        target = torch.autograd.Variable(target).long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if text.size()[0] != 32:
            # One of the batches returned by BucketIterator has a length different from 32.
            continue
        optim.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optim.step()
        steps += 1
        if steps % 100 == 0:
            print(f'Epoch: {epoch + 1}, Idx: {idx + 1}, Training Loss: {loss.item():.4f}, '
                  f'Training Accuracy: {acc.item(): .2f}%')
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
    return total_epoch_loss / len(train_iter), total_epoch_acc / len(train_iter)
def train(model, loader, mixup, epoch, optim, criterion, device, dtype, batch_size, log_interval):
    model.train()
    correct1, correct5 = 0, 0
    enum_load = enumerate(tqdm(loader))
    for batch_idx, (data, t) in enum_load:
        data, t = data.to(device=device), t.to(device=device)
        data, target = mixup(data, t)
        optim.zero_grad()
        output = model(data)
        print(output)
        loss = criterion(output.to(device=device), target.to(device=device))
        loss.backward()
        optim.batch_step()
        corr = correct(output, t, topk=(1, 2))
        correct1 += corr[0]
        correct5 += corr[1]
        if batch_idx % log_interval == 0:
            tqdm.write(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}. '
                'Top-1 accuracy: {:.2f}%({:.2f}%). '
                'Top-5 accuracy: {:.2f}%({:.2f}%).'.format(
                    epoch, batch_idx, len(loader), 100. * batch_idx / len(loader), loss.item(),
                    100. * corr[0] / batch_size, 100. * correct1 / (batch_size * (batch_idx + 1)),
                    100. * corr[1] / batch_size, 100. * correct5 / (batch_size * (batch_idx + 1))))
    return loss.item(), correct1 / len(loader.sampler), correct5 / len(loader.sampler)