def predict(self, x, model=None):
    """Run inference on `x` and return (probs, betas) as numpy arrays.

    If no model is given, a frozen deep copy of ``self.model`` is used so
    that gradient state on the live model is untouched.

    :param x: raw examples; batched via ``getBatch(self.batch_size, x)``.
    :param model: optional model to use instead of a copy of ``self.model``.
    :return: (probs, betas) — betas is only populated for the 'embed'
        model type; for 'bow' it is returned as an empty list.
    """
    if model is None:  # PEP 8: identity comparison for None, not ==
        model = deepcopy(self.model)
        # Freeze the copy: prediction must not accumulate gradients.
        for param in model.parameters():
            param.requires_grad = False
    probs, betas = [], []
    for i, data in enumerate(getBatch(self.batch_size, x)):
        if self.model_type == 'embed':
            data = pad_to_batch(data)
            prob, beta = model(data)
            betas.append(beta)
        elif self.model_type == 'bow':
            data = FloatTensor(data)
            prob = model(data)
        probs.append(prob)
    probs = np.array(torch.cat(probs, 0).tolist())
    if self.model_type == 'embed':
        betas = np.array(torch.cat(betas, 0).tolist())
    return probs, betas
def complete(fp, source, hidden, dropout, lr, weight_decay, lam, epochs):
    """Train and evaluate a GCN-LPA model on the given dataset.

    :param fp: path to the raw dataset files.
    :param source: dataset name, one of "cora" or "twitch".
    :param hidden: hidden-layer width of the GCN.
    :param dropout: dropout rate.
    :param lr: Adam learning rate.
    :param weight_decay: Adam weight decay (L2).
    :param lam: weight of the LPA loss term added to the GCN loss.
    :param epochs: number of training epochs.
    :raises ValueError: if `source` is not a known dataset name.
    """
    if source == "cora":
        data, edges = read_cora_data(fp)
    elif source == "twitch":
        data, edges = read_twitch_data(fp)
    else:
        # Previously an unknown source fell through and crashed later with
        # a NameError on `data`; fail fast with a clear message instead.
        raise ValueError("unknown source %r; expected 'cora' or 'twitch'" % (source,))
    labels, idx, X = parse_data(data)
    features = build_features(X)
    labels = encode_label(labels)
    edges = build_edges(idx, parse_edges(edges))
    adj = build_adj(edges, labels)
    # LPA needs the one-hot float labels; NLL loss needs class indices.
    labels_for_lpa = torch.from_numpy(labels).type(torch.FloatTensor)
    labels = torch.LongTensor(np.where(labels)[1])
    idx_train, idx_val, idx_test = build_idx(X.shape[0])
    model = GCNLPA(nfeat=features.shape[1],
                   nhid=hidden,
                   nclass=labels.max().item() + 1,
                   adj=adj,
                   dropout_rate=dropout)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    for i in range(epochs):
        model.train()
        optimizer.zero_grad()
        output, y_hat = model(features, adj, labels_for_lpa)
        loss_gcn = F.nll_loss(output[idx_train], labels[idx_train])
        loss_lpa = F.nll_loss(y_hat, labels)
        acc_train = accuracy(output[idx_train], labels[idx_train])
        # Joint objective: GCN classification loss + weighted LPA loss.
        loss_train = loss_gcn + lam * loss_lpa
        loss_train.backward(retain_graph=True)
        optimizer.step()
        model.eval()
        output_val, _ = model(features, adj, labels_for_lpa)
        loss_val = F.nll_loss(output_val[idx_val], labels[idx_val])
        acc_val = accuracy(output_val[idx_val], labels[idx_val])
        print('Epoch: {:04d}'.format(i+1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'acc_train: {:.4f}'.format(acc_train.item()),
              'loss_val: {:.4f}'.format(loss_val.item()),
              'acc_val: {:.4f}'.format(acc_val.item()))
    model.eval()
    output, _ = model(features, adj, labels_for_lpa)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
def train(model, train_loader, val_loader, num_epochs, optimizer):
    """Train `model` on MNIST-shaped images (reshaped to 1x28x28) on GPU.

    Every 50 optimizer steps the model is scored on `val_loader`; every 500
    steps the current loss/accuracy is printed.

    Returns:
        (iteration_list, accuracy_list, loss_list) — one entry per
        50-step checkpoint, suitable for plotting learning curves.
    """
    error = nn.CrossEntropyLoss()
    count = 0          # global optimizer-step counter across all epochs
    loss_list = []
    iteration_list = []
    accuracy_list = []
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # NOTE(review): assumes flat image batches reshapeable to
            # (N, 1, 28, 28) and a CUDA device — confirm against callers.
            train = Variable(images.view(-1, 1, 28, 28)).cuda()
            labels = Variable(labels).type(torch.LongTensor).cuda()
            # Clear gradients
            optimizer.zero_grad()
            # Forward propagation
            outputs = model(train)
            # Calculate softmax and cross entropy loss
            loss = error(outputs, labels)
            # Calculating gradients
            loss.backward()
            # Update parameters
            optimizer.step()
            count += 1
            if count % 50 == 0:
                # Calculate accuracy on the validation dataset
                correct = 0
                total = 0
                # Iterate through the validation dataset
                for images, labels in val_loader:
                    test = Variable(images.view(-1, 1, 28, 28)).cuda()
                    labels = labels.type(torch.LongTensor).cuda()
                    # Forward propagation
                    outputs = model(test)
                    # Get predictions from the maximum value
                    predicted = torch.max(outputs.data, 1)[1]
                    # Total number of labels
                    total += len(labels)
                    correct += (predicted == labels).sum()
                accuracy = 100 * correct / float(total)
                loss_list.append(loss.data)
                iteration_list.append(count)
                accuracy_list.append(accuracy)
                if count % 500 == 0:
                    # Print loss and accuracy at a coarser interval
                    print('Iteration: {} Loss: {} Accuracy: {} %'.format(
                        count, loss.data, accuracy))
    return iteration_list, accuracy_list, loss_list
def train(train_loader, model, criterion, optimizer, epoch, scheduler, args):
    """One training epoch for a 2-head (x/y) Gaussian regression model.

    The model predicts per-step (mu, sigma) for x and y displacements; the
    criterion is an NLL taking (target, mu, sigma, ...). Losses are masked
    and averaged by labels[:, :, 0] — presumably a validity mask of 0/1
    entries; TODO confirm against the dataset.

    Returns:
        The running average training loss for the epoch.
    """
    train_losses = AverageMeter('Loss', ':.4e')
    model.train()
    for data, labels in train_loader:
        data = data.cuda(args.local_rank)
        labels = labels.cuda(args.local_rank).squeeze()
        # Displacement targets: future positions minus current (x, y).
        # NOTE(review): assumes labels is (B, T, >=3) with channel 0 a mask
        # and channels 1: positions — confirm with the data loader.
        label_diff = (labels[:, :, 1:] - data[:, :, :2])
        (mu_x, sig_x), (mu_y, sig_y) = model(data)
        #print(label_diff.shape, mu_x.shape, sig_x.shape)
        #print(labels.shape, mu.shape)
        nll1 = criterion(label_diff[:, :, 0], mu_x, sig_x, None)
        nll2 = criterion(label_diff[:, :, 1], mu_y, sig_y, None)
        nll = nll1 + nll2
        # Masked mean: only positions where the mask is 1 contribute.
        train_loss = torch.sum(nll * labels[:, :, 0]) / int(
            torch.sum(labels[:, :, 0]))
        train_losses.update(train_loss.item(), int(torch.sum(labels[:, :, 0])))
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
    # Scheduler stepping is intentionally disabled here (left to caller?).
    #scheduler.step(train_losses.avg, epoch)
    return train_losses.avg
def validation(model, criterion, epoch):
    """Compute the average loss of `model` over the validation split.

    :param model: tracker taking (previous image, current image) pairs.
    :param criterion: loss comparing predicted and ground-truth boxes.
    :param epoch: epoch number, used only for logging.
    :return: mean loss over the validation dataset.

    Note: the original version built an Adam optimizer here and called
    ``backward()``/``step()`` inside the loop — i.e. it *trained on the
    validation set*. Validation must be read-only, so all optimization
    has been removed and the forward pass runs under ``torch.no_grad()``.
    """
    # evaluation mode
    model.eval()
    transform = transforms.Compose([Normalize(), ToTensor()])
    data = dataset.Datasets("../dataset/videos/val/",
                            "../dataset/annotations/", transform)
    dataloader = DataLoader(data, batch_size=batch_size, shuffle=True,
                            num_workers=4)
    dataset_size = dataloader.dataset.len
    running_loss = 0.0
    i = 0
    with torch.no_grad():
        # iterate over data
        for data in dataloader:
            # getting the inputs and labels
            x1, x2, y = data['previmg'], data['currimg'], data['currbb']
            if use_gpu:
                x1, x2, y = x1.cuda(), x2.cuda(), y.cuda()
            # forward only — no gradient computation during validation
            output = model(x1, x2)
            loss = criterion(output, y)
            running_loss += loss.data.item()
            print('Validation epoch : %d, step : %d, loss : %f' %
                  (epoch, i, loss.data.item()))
            i = i + 1
    val_loss = running_loss / dataset_size
    return val_loss
def predict(self, model=None):
    """Predict on ``self.val_x`` and return (probs, padded betas).

    The model is put into eval mode for inference and restored to train
    mode before returning. For the 'embed' model type, per-example beta
    matrices of differing widths are right-padded with zeros to the
    longest width and stacked.

    :param model: optional model; defaults to ``self.model``.
    :return: (probs ndarray, betas ndarray) — betas is an empty list for
        non-'embed' model types.
    """
    if model is None:  # PEP 8: identity comparison for None, not ==
        model = self.model
    model.eval()
    probs, betas = [], []
    for i in range(len(self.val_x)):
        data = self.val_x[i]
        prob, beta = model(data)
        if self.model_type == 'embed':
            betas.append(np.squeeze(beta.detach().cpu().numpy()))
        probs.append(prob)
    probs = np.array(torch.cat(probs, 0).tolist())
    betas_same_length = []
    if self.model_type == 'embed':
        # Find the widest beta, then zero-pad each to that width so they
        # can be concatenated into a single array.
        longest = 0
        for beta in betas:
            length = beta.shape[-1]
            longest = max(longest, length)
        for beta in betas:
            tmp = np.concatenate(
                [beta, np.zeros((beta.shape[0], longest - beta.shape[1]))],
                axis=1)
            betas_same_length.append(tmp)
        betas_same_length = np.concatenate(betas_same_length, axis=0)
    model.train()
    return probs, betas_same_length
def evaluate(config, model, data_iter, test=False):
    """Evaluate an intent model over `data_iter`; return F1 when `test`.

    :param config: holds device, index2label, etc.
    :param model: returns (label_logits, loss) given ((texts, mask,
        tokens), labels).
    :param data_iter: iterable of (texts, mask, tokens, labels) batches.
    :param test: when True, also print precision/recall/F1.
    :return: F1 score if `test` is True, otherwise None.

    Bug fixed: ``result`` used to be re-initialized *inside* the batch
    loop, so the F1 was computed on the final batch only. It is now
    accumulated across every batch.
    """
    model.eval()
    loss_total = 0
    result = []  # accumulated over ALL batches (was reset per batch)
    with torch.no_grad():
        for texts, mask, tokens, labels in tqdm(data_iter):
            texts = texts.to(config.device)
            labels = labels.to(config.device)
            mask = mask.to(config.device)
            tokens = tokens.to(config.device)
            label_logits, loss = model((texts, mask, tokens), labels)
            loss_total += loss
            for i in range(label_logits.shape[0]):
                predicts = recover_intent(config.index2label, label_logits[i])
                origin_label = recover_intent(config.index2label, labels[i])
                result.append({"predict": predicts, "label": origin_label})
    # NOTE(review): len(data_iter) counts batches, not samples — the
    # printed "samples" figure and loss normalization inherit that.
    total = len(data_iter)
    print("%d samples val" % total)
    print("\t val loss:", loss_total / total)
    if test:
        print("!" * 20 + "test" + "!" * 20)
        precision, recall, F1 = calculate_f1(result)
        print("-" * 20 + "intent" + "-" * 20)
        print("\t Precision: %.2f" % (100 * precision))
        print("\t Recall: %.2f" % (100 * recall))
        print("\t F1: %.2f" % (100 * F1))
        return F1
def evaluate(self, train_score=False, model=None):
    """Score the model with multilabel metrics on train or validation data.

    :param train_score: when True, score on ``self.train_set``
        (requires it to be set); otherwise score on the validation split
        via ``self.predict``.
    :param model: optional model; defaults to ``self.model``.
    :return: the result of ``multilabel_eval`` with ``full=True``.
    """
    if model is None:  # PEP 8: identity comparison for None, not ==
        model = self.model
    model.eval()
    if train_score:
        assert self.train_set is not None
        prob = []
        y = []
        num_iter = self.iterator.get_num_batches(self.train_set)
        for i in range(num_iter):
            # NOTE(review): re-creating the iterator every step and taking
            # its first batch relies on the iterator shuffling each call;
            # otherwise this would read the same batch repeatedly — confirm.
            batch = next(iter(self.iterator(self.train_set)))
            x = batch["tokens"]
            targets = batch["label"].type(ByteTensor)
            tmpprob, _ = model(x)
            prob.append(tmpprob)
            y.append(targets)
        prob = torch.cat(prob, 0).detach().cpu().numpy()
        y = torch.cat(y, 0)
    else:
        prob, _ = self.predict(model)
        y = self.val_y
    pred = prob > 0.5  # fixed decision threshold
    score = multilabel_eval(y.detach().cpu().numpy(), pred, full=True)
    model.train()
    return score
def train(train_loader, model, criterion, optimizer, epoch, scheduler, args):
    """One training epoch for a 14x14 binary grid predictor.

    The outer 1-cell border of the 14x14 grid is cropped away, leaving a
    12x12 = 144-cell target per sample. `criterion` is expected to use
    reduction='sum' (per the inline comment), so the loss is divided by
    the element count to recover a mean before backprop.

    Returns:
        (avg loss, avg accuracy) over the epoch.
    """
    train_losses = AverageMeter('Loss', ':.4e')
    train_acc = AverageMeter('Accuracy', ':.4e')
    model.train()
    for data, labels in train_loader:
        data = data.cuda(args.local_rank)
        labels = labels.cuda(args.local_rank).squeeze()
        output = model(data).squeeze(-1)
        # Reshape to the 14x14 grid, drop the border ring, flatten to 144.
        output = output.reshape(-1, 14, 14)[:, 1:-1, 1:-1].reshape(-1, 144)
        labels = labels.reshape(-1, 14, 14)[:, 1:-1, 1:-1].reshape(
            -1, 144)  # cutting out the corners
        train_loss = criterion(output, labels)  # reduction = sum
        train_losses.update(train_loss.item(), np.prod(output.shape))
        train_loss = train_loss / np.prod(output.shape)  # reverting to mean
        # Hard predictions at 0.5 after sigmoid; DCN presumably converts a
        # CUDA tensor to a numpy array — confirm against its definition.
        x = DCN(torch.sigmoid(output))
        x = np.where(x >= 0.5, 1, 0)
        answer_ratio = np.mean(np.where(x == DCN(labels), 1, 0))
        train_acc.update(answer_ratio, np.prod(output.shape))
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
    # Plateau-style scheduler driven by the epoch-average loss.
    scheduler.step(train_losses.avg, epoch)
    return train_losses.avg, train_acc.avg
def train():
    """Run one epoch of language-model training with manual SGD.

    Iterates the (module-level) `train_data` in SEQ_LENGTH chunks,
    detaching the hidden state between batches so backprop stays within
    one chunk, clips gradients, and applies a plain SGD update by hand.
    Logs loss/perplexity every LOG_INTERVAL batches.

    Uses module-level globals: model, criterion, train_data, BATCH_SIZE,
    SEQ_LENGTH, GRAD_CLIP, LOG_INTERVAL, lr, epoch.
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    hidden = model.init_hidden(BATCH_SIZE)
    for batch, i in enumerate(range(0, train_data.size(0) - 1, SEQ_LENGTH)):
        data, targets = custom_get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was
        # previously produced. If we didn't, the model would try
        # backpropagating all the way to the start of the dataset.
        hidden = reset_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        # loss = criterion(output, targets) TODO
        loss = sum([criterion(*args) for args in zip(output, targets)])
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem
        # in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
        # Manual SGD step. The old two-positional form
        # `p.data.add_(-lr, p.grad.data)` is deprecated/removed in modern
        # PyTorch; the alpha= keyword form is the supported equivalent.
        for p in model.parameters():
            p.data.add_(p.grad.data, alpha=-lr)
        total_loss += loss.item()
        if batch % LOG_INTERVAL == 0 and batch > 0:
            cur_loss = total_loss / LOG_INTERVAL
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // SEQ_LENGTH, lr,
                      elapsed * 1000 / LOG_INTERVAL, cur_loss, math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
def evaluate(data_source):
    """Return the average per-token loss of the (global) model on `data_source`.

    Runs in eval mode (dropout disabled) with gradients off; the hidden
    state is reset between chunks so evaluation of one chunk does not
    backprop into the previous one.
    """
    model.eval()
    accumulated = 0.
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for start in range(0, data_source.size(0) - 1, SEQ_LENGTH):
            inputs, targets = custom_get_batch(data_source, start)
            output, hidden = model(inputs, hidden)
            output_flat = output
            # Weight each chunk's loss by its length before averaging.
            accumulated += len(inputs) * criterion(output_flat, targets).item()
            hidden = reset_hidden(hidden)
    return accumulated / (len(data_source) - 1)
def validate(val_list, model, epoch, criterion):
    """Validate a crowd-density model: return (avg loss, MAE of counts).

    Builds its own DataLoader from `val_list` with ImageNet-style
    normalization, then accumulates the criterion loss and the absolute
    difference between predicted and ground-truth density-map sums
    (i.e. head counts).

    Uses module-level `args` for batch size, workers, lr and print_freq.
    """
    data_time = AverageMeter()
    batch_time = AverageMeter()
    losses = AverageMeter()
    # ImageNet channel statistics.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    transform1 = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])
    DatasetLoader_val = listDataset(val_list,
                                    shuffle=False,
                                    transform=transform1,
                                    train=False,
                                    batch_size=args.batch_size,
                                    num_workers=args.workers)
    val_loader = torch.utils.data.DataLoader(DatasetLoader_val,
                                             batch_size=args.batch_size)
    log_text = "epoch %d, processed %d samples, lr % .10f "\
        %(epoch, epoch * len(val_loader.dataset), args.lr)
    log_print(log_text, color="red", attrs=["bold"])
    model.eval()
    end = time.time()
    mae = 0
    for i, (img, gt_density_map) in enumerate(val_loader):
        data_time.update(time.time() - end)
        img = img.cuda()
        img = Variable(img)
        # gt_density_map = gt_density_map.type(torch.FloatTensor).unsqueeze(0)
        gt_density_map = gt_density_map.cuda()
        gt_density_map = Variable(gt_density_map)  # TODO
        et_density_map = model(img)
        # NOTE(review): nn losses conventionally take (input, target);
        # here (gt, et) is passed — the author's own TODO flags this.
        # Harmless for symmetric losses like MSE; confirm for others.
        loss = criterion(gt_density_map, et_density_map)  # TODO
        losses.update(loss.item(), img.size(0))
        batch_time.update(time.time() - end)
        # Count error: density maps sum to the number of people.
        mae += abs(et_density_map.data.sum() - gt_density_map.sum())  #todo
        end = time.time()
        if i % args.print_freq == 0:
            print_str = "Epoch: [{0}][{1}/{2}]\t"\
                .format(epoch, i, len(val_loader))
            print_str += "Data time {data_time.cur:.3f}({data_time.avg:.3f})\t"\
                .format(data_time = data_time)
            print_str += "Batch time {batch_time.cur:.3f}({batch_time.avg:.3f})\t"\
                .format(batch_time = batch_time)
            print_str += "Loss {loss.cur:.4f}({loss.avg:.4f})\t"\
                .format(loss = losses)
            log_print(print_str, color="red", attrs=["bold"])
    # NOTE(review): divides by number of batches, not samples — so this is
    # MAE per batch; consistent only if callers expect that.
    mae = mae / len(val_loader)
    return losses.avg, mae
def train(model, dataloader, criterion, optimizer, num_epochs, model_save_dir):
    """Train a two-image tracker and plot train/validation loss curves.

    :param model: takes (previous image, current image) and predicts a box.
    :param dataloader: training loader yielding dicts with 'previmg',
        'currimg' and 'currbb'.
    :param criterion: loss between predicted and ground-truth boxes.
    :param optimizer: optimizer over model parameters.
    :param num_epochs: number of epochs to train.
    :param model_save_dir: directory where the final weights are saved.
    :return: the trained model.

    Bug fixed: ``running_loss`` was never reset between epochs, so the
    reported per-epoch loss silently accumulated across all previous
    epochs; it is now reset at the start of each epoch.
    """
    since = time.time()
    dataset_size = dataloader.dataset.len
    train_l = []
    valid_l = []
    i = 0  # global step counter across epochs (for logging only)
    x = list()
    for epoch in range(num_epochs):
        print('------------------EPOCH {}/{}------------------'.format(
            epoch + 1, num_epochs))
        model.train()
        x.append(epoch + 1)
        running_loss = 0.0  # reset per epoch (was accumulating across epochs)
        # iterating over data
        for data in dataloader:
            # getting the inputs and labels
            x1, x2, y = data['previmg'], data['currimg'], data['currbb']
            # wrapping them in variable
            if use_gpu:
                x1, x2, y = Variable(x1.cuda()), Variable(x2.cuda()), \
                    Variable(y.cuda(), requires_grad=False)
            else:
                x1, x2, y = Variable(x1), Variable(x2), \
                    Variable(y, requires_grad=False)
            # zero the parameter gradients
            optimizer.zero_grad()
            # forward
            output = model(x1, x2)
            loss = criterion(output, y)
            # backward + optimize
            loss.backward()
            optimizer.step()
            print('training epoch : %d, step : %d, loss : %f' %
                  (epoch + 1, i, loss.data.item()))
            i = i + 1
            running_loss += loss.data.item()
        epoch_loss = running_loss / dataset_size
        train_l.append(epoch_loss)
        print('-------------Loss: {:.4f} in epoch: {}-------------'.format(
            epoch_loss, epoch + 1))
        val_loss = validation(model, criterion, epoch + 1)
        print('Validation Loss: {:.4f}'.format(val_loss))
        valid_l.append(val_loss)
    path = model_save_dir + 'model_n_epoch_' + str(num_epochs) + '.pth'
    torch.save(model.state_dict(), path)
    # plotting the loss graphics both for validation and training.
    plot_loss_table(x, train_l, valid_l)
    time_elapsed = time.time() - since
    print('Training completed in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model
def test_topic(config, test_data, m, test_target, model=None, th=None,
               eval_mark='test'):
    """Evaluate a topic classifier; tune a threshold on dev, score on test.

    :param config: run configuration (gpu flag, batch_size, seed, ...).
    :param test_data: input examples.
    :param m: masks (re-bound per batch inside the loop).
    :param test_target: gold binary labels.
    :param model: model producing a probability per example.
    :param th: decision threshold; when evaluating a dev split with
        ``th is None`` the optimal threshold is searched and returned.
    :param eval_mark: 'test' or contains 'dev' to select the mode.
    :return: best threshold (dev mode) or [ap, precision, recall, f1].
    """
    test_loader = build_dataloader_topic(test_data, m, test_target,
                                         config.batch_size)
    if config.gpu:
        if torch.cuda.device_count() > 1:
            logging.info('use multiple gpu')
            model = torch.nn.DataParallel(model)
        torch.cuda.manual_seed_all(config.seed)
        model.to('cuda')
    model.eval()
    logging.info("prediction")
    total_label = []
    total_probs = []
    for batch in tqdm(test_loader):
        X, m, y = batch
        if config.gpu:
            X = X.to('cuda')
            y = y.to('cuda')
            m = m.to('cuda')
        prob = model(X, m)
        total_label.extend(y.data.cpu().clone().numpy())
        total_probs.extend(prob.data.cpu().clone().numpy())
    total_probs = np.array(total_probs)
    total_label = np.array(total_label)
    if 'dev' in eval_mark and th is None:  # PEP 8: `is None`, not == None
        best_th, best_f1 = optimal_threshold(total_label, total_probs,
                                             Ns=1000)
        return best_th
    ap = round(average_precision_score(total_label, total_probs), 4)
    # Threshold the probabilities into hard 0/1 predictions.
    pre_label = np.zeros(total_label.shape, dtype=int)
    idx = np.where(total_probs >= th)[0]
    pre_label[idx] = 1
    pre, rec, f1 = pre_rec_f1(total_label, pre_label)
    return [ap, pre, rec, f1]
def run_epoch(ids_corpus, train, dev, test, model, optimizer, args): ''' Run one epoch (one pass of data) Return average training loss, training cost, dev and test evaluations after this epoch. ''' #set model to training mode model.train() #set model to train mode train_batches = create_batches(ids_corpus, train, args.batch_size, padding_id=0, perm=None, pad_left=args.pad_left) N = len(train_batches) train_loss = 0.0 train_cost = 0.0 for i in xrange(N): batch = train_batches[i] # print "batch title..", batch[0][0] h_final = model(batch) loss = max_margin_loss(args, h_final, batch, args.margin) cost = loss + model.get_l2_reg() ##backprop optimizer.zero_grad() loss.backward() # back propagation, compute gradient optimizer.step() # update parameters train_loss += loss.data train_cost += cost.data if i % 10 == 0: say("\r{}/{}".format(i, N)) dev_eva = None test_eva = None if i == N - 1: # last batch if dev is not None: dev_eva = model.evaluate(dev) if test is not None: test_eva = model.evaluate(test) return (train_loss / (i + 1))[0], (train_cost / (i + 1))[0], dev_eva, test_eva
def evaluate(model, dataset, collator, opt):
    """Evaluate a passage re-ranking model; return loss and top-k metrics.

    Runs sequentially over `dataset`, feeding question/passage batches to
    the model and accumulating inversion and top-k statistics via
    ``src.evaluation.eval_batch``; the accumulators are then reduced
    across workers with ``src.util.weighted_average``.

    Returns:
        (loss, inversions, avg_topk, idx_topk)

    NOTE(review): the returned `loss` is the loss of the *last* batch
    only, and `eval_loss` is initialized but never appended to — likely
    the intent was an averaged loss. Left as-is because the caller's
    contract depends on it; confirm before changing.
    """
    sampler = SequentialSampler(dataset)
    dataloader = DataLoader(dataset,
                            sampler=sampler,
                            batch_size=opt.per_gpu_batch_size,
                            drop_last=False,
                            num_workers=10,
                            collate_fn=collator)
    model.eval()
    # Unwrap DataParallel/DistributedDataParallel if necessary.
    if hasattr(model, "module"):
        model = model.module
    total = 0
    eval_loss = []
    # Only track k values that fit within the number of contexts.
    avg_topk = {k: [] for k in [1, 2, 5] if k <= opt.n_context}
    idx_topk = {k: [] for k in [1, 2, 5] if k <= opt.n_context}
    inversions = []
    with torch.no_grad():
        for i, batch in enumerate(dataloader):
            (idx, question_ids, question_mask, context_ids, context_mask,
             gold_score) = batch
            _, _, scores, loss = model(
                question_ids=question_ids.cuda(),
                question_mask=question_mask.cuda(),
                passage_ids=context_ids.cuda(),
                passage_mask=context_mask.cuda(),
                gold_score=gold_score.cuda(),
            )
            src.evaluation.eval_batch(scores, inversions, avg_topk, idx_topk)
            total += question_ids.size(0)
    # Distributed-aware averaging of the accumulated statistics.
    inversions = src.util.weighted_average(np.mean(inversions), total, opt)[0]
    for k in avg_topk:
        avg_topk[k] = src.util.weighted_average(np.mean(avg_topk[k]), total, opt)[0]
        idx_topk[k] = src.util.weighted_average(np.mean(idx_topk[k]), total, opt)[0]
    return loss, inversions, avg_topk, idx_topk
def train(model, trainloader, validloader, criterion, optimizer, epochs,
          device, load_pth, model_sv_pth, plot=True, visualize=False,
          load_model=False):
    """Train a semantic-segmentation model, validating after each epoch.

    Args:
        model: segmentation network producing per-class logits.
        trainloader / validloader: loaders yielding (inputs, mask, rgb).
        criterion: loss over logits and integer class targets.
        optimizer: optimizer over model parameters.
        epochs: number of epochs.
        device: torch device for inputs/targets.
        load_pth: checkpoint path restored when ``load_model`` is True.
        model_sv_pth: path used by `Validate` to save the best model.
        plot: plot the train/valid loss curves at the end.
        visualize: every 10 epochs show the first batch's images, masks
            and predictions.
        load_model: restore weights from `load_pth` before training.
    """
    if load_model:
        model.load_state_dict(torch.load(load_pth))
    model.train()
    stats = []
    valid_loss_min = np.Inf  # best validation loss so far (for checkpointing)
    print('Training Started.....')
    for epoch in range(epochs):
        train_loss = 0
        train_iou = []
        for i, data in enumerate(trainloader):
            inputs, mask, rgb = data
            inputs, mask = inputs.to(device), mask.to(device)
            optimizer.zero_grad()
            output = model(inputs.float())
            # Targets come one-hot along dim 1; argmax recovers class ids.
            target = mask.argmax(1)
            loss = criterion(output, target.long())
            loss.backward()
            optimizer.step()
            # Weight batch loss by batch size for a dataset-level average.
            train_loss += loss.item() * inputs.size(0)
            iou = iou_pytorch(output.argmax(1), target)
            train_iou.extend(iou)
            if visualize and epoch % 10 == 0 and i == 0:
                print('The training images')
                show_databatch(inputs.detach().cpu(), size=(8, 8))
                print('The original masks')
                show_databatch(rgb.detach().cpu(), size=(8, 8))
                RGB_mask = mask_to_rgb(output.detach().cpu(), id2code)
                print('Predicted masks')
                show_databatch(torch.tensor(RGB_mask).permute(0, 3, 1, 2),
                               size=(8, 8))
        miou = torch.FloatTensor(train_iou).mean()
        train_loss = train_loss / len(trainloader.dataset)
        print('Epoch', epoch, ':', f'Lr ({optimizer.param_groups[0]["lr"]})',
              f'\n\t\t Training Loss: {train_loss:.4f},',
              f' Training IoU: {miou:.3f},')
        with torch.no_grad():
            # Validate also checkpoints to model_sv_pth when the
            # validation loss improves.
            valid_loss, valid_loss_min = Validate(model, validloader,
                                                  criterion, valid_loss_min,
                                                  device, model_sv_pth)
        stats.append([train_loss, valid_loss])
        stat = pd.DataFrame(stats, columns=['train_loss', 'valid_loss'])
    print('Finished Training')
    if plot:
        plotCurves(stat)
def train(model, optimizer, dataloader, epoch, device, ad_train):
    """Train a sequence tagger for one epoch, optionally with FGM
    adversarial training.

    :param model: returns the loss given (inputs, tags, mask).
    :param optimizer: optimizer over model parameters.
    :param dataloader: yields dicts with 'token_ids', 'tags', 'mask'.
    :param epoch: current epoch index (logging only).
    :param device: torch device.
    :param ad_train: when True, add FGM adversarial perturbation.

    Bug fixed: the original attacked *before* the only forward/backward
    pass and zeroed gradients after the attack, so no clean gradient was
    ever accumulated. Standard FGM is: clean backward -> attack ->
    adversarial backward -> restore -> step, which is what happens now.
    """
    fgm = FGM(model)
    model.train()
    pbar = tqdm(dataloader)
    for data in pbar:
        inputs, tags, mask = data['token_ids'], data['tags'], data[
            'mask'].bool()
        inputs = inputs.to(device)
        tags = tags.to(device)
        mask = mask.to(device)
        optimizer.zero_grad()
        # Clean pass: accumulate the ordinary gradient first.
        loss = model(inputs, tags, mask)
        loss.backward()
        if ad_train:
            # Perturb embeddings along the clean gradient, accumulate the
            # adversarial gradient on top, then restore the embeddings.
            fgm.attack()
            loss_adv = model(inputs, tags, mask)
            loss_adv.backward()
            fgm.restore()
        optimizer.step()
        pbar.set_description(
            f"Train epoch: {epoch}/{EPOCH}, loss: {loss.tolist():.3f}")
    logger.info(f"Train epoch: {epoch+1}/{EPOCH}, loss: {loss.tolist():.3f}")
def get_predict_npy(cfg):
    """Predict test-set probabilities with fold-ensembled TTA and save .npy.

    For each fold in ``cfg['fold']`` the matching checkpoint is loaded,
    TTA-augmented test batches are scored, the per-augmentation
    predictions are averaged, and finally the fold predictions are
    averaged and written to ``{cfg['name']}submit.npy``.
    """
    submission = pd.read_csv('../data/sample_submission.csv')
    sz = 512  # input image side length
    test_ds = Dataset(submission, fold=None, train=None,
                      tsfm=TestTsfm(sz, tta=True))
    test_sampler = BatchSampler(SequentialSampler(test_ds),
                                batch_size=cfg['bs'], drop_last=False)
    test_dl = DataLoader(test_ds, batch_sampler=test_sampler, num_workers=6,
                         pin_memory=True)
    model = Model(base=torchvision.models.resnet18)
    # Half-precision inference; bn_to_float presumably keeps batch-norm in
    # fp32 for numerical stability — confirm against its definition.
    model.half()
    bn_to_float(model)
    res = []
    with torch.no_grad():
        for fold in cfg['fold']:
            model.load_state_dict(
                torch.load(f"../weights/{cfg['name']}_fold{fold}.pkl")
                ['model'])
            model.cuda()
            model.eval()
            predicts = []
            for idx, imgs in tqdm(zip(test_sampler, test_dl),
                                  total=len(test_dl)):
                num_sample = imgs.size(0)
                # Flatten the TTA dimension into the batch, score, then
                # average back over the augmentations (dim=1).
                imgs = imgs.view((-1, sz, sz, 4))
                pred = model(imgs.cuda()).sigmoid().view(
                    (num_sample, -1, 28)).mean(dim=1).cpu().numpy()
                predicts.append(pred)
            res.append(np.concatenate(predicts, axis=0))
    # Average predictions across folds.
    res = np.average(res, axis=0)
    np.save(f"{cfg['name']}submit", res)
def test(test_loader, model, criterion, args):
    """Score a 14x14 binary-grid model on the test set.

    The outer 1-cell ring of each 14x14 grid is cropped away (leaving
    144 cells per sample); the summed criterion is averaged per element
    and accuracy is computed from sigmoid outputs thresholded at 0.5.
    Returns (avg loss, avg accuracy).
    """
    test_losses = AverageMeter('Loss', ':.4e')
    test_acc = AverageMeter('Accuracy', ':.4e')
    model.eval()
    with torch.no_grad():
        for data, labels in test_loader:
            data = data.cuda(args.local_rank)
            labels = labels.cuda(args.local_rank).squeeze()
            logits = model(data).squeeze(-1)
            # Crop the border ring of the 14x14 grid, then flatten.
            logits = logits.reshape(-1, 14, 14)[:, 1:-1, 1:-1].reshape(-1, 144)
            labels = labels.reshape(-1, 14, 14)[:, 1:-1, 1:-1].reshape(-1, 144)  # cutting out the corners
            n_elems = np.prod(logits.shape)
            batch_loss = criterion(logits, labels)  # reduction = sum
            test_losses.update(batch_loss.item(), n_elems)
            # Hard 0/1 predictions from sigmoid probabilities.
            hard = np.where(DCN(torch.sigmoid(logits)) >= 0.5, 1, 0)
            hit_ratio = np.mean(np.where(hard == DCN(labels), 1, 0))
            test_acc.update(hit_ratio, n_elems)
    return test_losses.avg, test_acc.avg
def test(test_loader, model, criterion, args):
    """Evaluate the two-head (x/y) Gaussian regression model on the
    test set and return the masked-average NLL.

    labels[:, :, 0] acts as a 0/1 validity mask; labels[:, :, 1:] holds
    target positions, from which per-step displacements are derived.
    """
    test_losses = AverageMeter('Loss', ':.4e')
    model.eval()
    with torch.no_grad():
        for data, labels in test_loader:
            data = data.cuda(args.local_rank)
            labels = labels.cuda(args.local_rank).squeeze()
            # Displacements: target positions minus current (x, y).
            displacement = labels[:, :, 1:] - data[:, :, :2]
            (mu_x, sig_x), (mu_y, sig_y) = model(data)
            nll = (criterion(displacement[:, :, 0], mu_x, sig_x, None)
                   + criterion(displacement[:, :, 1], mu_y, sig_y, None))
            mask = labels[:, :, 0]
            n_valid = int(torch.sum(mask))
            # Average the NLL over the valid (masked-in) positions only.
            batch_loss = torch.sum(nll * mask) / n_valid
            test_losses.update(batch_loss.item(), n_valid)
    return test_losses.avg
def train():
    """One CutMix training epoch; returns epoch-level loss/accuracy stats.

    Uses module-level globals: train_loader, device, model, optimizer,
    cutmix_beta, rand_bbox, criterion_ce, criterion_lss1, criterion_lss2.
    The model returns (outputs, pool_outputs, M_hat, Y) — presumably
    logits, pooled logits, a predicted mix-mask and a feature map;
    confirm against the model definition.

    Returns:
        (epoch_loss, epoch_acc, epoch_lsl, epoch_lss_1, epoch_lss_2,
        epoch_lsd) — all averaged over the dataset.
    """
    tot_loss = 0.0
    tot_correct = 0
    tot_lsl = 0.0
    tot_lss_1 = 0.0
    tot_lss_2 = 0.0
    tot_lsd = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        # CutMix regularizer: paste a random box from a shuffled copy of
        # the batch into each image and mix the labels by area.
        label_original = F.one_hot(labels, 10)
        lam = np.random.beta(cutmix_beta, cutmix_beta)
        rand_index = torch.randperm(inputs.size()[0])
        x_cutmix = inputs.clone().detach()
        x_a = inputs[rand_index, :, :, :]
        labels_a = labels[rand_index]
        bbx1, bby1, bbx2, bby2 = rand_bbox(inputs.size(), lam)
        # M is a binary mask marking the pasted region.
        M = torch.zeros((inputs.size()[-2], inputs.size()[-1]))
        M = M.to(device)
        M[bbx1:bbx2, bby1:bby2] = 1
        x_cutmix[:, :, bbx1:bbx2, bby1:bby2] = x_a[:, :, bbx1:bbx2, bby1:bby2]
        # Recompute lam as the exact pasted-area fraction (the sampled box
        # may have been clipped at the image border).
        lam = ((bbx2 - bbx1) * (bby2 - bby1) /
               (inputs.size()[-1] * inputs.size()[-2]))
        label_cutmix = lam * label_original[rand_index, :] + (
            1 - lam) * label_original
        # x_a: teacher-style forward on the paste-source images, no grads.
        model.eval()
        with torch.no_grad():
            _dummy1, _dummy2, _dummpy3, Y_a = model(x_a)
        # CutMix forward with gradients.
        model.train()
        optimizer.zero_grad()
        outputs, pool_outputs, M_hat, Y_cutmix = model(x_cutmix)
        # Resize M to H0 * W0 (downsample the mask to M_hat's resolution).
        M = M.unsqueeze(dim=0).unsqueeze(dim=1)
        M = M.repeat(inputs.size()[0], 1, 1, 1)
        M_resizer = torch.nn.MaxPool2d(int(M.size()[-1] / M_hat.size()[-1]))
        M = M_resizer(M)
        # Classification loss mixed by area; NOTE(review): lam weights the
        # pasted (labels_a) term — consistent with lam = paste fraction.
        lsl = lam * criterion_ce(outputs, labels_a) + (1 - lam) * criterion_ce(
            outputs, labels)
        lss_1 = criterion_lss1(M_hat, M)
        lss_2 = criterion_lss2(M[0, 0, :, :] * Y_cutmix, M[0, 0, :, :] * Y_a)
        lsd = criterion_lss2(outputs, pool_outputs.detach()) + 0.5 * (
            lam * criterion_ce(pool_outputs, labels_a) +
            (1 - lam) * criterion_ce(pool_outputs, labels))
        # Auxiliary terms are computed/logged but deliberately excluded
        # from the optimized loss here.
        # loss = lsl + lss_1 + lss_2 + lsd
        loss = lsl
        loss.backward()
        optimizer.step()
        _, preds = torch.max(outputs, 1)
        # Accuracy is measured against the dominant mixed label.
        _, labels = torch.max(label_cutmix, 1)
        tot_loss += loss.item() * inputs.size(0)
        tot_correct += torch.sum(preds == labels.data).item()
        tot_lsl += lsl.item() * inputs.size(0)
        tot_lss_1 += lss_1.item() * inputs.size(0)
        tot_lss_2 += lss_2.item() * inputs.size(0)
        tot_lsd += lsd.item() * inputs.size(0)
    len_ = len(train_loader.dataset)
    epoch_loss = tot_loss / len_
    epoch_acc = tot_correct / len_
    epoch_lsl = tot_lsl / len_
    epoch_lss_1 = tot_lss_1 / len_
    epoch_lss_2 = tot_lss_2 / len_
    epoch_lsd = tot_lsd / len_
    return epoch_loss, epoch_acc, epoch_lsl, epoch_lss_1, epoch_lss_2, epoch_lsd
def fit(self, data_set, class_names):
    """Train the multilabel model, keeping the best checkpoint by
    validation score, and return the final-epoch training outputs.

    Builds id<->label maps from `class_names`, trains for
    ``self.num_epoches`` epochs with Adam, and every ``self.valid_freq``
    epochs evaluates on the validation split; the model snapshot with
    the best val hamming-style score / match rate is kept in
    ``self.model``. On the last epoch, per-batch probabilities (and, for
    the 'embed' model type, attention betas padded to equal width) are
    collected and returned.

    Returns:
        train_prob ndarray, plus betas_same_length ndarray for 'embed'.
    """
    '''
    if not hasattr(self, 'i2v'):
        raise ValueError('Must set vocabulary dict first')
    '''
    # TODO: Initialize these in the __init__()
    self.id2label = {}
    self.label2id = {}
    for i, label in enumerate(class_names):
        self.id2label[str(i)] = label
        self.label2id[label] = i
    model = self.model
    optimizer = optim.Adam(model.parameters())
    max_val_match = 0.
    max_val_hs = 0.
    self.train_set = data_set
    for epoch in range(self.num_epoches):
        losses = []
        match = []   # per-example: any true label predicted?
        hs = []      # per-example Jaccard (intersection / union)
        if epoch == self.num_epoches - 1:
            # Collect outputs only on the final epoch (they are returned).
            train_prob = []
            if self.model_type == 'embed':
                train_beta = []
        num_iter = self.iterator.get_num_batches(data_set)
        for i in range(num_iter):
            # NOTE(review): re-creating the iterator each step and taking
            # its first batch relies on the iterator shuffling per call;
            # otherwise the same batch would be drawn repeatedly — confirm.
            batch = next(iter(self.iterator(data_set)))
            inputs = batch["tokens"]
            targets = batch["label"]
            model.zero_grad()
            prob, beta, loss = model(inputs, targets.type(FloatTensor))
            if epoch == self.num_epoches - 1:
                train_prob.append(prob.detach())
                if self.model_type == 'embed':
                    train_beta.append(
                        np.squeeze(beta.detach().cpu().numpy()))
            losses.append(loss.data.item())
            pred = prob > 0.5
            true = targets.type(ByteTensor)
            match += [(pred[i][true[i] == 1] == 1).any().float()
                      for i in range(len(pred))]
            hs += [(((pred == 1) * (true == 1)).sum(1) /
                    (((pred == 1) + (true == 1)) > 0).sum(1)).float()]
            loss.backward()
            optimizer.step()
        if epoch % self.valid_freq == 0 or epoch == self.num_epoches - 1:
            match_epoch = torch.mean(torch.stack(match))
            hs_epoch = torch.mean(torch.cat(hs))
            prob, _ = self.predict(model)
            pred = prob > 0.5
            #print(np.sum(pred, axis=-1))
            tmp = self.val_y.detach().cpu().numpy()
            #print(np.sum(tmp, axis=-1)[0:30])
            #print(np.sum((pred==1)*(tmp==1), axis=-1)[0:30])
            #print(np.sum(((pred==1) + (tmp==1))>0, axis=-1)[0:30])
            val_match = np.mean([(pred[i][tmp[i] == 1] == 1).any()
                                 for i in range(len(pred))])
            val_hs = (((pred == 1) * (tmp == 1)).sum(1) /
                      (((pred == 1) + (tmp == 1)) > 0).sum(1)).mean()
            print("--- epoch:", epoch, "---")
            print(
                "[%d/%d] loss_epoch : %0.2f" %
                (epoch, self.num_epoches, np.mean(losses)),
                "val_match : %0.4f" % val_match,
                "match_epoch : %0.4f" % match_epoch,
                "val_hs : %0.4f" % val_hs,
                "hs_epoch : %0.4f" % hs_epoch)
            # Snapshot the model whenever either validation metric
            # matches or beats its best value so far.
            if val_hs >= max_val_hs:
                max_val_hs = val_hs
                self.model = deepcopy(model)
            if val_match >= max_val_match:
                max_val_match = val_match
                self.model = deepcopy(model)
            self.model.zero_grad()
    print('Max_scores', max_val_match, max_val_hs)
    train_score = self.evaluate(train_score=True)
    val_score = self.evaluate(train_score=False)
    print('training', train_score)
    print('validation', val_score)
    train_prob = np.array(torch.cat(train_prob, 0).tolist())
    if self.model_type == 'embed':
        # Pad every beta to the widest one so they can be concatenated.
        longest = 0
        betas_same_length = []
        for beta in train_beta:
            length = beta.shape[-1]
            longest = max(longest, length)
        print(longest)
        for beta in train_beta:
            tmp = np.concatenate(
                [beta, np.zeros((beta.shape[0], longest - beta.shape[1]))],
                axis=1)
            betas_same_length.append(tmp)
        betas_same_length = np.concatenate(betas_same_length, axis=0)
        return train_prob, betas_same_length
    return train_prob
for inputs, labels in tqdm(Train_loader, total=len(Train_loader)): counter += 1 batch_size = inputs.shape[0] h = model.init_hidden(batch_size) if (train_on_gpu): inputs, labels = inputs.cuda(), labels.cuda() # Creating new variables for the hidden state, otherwise # we'd backprop through the entire training history h = tuple([each.data for each in h]) # zero accumulated gradients model.zero_grad() # get the output from the model output, h = model(inputs, h) # calculate the loss and perform backprop loss = criterion(output.squeeze(), labels.float()) loss.backward() # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs. nn.utils.clip_grad_norm_(model.parameters(), clip) optimizer.step() if counter % print_every == 0: # Get validation loss val_losses = [] model.eval() for inputs, labels in Valid_loader: # Creating new variables for the hidden state, otherwise
def train(opt):
    """Extract ResNet-50 dish-image features and pickle them to disk.

    NOTE(review): despite the name, all training code here is commented
    out — this function runs a pretrained ResNet-50 (final FC removed)
    in eval mode over the dataset and dumps {index: feature vector} to
    'test_feat2.pkl'. Consider renaming once confirmed.
    """
    # set image transformation methods
    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(256),  # we get only the center of that rescaled
        transforms.RandomCrop(224),  # random crop within the center crop
        #transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    # prepare the train, validate and test loader
    #training_set = ImageDataset(opt.dish_img_path, opt.dish_info_path, opt.ingr_img_path, opt.vocab_path,
    #                            'train', transform=transform)
    data_set = ImageData(opt.dish_img_path, opt.dish_info_path,
                         opt.ingr_img_path, opt.vocab_path, 'test',
                         transform=transform)
    #testing_set = ImageDataset(opt.dish_img_path, opt.dish_info_path, opt.ingr_img_path, opt.vocab_path,
    #                           'test', transform=transform)
    #train_loader = data.DataLoader(training_set, batch_size=opt.batch_size, shuffle=True)
    #val_loader = data.DataLoader(validate_set, batch_size=opt.batch_size, shuffle=False)
    #test_loader = data.DataLoader(testing_set, batch_size=opt.batch_size, shuffle=False)
    data_loader = data.DataLoader(data_set, batch_size=opt.batch_size,
                                  shuffle=True)
    # set the model
    #model = img2img(num_ingr=opt.num_ingr, imfeatDim=opt.imfeatDim, embDim=opt.embDim)
    #model = model.to(device)
    # Pretrained ResNet-50 with the classification head stripped off, so
    # the forward pass yields pooled feature vectors.
    model = models.resnet50(pretrained=True)
    resnet_modules = list(model.children())[:-1]
    model = nn.Sequential(*resnet_modules)
    model = model.to(device)
    # set the optimizer (unused while training is disabled)
    criterion = nn.CosineEmbeddingLoss(0.1).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    num_iter_per_epoch = len(data_loader)
    print(num_iter_per_epoch)
    model.eval()
    rec = {}  # dataset index -> extracted feature vector
    for epoch in range(opt.num_epochs):
        for itr, input in enumerate(data_loader):
            #[dish_img, ingr_imgs, igr_ln] = [record.to(device) for record in input]
            #[dish_img, ingr_imgs, igr_ln] = [record.to(device) for record in input]
            dish_img = input[0].to(device)
            idx = input[1].numpy()
            #target = [record.to(device) for record in target]
            #optimizer.zero_grad()
            dish_emb = model(dish_img)
            dish_emb = dish_emb.view(dish_emb.size(0), -1).cpu().detach().numpy()
            for i in range(len(idx)):
                rec[idx[i]] = dish_emb[i]
            print(len(rec))
            sys.stdout.flush()
            #loss = criterion(dish_emb, ingr_embs, target[0])
            #loss.backward()
            #optimizer.step()
            #print("Training: Iteration: {}/{} Epoch: {}/{} Loss: {}".format(
            #    itr + 1, num_iter_per_epoch, epoch + 1, opt.num_epochs, loss))
    pickle.dump(rec, open('test_feat2.pkl', 'wb'))
def train(model, optimizer, scheduler, global_step, train_dataset, dev_dataset, opt, collator, best_eval_loss):
    """Train the model until `opt.total_steps` optimizer-visible steps.

    Periodically evaluates on `dev_dataset` every `opt.eval_freq` steps,
    checkpoints the best dev model and periodic snapshots, and logs to
    tensorboard when available. Only the main process (opt.is_main) logs and
    saves. Relies on module-level `logger`, `src.util`, `evaluate` and
    `dir_path` defined elsewhere in this file.

    Returns: nothing; progress is persisted via `src.util.save`.
    """
    if opt.is_main:
        try:
            tb_logger = torch.utils.tensorboard.SummaryWriter(
                Path(opt.checkpoint_dir) / opt.name)
        except Exception:  # narrowed from bare `except:` (which would even swallow KeyboardInterrupt)
            tb_logger = None
            logger.warning('Tensorboard is not available.')

    train_sampler = (DistributedSampler(train_dataset) if opt.is_distributed
                     else RandomSampler(train_dataset))
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=opt.per_gpu_batch_size,
                                  drop_last=True,
                                  num_workers=10,
                                  collate_fn=collator)

    curr_loss = 0.0
    epoch = 1
    model.train()
    while global_step < opt.total_steps:
        # BUG FIX: was `if opt.is_distributed > 1:`, which is always False for
        # a boolean flag, so DistributedSampler never reshuffled across epochs
        # and every epoch saw the same sample order per rank.
        if opt.is_distributed:
            train_sampler.set_epoch(epoch)
        epoch += 1
        for i, batch in enumerate(train_dataloader):
            global_step += 1
            (idx, question_ids, question_mask, passage_ids, passage_mask,
             gold_score) = batch
            _, _, _, train_loss = model(
                question_ids=question_ids.cuda(),
                question_mask=question_mask.cuda(),
                passage_ids=passage_ids.cuda(),
                passage_mask=passage_mask.cuda(),
                gold_score=gold_score.cuda(),
            )
            train_loss.backward()

            # Gradient accumulation: only clip/step/zero every
            # `accumulation_steps` micro-batches.
            if global_step % opt.accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), opt.clip)
                optimizer.step()
                scheduler.step()
                model.zero_grad()

            train_loss = src.util.average_main(train_loss, opt)
            curr_loss += train_loss.item()

            if global_step % opt.eval_freq == 0:
                eval_loss, inversions, avg_topk, idx_topk = evaluate(
                    model, dev_dataset, collator, opt)
                if eval_loss < best_eval_loss:
                    best_eval_loss = eval_loss
                    if opt.is_main:
                        src.util.save(model, optimizer, scheduler, global_step,
                                      best_eval_loss, opt, dir_path, 'best_dev')
                model.train()
                if opt.is_main:
                    log = f"{global_step} / {opt.total_steps}"
                    log += f" -- train: {curr_loss/opt.eval_freq:.6f}"
                    log += f", eval: {eval_loss:.6f}"
                    log += f", inv: {inversions:.1f}"
                    log += f", lr: {scheduler.get_last_lr()[0]:.6f}"
                    for k in avg_topk:
                        log += f" | avg top{k}: {100*avg_topk[k]:.1f}"
                    for k in idx_topk:
                        log += f" | idx top{k}: {idx_topk[k]:.1f}"
                    logger.info(log)
                    if tb_logger is not None:
                        tb_logger.add_scalar("Evaluation", eval_loss, global_step)
                        tb_logger.add_scalar("Training",
                                             curr_loss / (opt.eval_freq),
                                             global_step)
                curr_loss = 0

            if opt.is_main and global_step % opt.save_freq == 0:
                src.util.save(model, optimizer, scheduler, global_step,
                              best_eval_loss, opt, dir_path,
                              f"step-{global_step}")
            if global_step > opt.total_steps:
                break
def test(test_loader, model, criterion, sampler, norm_constant, args):
    """Evaluate a multi-step trajectory model by autoregressive rollout.

    For each batch, rolls the model forward `step_num` steps: at every step
    it scores the predicted per-axis Gaussian deltas (x, y, z, vx, vy, vz)
    against the ground-truth deltas with `criterion` (a Gaussian NLL), then
    feeds either sampled or mean predictions back in as the next input.

    Returns:
        (avgs, counts): per-step average losses plus a total in the last
        slot, and the matching sample counts.

    NOTE(review): `norm_constant` is accepted but never used in this body.
    NOTE(review): assumes the last feature channel of `data` is a group/mask
    indicator (it is stripped via `data[:, :, :-1]` and rebuilt from
    `2 * data_mask - 1`) -- confirm against the dataset definition.
    """
    # One AverageMeter per rollout step, plus one for the overall total.
    test_losses_list = []
    step_num = test_loader.dataset.test_labels.shape[1]
    for i in range(step_num):
        test_losses_list.append(AverageMeter('Loss_' + str(i), ':.4e'))
    test_losses_list.append(AverageMeter('Total_Loss', ':.4e'))
    model.eval()
    with torch.no_grad():
        for i, (data, labels, target_mask_list, mask_input_new, mask_input_next, mask_input_check) in enumerate(test_loader):
            data = data.cuda(args.local_rank)
            # Hidden state is (re)initialized per step via model.module.initialize.
            hidden, cell = None, None
            # Tally of valid targets per step; NOTE(review): computed but never
            # read afterwards (losses are normalized per-mask below).
            total_count = [0. for i in range(step_num + 1)]
            for n in range(step_num):
                for j in range(step_num):
                    total_count[j] += int(torch.sum(target_mask_list[n][j]))
                    total_count[-1] += int(torch.sum(target_mask_list[n][j]))
            for n in range(step_num):  #labels.shape[1]
                # Per-step slices of labels and the three input masks.
                label = labels.cuda(args.local_rank)[:, n]
                hidden_mask = mask_input_new.cuda(args.local_rank)[:, n]
                data_mask = mask_input_next.cuda(args.local_rank)[:, n]
                label_mask = mask_input_check.cuda(args.local_rank)[:, n]
                # Target is the delta between next-step label and current state.
                label_diff = (label - data[:, :, :-1])  # remove Group part
                # `model` is wrapped (DDP/DataParallel), hence `.module` here.
                hidden, cell = model.module.initialize(data, hidden, cell, hidden_mask)
                # Six (mu, sigma) heads: one Gaussian per position/velocity axis.
                (mu_x, sig_x), (mu_y, sig_y), (mu_z, sig_z), (mu_vx, sig_vx), (
                    mu_vy, sig_vy), (mu_vz, sig_vz), hidden, cell = model(
                        data, hidden, cell)
                nll_x = criterion(label_diff[:, :, 0], mu_x, sig_x)
                nll_y = criterion(label_diff[:, :, 1], mu_y, sig_y)
                nll_z = criterion(label_diff[:, :, 2], mu_z, sig_z)
                nll_vx = criterion(label_diff[:, :, 3], mu_vx, sig_vx)
                nll_vy = criterion(label_diff[:, :, 4], mu_vy, sig_vy)
                nll_vz = criterion(label_diff[:, :, 5], mu_vz, sig_vz)
                nll = nll_x + nll_y + nll_z + nll_vx + nll_vy + nll_vz
                #test_loss = torch.sum(nll * label_mask.squeeze())
                #test_losses_list[-1].update(test_loss.item() / int(torch.sum(label_mask)), int(torch.sum(label_mask)))
                for j in range(step_num):
                    # NOTE(review): parenthesis placement looks off -- this is
                    # `torch.sum(mask > 0)` (count of positives) rather than
                    # `torch.sum(mask) > 0`. For a 0/1 mask both are truthy in
                    # the same cases, so behavior matches, but confirm intent.
                    if torch.sum(target_mask_list[n][j] > 0):
                        # Mask-weighted mean NLL for targets belonging to step j.
                        test_loss_tmp = torch.sum(
                            nll * target_mask_list[n][j].cuda(args.local_rank)
                        ) / int(torch.sum(target_mask_list[n][j]))
                        test_losses_list[j].update(
                            test_loss_tmp.item(),
                            int(torch.sum(target_mask_list[n][j])))
                        test_losses_list[-1].update(
                            test_loss_tmp.item(),
                            int(torch.sum(target_mask_list[n][j])))
                # Choose the next-step input delta: stochastic sample or mean.
                if args.use_sample:
                    sample_x = sampler(mu_x, sig_x).unsqueeze(-1)
                    sample_y = sampler(mu_y, sig_y).unsqueeze(-1)
                    sample_z = sampler(mu_z, sig_z).unsqueeze(-1)
                    sample_vx = sampler(mu_vx, sig_vx).unsqueeze(-1)
                    sample_vy = sampler(mu_vy, sig_vy).unsqueeze(-1)
                    sample_vz = sampler(mu_vz, sig_vz).unsqueeze(-1)
                else:
                    sample_x = mu_x.unsqueeze(-1)
                    sample_y = mu_y.unsqueeze(-1)
                    sample_z = mu_z.unsqueeze(-1)
                    sample_vx = mu_vx.unsqueeze(-1)
                    sample_vy = mu_vy.unsqueeze(-1)
                    sample_vz = mu_vz.unsqueeze(-1)
                sample = torch.cat((sample_x, sample_y, sample_z, sample_vx,
                                    sample_vy, sample_vz),
                                   dim=-1)
                # Defensive fallback: if sampling produced nothing, use the
                # ground-truth delta instead (debug marker string preserved).
                if type(sample) == type(None):
                    if args.local_rank == 0:
                        print('tuzim')
                    sample = label_diff
                else:
                    sample = sample.cuda(args.local_rank)
                # Roll the state forward: predicted delta + current state.
                next_data = sample + data[:, :, :-1]
                for j in range(labels.shape[0]):
                    # Agents that newly appear at this step get their true
                    # state injected instead of a prediction.
                    next_first_mask = (data_mask[j] - label_mask[j]).squeeze(-1).bool()
                    next_data[j][next_first_mask] = label[j][next_first_mask]
                # Re-attach the mask channel, mapped from {0,1} to {-1,+1}.
                data = torch.cat((next_data, 2 * data_mask - 1), dim=-1)
                # data = torch.cat((mu.squeeze(), data[:,:,-1].unsqueeze(-1)), dim = -1)
            # Free per-batch tensors eagerly to cap GPU memory during rollout.
            del nll_x, nll_y, nll_z, nll_vx, nll_vy, nll_vz, nll
            del sample_x, sample_y, sample_z, sample_vx, sample_vy, sample_vz, sample, next_data
            del data, hidden, cell, hidden_mask, data_mask, label_mask, label_diff
            del mu_x, sig_x, mu_y, sig_y, mu_z, sig_z, mu_vx, sig_vx, mu_vy, sig_vy, mu_vz, sig_vz
            torch.cuda.empty_cache()
    return [test_losses_list[i].avg for i in range(len(test_losses_list))], [
        test_losses_list[i].count for i in range(len(test_losses_list))
    ]
def train_topic(config, num, model_fname, train_data, valid_data, train_label, valid_label, train_mask, valid_mask):
    """Train a Binary_topic_Net with early stopping on validation F1.

    Builds train/dev loaders, trains for up to config.num_epoch epochs, and
    after each epoch evaluates on the dev set. EarlyStopping (patience=40)
    checkpoints the best model to `model_fname`, keyed on -dev_f1 since
    EarlyStopping minimizes its metric.

    Args:
        config: run configuration (batch_size, learning_rate, weight_decay,
                seed, gpu, num_epoch, ...).
        num: model-size argument forwarded to Binary_topic_Net.
        model_fname: checkpoint path for the best model.
        *_data / *_label / *_mask: raw tensors for the two splits.
    """
    logging.info("build data loader")
    train_loader = build_dataloader_topic(train_data, train_mask, train_label,
                                          config.batch_size, "train")
    dev_loader = build_dataloader_topic(valid_data, valid_mask, valid_label,
                                        config.batch_size)

    model = Binary_topic_Net(config, num)
    optimizer = optim.AdamW(model.parameters(),
                            lr=float(config.learning_rate),
                            weight_decay=float(config.weight_decay))

    # Seed all RNGs so batch shuffling (and any dropout) is reproducible.
    # NOTE(review): seeding happens after model construction, so initial
    # weights are NOT covered by these seeds -- confirm whether that matters.
    torch.manual_seed(config.seed)
    random.seed(config.seed)
    np.random.seed(config.seed)

    early_stopping = EarlyStopping(patience=40, verbose=True, path=model_fname)

    if config.gpu:
        n_gpu = torch.cuda.device_count()
        if n_gpu > 1:
            model = torch.nn.DataParallel(model)
        torch.cuda.manual_seed_all(config.seed)
        model.to('cuda')

    model.train()
    logging.info("start training...")
    for epoch in trange(int(config.num_epoch), desc='Epoch'):
        tr_loss = 0
        for step, batch in enumerate(tqdm(train_loader, desc="Iteration")):
            X, m, y = batch
            if config.gpu:
                X = X.to('cuda')
                y = y.to('cuda')
                m = m.to('cuda')
            optimizer.zero_grad()
            # forward
            _, loss = model(X, m, y)
            loss = loss.mean()  # reduce per-GPU losses under DataParallel
            # backward
            loss.backward()
            optimizer.step()
            tr_loss += loss.item()
        tr_loss = tr_loss / (step + 1)

        # validation
        model.eval()
        ys_dev = []
        ps_dev = []
        loss_valid = 0.0
        # BUG FIX: run validation under no_grad -- the original built autograd
        # graphs for every dev batch and accumulated `loss_valid` as a live
        # tensor, retaining those graphs (and GPU memory) for no benefit.
        with torch.no_grad():
            for step, batch_dev in enumerate(tqdm(dev_loader, desc="Validation")):
                X_val, m_val, y_val = batch_dev
                if config.gpu:
                    X_val = X_val.to('cuda')
                    m_val = m_val.to('cuda')
                    y_val = y_val.to('cuda')
                prob, bt_loss = model(X_val, m_val, y_val)
                loss_valid += bt_loss.mean().item()  # accumulate a plain float
                ps_dev.extend(prob.data.cpu().clone().numpy())
                ys_dev.extend(y_val.data.cpu().clone().numpy())
        loss_valid = loss_valid / (step + 1)

        # Pick the probability threshold maximizing dev F1.
        th, dev_f1 = optimal_threshold(ys_dev, ps_dev, Ns=50)
        logging.info(
            "Epoch: %d | train loss: %.4f | valid loss: %.4f | valid f1: %.4f ",
            epoch + 1, tr_loss, loss_valid, dev_f1)
        model.train()

        early_stopping(-dev_f1, model)
        if early_stopping.early_stop:
            logging.info("Early Stopping. Model trained.")
            break
# Chainer-style top-level training loop over 201 epochs with periodic
# evaluation every 5 loops. Relies on names defined earlier in the file:
# `n` (train set size), `bc` (batch size), `x_train`/`y_train`/`x_test`,
# `model`, `optimizer`, `Variable`, `no_backprop_mode`, `using_config`.
# NOTE(review): this fragment is truncated -- the evaluation tallies
# (ok/tp/fp/fn/tn) are initialized here but consumed beyond this view.
MaxAcc = 0        # best accuracy seen so far (updated later, out of view)
MaxAcc_loop = 0   # loop index of the best accuracy
MaxAcc_loss = 0   # loss at the best accuracy
model_num = 0
for j in range(201):
    print('start loop %d' % j)
    # training
    # Fresh random permutation each epoch for shuffled mini-batches.
    sff_index = np.random.permutation(n)
    for i in range(0, n, bc):
        # Slice clamps the final partial batch to the dataset end.
        x = Variable(x_train[sff_index[i:(i + bc) if (i + bc) < n else n], ])
        y = Variable(y_train[sff_index[i:(i + bc) if (i + bc) < n else n], ])
        model.cleargrads()
        loss = model(x, y)
        logging.debug("loop: %d, loss = %f" % (j, loss.data))
        loss.backward()
        optimizer.update()
    # evaluate
    if j % 5 == 0:
        # Inference only: disable backprop graph and switch to test mode.
        with no_backprop_mode():
            xt = Variable(x_test)
            with using_config('train', False):
                yt = model.fwd(xt)
        logging.info("loop: %d, loss = %f" % (j, loss.data))
        model.cleargrads()
        ans = yt.data
        nrow, ncol = ans.shape
        # Confusion-matrix counters (filled in code beyond this chunk).
        ok = tp = fp = fn = tn = 0
# placeholders LABELS = [] FEATS = {} # loop through batches for inputs, targets in tqdm(subtrain_data_loader): # move to device inputs = inputs.to(device) # placeholder for batch features features = {} # forward pass [with feature extraction] preds = model(inputs) # add labels to list LABELS.extend(targets.numpy()) # add feats to lists for k in features.keys(): if k not in FEATS.keys(): FEATS[k] = features[k].cpu().numpy() else: FEATS[k] = np.concatenate( (FEATS[k], features[k].cpu().numpy()), axis=0) LABELS = np.asarray(LABELS) ##### SAVE