def train(epoch):
    avg_loss = 0.0
    epoch_time = 0
    # progbar = Progbar(len(train_loader.dataset) // c.batch_size)
    num_iter_epoch = len(train_loader.dataset) // c.batch_size
    # Register an EMA shadow for every trainable parameter.
    if c.ema_decay > 0:
        ema = EMA(c.ema_decay)
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param)
    else:
        ema = None
    model.train()
    for num_iter, batch in enumerate(train_loader):
        start_time = time.time()
        wav = batch[0].unsqueeze(1)
        mel = batch[1].transpose(1, 2)
        lens = batch[2]
        target = batch[3]
        if use_cuda:
            wav = wav.cuda()
            mel = mel.cuda()
            target = target.cuda()
        current_step = num_iter + epoch * len(train_loader) + 1
        lr = lr_decay(c.lr, current_step, c.warmup_steps)
        for params_group in optimizer.param_groups:
            params_group['lr'] = lr
        optimizer.zero_grad()
        out = torch.nn.parallel.data_parallel(model, (wav, mel))
        # out = model(wav, mel)
        loss, fp, tp = criterion(out, target, lens)
        loss.backward()
        grad_norm, skip_flag = check_update(model, c.grad_clip, c.grad_top)
        if skip_flag:
            optimizer.zero_grad()
            print(" | > Iteration skipped!!")
            continue
        optimizer.step()
        # Update the EMA shadow after each optimizer step.
        if ema is not None:
            for name, param in model.named_parameters():
                if name in ema.shadow:
                    ema.update(name, param.data)
        step_time = time.time() - start_time
        epoch_time += step_time
        if current_step % c.print_iter == 0:
            print(" | > step:{}/{}\tglobal_step:{}\tloss:{:.4f}\t"
                  "grad_norm:{:.4f}\tfp:{}\ttp:{}\tlr:{:.5f}".format(
                      num_iter, num_iter_epoch, current_step, loss.item(),
                      grad_norm, fp, tp, lr))
        avg_loss += loss.item()
    avg_loss /= (num_iter + 1)  # average over the iterations actually run
    return ema, avg_loss
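
# The train() above calls lr_decay(c.lr, current_step, c.warmup_steps) without
# showing it. A plausible implementation is a Noam-style schedule (linear
# warmup, then inverse-sqrt decay); this is a sketch under that assumption --
# the original repo's lr_decay may differ.
def lr_decay(init_lr, global_step, warmup_steps):
    step = max(global_step, 1)
    # Peaks at init_lr when step == warmup_steps, then decays as step**-0.5.
    return init_lr * warmup_steps**0.5 * min(step * warmup_steps**-1.5,
                                             step**-0.5)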

def train(epoch):
    avg_loss = 0.0
    epoch_time = 0
    progbar = Progbar(len(train_loader.dataset) // c.batch_size)
    # Register an EMA shadow for every trainable parameter.
    if c.ema_decay > 0:
        ema = EMA(c.ema_decay)
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param)
    else:
        ema = None
    model.train()
    for num_iter, batch in enumerate(train_loader):
        start_time = time.time()
        wav = batch[0].unsqueeze(1)
        mel = batch[1].transpose(1, 2)
        lens = batch[2]
        target = batch[3]
        if use_cuda:
            wav = wav.cuda()
            mel = mel.cuda()
            target = target.cuda()
        current_step = num_iter + epoch * len(train_loader) + 1
        optimizer.zero_grad()
        # out = torch.nn.parallel.data_parallel(model, (wav, mel))
        out = model(wav, mel)
        loss, fp, tp = criterion(out, target, lens)
        loss.backward()
        grad_norm, skip_flag = check_update(model, 5, 100)  # grad_clip=5, grad_top=100
        if skip_flag:
            optimizer.zero_grad()
            print(" | > Iteration skipped!!")
            continue
        optimizer.step()
        # Update the EMA shadow after each optimizer step.
        if ema is not None:
            for name, param in model.named_parameters():
                if name in ema.shadow:
                    ema.update(name, param.data)
        step_time = time.time() - start_time
        epoch_time += step_time
        # update progress bar
        progbar.update(num_iter + 1,
                       values=[('total_loss', loss.item()),
                               ('grad_norm', grad_norm.item()),
                               ('fp', fp), ('tp', tp)])
        avg_loss += loss.item()
    avg_loss /= (num_iter + 1)  # average, matching the variant above
    return ema, avg_loss
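
# Every snippet in this section assumes an EMA helper with register()/update()/
# get() and a `shadow` dict (the BYOL snippet below uses a variant whose
# constructor also takes max_steps and tau). A minimal sketch consistent with
# that interface; the exact decay semantics are an assumption, not taken from
# any one of these repos.
class EMA:
    """Exponential moving average over named tensors (sketch)."""

    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, val):
        # Detach so the shadow copy never participates in autograd.
        self.shadow[name] = val.detach().clone()

    def update(self, name, val):
        assert name in self.shadow
        # shadow <- decay * shadow + (1 - decay) * val
        self.shadow[name] = (self.decay * self.shadow[name]
                             + (1.0 - self.decay) * val).clone()

    def get(self, name):
        return self.shadow[name]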

class SOLVER():
    def __init__(self, args):
        self.args = args
        self.device = torch.device("cuda:{}".format(self.args.GPU)
                                   if torch.cuda.is_available() else "cpu")
        self.data = READ(self.args)
        glove = self.data.WORD.vocab.vectors
        char_size = len(self.data.CHAR.vocab)
        self.model = BiDAF(self.args, char_size, glove).to(self.device)
        self.optimizer = optim.Adadelta(self.model.parameters(),
                                        lr=self.args.Learning_Rate)
        self.ema = EMA(self.args.Exp_Decay_Rate)
        if APEX_AVAILABLE:  # Mixed Precision
            self.model, self.optimizer = amp.initialize(
                self.model, self.optimizer, opt_level='O2')
        # Register every trainable parameter with the EMA tracker.
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.ema.register(name, param.data)
        self.parameters = filter(lambda p: p.requires_grad,
                                 self.model.parameters())

    def train(self):
        criterion = nn.NLLLoss().to(self.device)
        self.model.train()
        max_dev_em, max_dev_f1 = -1, -1
        num_batches = len(self.data.train_iter)
        logging.info("Begin Training")
        self.model.zero_grad()
        loss = 0.0
        for epoch in range(self.args.Epoch):
            self.model.train()
            for i, batch in enumerate(self.data.train_iter):
                i += 1
                p1, p2 = self.model(batch)
                batch_loss = (criterion(p1, batch.start_idx.to(self.device))
                              + criterion(p2, batch.end_idx.to(self.device)))
                if APEX_AVAILABLE:
                    with amp.scale_loss(batch_loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    batch_loss.backward()
                loss = batch_loss.item()
                self.optimizer.step()
                del p1, p2, batch_loss
                # Update the EMA shadow after each optimizer step.
                for name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.ema.update(name, param.data)
                self.model.zero_grad()
                logging.info("Epoch [{}/{}] Step [{}/{}] Train Loss {}".format(
                    epoch + 1, self.args.Epoch, i, int(num_batches) + 1,
                    round(loss, 3)))
                # Evaluate every 100 steps once training has stabilized.
                if epoch > 7 and i % 100 == 0:
                    dev_em, dev_f1 = self.evaluate()
                    logging.info("Epoch [{}/{}] Dev EM {} Dev F1 {}".format(
                        epoch + 1, self.args.Epoch,
                        round(dev_em, 3), round(dev_f1, 3)))
                    self.model.train()
                    if dev_f1 > max_dev_f1:
                        max_dev_f1 = dev_f1
                        max_dev_em = dev_em
            # End-of-epoch evaluation.
            dev_em, dev_f1 = self.evaluate()
            logging.info("Epoch [{}/{}] Dev EM {} Dev F1 {}".format(
                epoch + 1, self.args.Epoch,
                round(dev_em, 3), round(dev_f1, 3)))
            self.model.train()
            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_em = dev_em
        logging.info('Max Dev EM: {} Max Dev F1: {}'.format(
            round(max_dev_em, 3), round(max_dev_f1, 3)))

    def evaluate(self):
        logging.info("Evaluating on Dev Dataset")
        answers = dict()
        self.model.eval()
        # Back up the live weights into a throwaway EMA, then load the
        # averaged weights for evaluation.
        temp_ema = EMA(0)
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                temp_ema.register(name, param.data)
                param.data.copy_(self.ema.get(name))
        with torch.no_grad():
            for _, batch in enumerate(self.data.dev_iter):
                p1, p2 = self.model(batch)
                batch_size, _ = p1.size()
                _, s_idx = p1.max(dim=1)
                _, e_idx = p2.max(dim=1)
                for i in range(batch_size):
                    qid = batch.qid[i]
                    answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                    answer = ' '.join(
                        [self.data.WORD.vocab.itos[idx] for idx in answer])
                    answers[qid] = answer
        # Restore the original (non-averaged) weights.
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                param.data.copy_(temp_ema.get(name))
        results = evaluate(self.args, answers)
        return results['exact_match'], results['f1']
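
# The backup/swap/restore dance in SOLVER.evaluate() is a common pattern and
# can be factored into a context manager. A sketch, assuming the same EMA
# interface as above:
from contextlib import contextmanager

@contextmanager
def ema_weights(model, ema):
    # Swap EMA weights in for evaluation; restore the originals on exit.
    backup = {name: param.data.clone()
              for name, param in model.named_parameters()
              if param.requires_grad}
    for name, param in model.named_parameters():
        if param.requires_grad:
            param.data.copy_(ema.get(name))
    try:
        yield model
    finally:
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data.copy_(backup[name])

# evaluate() could then read:
#     with ema_weights(self.model, self.ema):
#         ... run the dev loop ...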

BATCH_SIZE = 256
CKPT = 'byol.pt'
EPOCHS = 50

# %%
data = glob.glob('data/no_label/*.jpg')[:10000]
train_dataset = dataset.BYOLDataset(data)
train_dataloader = DataLoader(train_dataset, shuffle=True,
                              num_workers=NUM_WORKERS, batch_size=BATCH_SIZE)

# %%
byol_model = BYOL().to(DEVICE)
max_steps = len(train_dataloader)
# Single EMA instance; the original also re-assigned `ema = EMA(max_steps)`
# a few lines later, silently discarding tau=0.99, so that duplicate is dropped.
ema = EMA(max_steps, tau=0.99)
optimizer = torch.optim.Adam(byol_model.parameters(), lr=1e-4)

# %%
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}')
    t_loss = []
    t_loader = tqdm(train_dataloader)
    for batch in t_loader:
        optimizer.zero_grad()
        img_1, img_2 = batch[0].to(DEVICE), batch[1].to(DEVICE)
        _, _, loss = byol_model(img_1, img_2)
        loss.backward()
        optimizer.step()
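
# Note that `ema` is instantiated above but never used in the loop as shown;
# in BYOL the target network is normally refreshed as an EMA of the online
# network after each optimizer step. A hedged sketch of that missing step --
# the attribute names `online_encoder` and `target_encoder` are hypothetical,
# not taken from this snippet's BYOL class:
with torch.no_grad():
    tau = 0.99
    for online_p, target_p in zip(byol_model.online_encoder.parameters(),
                                  byol_model.target_encoder.parameters()):
        target_p.mul_(tau).add_((1.0 - tau) * online_p)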

if __name__ == "__main__":
    # Set up model
    AE = AutoEncoders[args.mode]
    if args.adv_train in [0, 1]:
        ae = AE(args.e1, args.d, args.e2)
    elif args.adv_train == 2:
        ae = AE(args.e1, args.d, args.e2, args.t)
    elif args.adv_train in [3, 4]:
        ae = AE(args)
    ae.cuda()

    # Set up exponential moving average
    if args.adv_train == 3:
        ema_dec = []
        for di in range(1, args.num_dec + 1):
            ema_dec.append(EMA(args.ema_factor))
            dec = eval("ae.d%s" % di)
            for name, param in dec.named_parameters():
                if param.requires_grad:
                    ema_dec[-1].register(name, param.data)
        ema_se = EMA(args.ema_factor)
        for name, param in ae.se.named_parameters():
            if param.requires_grad:
                ema_se.register(name, param.data)
    elif args.adv_train == 4:
        ema_dec = []
        ema_se = []
        for di in range(1, args.num_dec + 1):
            ema_dec.append(EMA(args.ema_factor))
            dec = eval("ae.d%s" % di)
            for name, param in dec.named_parameters():
                if param.requires_grad:
                    ema_dec[-1].register(name, param.data)

# Set up directories and logs, etc.
TimeID, ExpID, rec_img_path, weights_path, log = set_up_dir(
    args.project_name, args.resume, args.CodeID)
logprint = LogPrint(log)
args.ExpID = ExpID

if __name__ == "__main__":
    # Set up model
    AE = AutoEncoders[args.mode]
    ae = AE(args).cuda()

    # Set up exponential moving average
    ema_dec = []
    ema_se = []
    for di in range(1, args.num_dec + 1):
        ema_dec.append(EMA(args.ema_factor))
        dec = eval("ae.d%s" % di)
        for name, param in dec.named_parameters():
            if param.requires_grad:
                ema_dec[-1].register(name, param.data)
    for sei in range(1, args.num_se + 1):
        ema_se.append(EMA(args.ema_factor))
        se = eval("ae.se%s" % sei)
        for name, param in se.named_parameters():
            if param.requires_grad:
                ema_se[-1].register(name, param.data)

    # Prepare data
    train_loader, num_train, test_loader, num_test = set_up_data(
        args.dataset, args.batch_size)
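
# The eval("ae.d%s" % di) lookups in the last two snippets resolve attributes
# by building source strings; getattr does the same thing without eval.
# A behaviorally equivalent sketch of the decoder registration loop:
for di in range(1, args.num_dec + 1):
    ema = EMA(args.ema_factor)
    dec = getattr(ae, "d%s" % di)
    for name, param in dec.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)
    ema_dec.append(ema)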