Example #1
    def evaluate(self):

        logging.info("Evaluating on Dev Dataset")
        answers = dict()

        self.model.eval()

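        # Stash the current weights in a temporary store and swap in the EMA weights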
        temp_ema = EMA(0)

        for name, param in self.model.named_parameters():
            if param.requires_grad:
                temp_ema.register(name, param.data)
                param.data.copy_(self.ema.get(name))

        with torch.no_grad():
            for _, batch in enumerate(self.data.dev_iter):

                p1, p2 = self.model(batch)
                batch_size, _ = p1.size()

                _, s_idx = p1.max(dim=1)
                _, e_idx = p2.max(dim=1)

                for i in range(batch_size):
                    qid = batch.qid[i]
                    answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                    answer = ' '.join(
                        [self.data.WORD.vocab.itos[idx] for idx in answer])
                    answers[qid] = answer

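            # Restore the original training weights before returning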
            for name, param in self.model.named_parameters():
                if param.requires_grad:
                    param.data.copy_(temp_ema.get(name))

        results = evaluate(self.args, answers)

        return results['exact_match'], results['f1']
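All of these snippets call into a small EMA helper that exposes register / update / get and a shadow dict of averaged tensors. As a point of reference, a minimal sketch of such a class is given below; the update rule shadow = decay * shadow + (1 - decay) * value is the conventional one and is an assumption here, not code taken from these projects (the BYOL example further down constructs its helper as EMA(max_steps, tau=0.99), so its internals likely differ).

class EMA:
    """Minimal sketch of the exponential-moving-average helper assumed above."""

    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}

    def register(self, name, val):
        # Keep an independent copy so later in-place parameter updates
        # do not modify the stored average.
        self.shadow[name] = val.clone()

    def update(self, name, val):
        # Conventional EMA update (assumed, not taken from these repositories).
        self.shadow[name] = self.decay * self.shadow[name] + (1.0 - self.decay) * val

    def get(self, name):
        return self.shadow[name]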
Example #2
    def __init__(self, args):
        self.args = args
        self.device = torch.device(
            "cuda:{}".format(self.args.GPU)
            if torch.cuda.is_available() else "cpu")
        self.data = READ(self.args)
        glove = self.data.WORD.vocab.vectors
        char_size = len(self.data.CHAR.vocab)

        self.model = BiDAF(self.args, char_size, glove).to(self.device)
        self.optimizer = optim.Adadelta(self.model.parameters(),
                                        lr=self.args.Learning_Rate)
        self.ema = EMA(self.args.Exp_Decay_Rate)

        if APEX_AVAILABLE:  # Mixed Precision
            self.model, self.optimizer = amp.initialize(self.model,
                                                        self.optimizer,
                                                        opt_level='O2')

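        # Register every trainable parameter with the EMA tracker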
        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.ema.register(name, param.data)

        self.parameters = filter(lambda p: p.requires_grad,
                                 self.model.parameters())
Example #3
def train(epoch):
    avg_loss = 0.0
    epoch_time = 0
    # progbar = Progbar(len(train_loader.dataset) // c.batch_size)
    num_iter_epoch = len(train_loader.dataset) // c.batch_size
    if c.ema_decay > 0:
        ema = EMA(c.ema_decay)
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param)
    else:
        ema = None
    model.train()

    for num_iter, batch in enumerate(train_loader):
        start_time = time.time()
        wav = batch[0].unsqueeze(1)
        mel = batch[1].transpose(1, 2)
        lens = batch[2]
        target = batch[3]
        if use_cuda:
            wav = wav.cuda()
            mel = mel.cuda()
            target = target.cuda()
        current_step = num_iter + epoch * len(train_loader) + 1
        lr = lr_decay(c.lr, current_step, c.warmup_steps)
        for params_group in optimizer.param_groups:
            params_group['lr'] = lr
        optimizer.zero_grad()
        out = torch.nn.parallel.data_parallel(model, (wav, mel))
        # out = model(wav, mel)
        loss, fp, tp = criterion(out, target, lens)
        loss.backward()
        grad_norm, skip_flag = check_update(model, c.grad_clip, c.grad_top)
        if skip_flag:
            optimizer.zero_grad()
            print(" | > Iteration skipped!!")
            continue
        optimizer.step()
        # model ema
        if ema is not None:
            for name, param in model.named_parameters():
                if name in ema.shadow:
                    ema.update(name, param.data)
        step_time = time.time() - start_time
        epoch_time += step_time
        if current_step % c.print_iter == 0:
            print(" | > step:{}/{}\tglobal_step:{}\tloss:{:.4f}\t"
                  "grad_norm:{:.4f}\tfp:{}\ttp:{}\tlr:{:.5f}".format(
                      num_iter, num_iter_epoch, current_step, loss.item(),
                      grad_norm, fp, tp, lr))
        avg_loss += loss.item()
    # num_iter is zero-based, so the number of completed iterations is num_iter + 1
    avg_loss /= (num_iter + 1)
    return ema, avg_loss
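The lr_decay(c.lr, current_step, c.warmup_steps) call in the snippet above is project-specific and not shown here. A common warmup schedule for this kind of call is the Noam-style decay sketched below; treat it as an assumption about the helper's behaviour, not the repository's actual implementation.

def lr_decay(init_lr, global_step, warmup_steps):
    # Noam-style schedule: linear warmup for warmup_steps, then
    # inverse-square-root decay with the step count.
    step = max(global_step, 1)
    return init_lr * warmup_steps**0.5 * min(step * warmup_steps**-1.5,
                                             step**-0.5)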
Example #4
def train(epoch):
    avg_loss = 0.0
    epoch_time = 0
    progbar = Progbar(len(train_loader.dataset) // c.batch_size)
    if c.ema_decay > 0:
        ema = EMA(c.ema_decay)
        for name, param in model.named_parameters():
            if param.requires_grad:
                ema.register(name, param)
    else:
        ema = None
    model.train()
    for num_iter, batch in enumerate(train_loader):
        start_time = time.time()
        wav = batch[0].unsqueeze(1)
        mel = batch[1].transpose(1, 2)
        lens = batch[2]
        target = batch[3]
        if use_cuda:
            wav = wav.cuda()
            mel = mel.cuda()
            target = target.cuda()
        current_step = num_iter + epoch * len(train_loader) + 1
        optimizer.zero_grad()
        # out = torch.nn.parallel.data_parallel(model, (wav, mel))
        out = model(wav, mel)
        loss, fp, tp = criterion(out, target, lens)
        loss.backward()
        grad_norm, skip_flag = check_update(model, 5, 100)
        if skip_flag:
            optimizer.zero_grad()
            print(" | > Iteration skipped!!")
            continue
        optimizer.step()
        # model ema
        if ema is not None:
            for name, param in model.named_parameters():
                if name in ema.shadow:
                    ema.update(name, param.data)
        step_time = time.time() - start_time
        epoch_time += step_time
        # update
        progbar.update(num_iter + 1,
                       values=[('total_loss', loss.item()),
                               ('grad_norm', grad_norm.item()), ('fp', fp),
                               ('tp', tp)])
        avg_loss += loss.item()
    return ema, avg_loss
Example #5
class SOLVER():
    def __init__(self, args):
        self.args = args
        self.device = torch.device(
            "cuda:{}".format(self.args.GPU)
            if torch.cuda.is_available() else "cpu")
        self.data = READ(self.args)
        glove = self.data.WORD.vocab.vectors
        char_size = len(self.data.CHAR.vocab)

        self.model = BiDAF(self.args, char_size, glove).to(self.device)
        self.optimizer = optim.Adadelta(self.model.parameters(),
                                        lr=self.args.Learning_Rate)
        self.ema = EMA(self.args.Exp_Decay_Rate)

        if APEX_AVAILABLE:  # Mixed Precision
            self.model, self.optimizer = amp.initialize(self.model,
                                                        self.optimizer,
                                                        opt_level='O2')

        for name, param in self.model.named_parameters():
            if param.requires_grad:
                self.ema.register(name, param.data)

        self.parameters = filter(lambda p: p.requires_grad,
                                 self.model.parameters())

    def train(self):

        criterion = nn.NLLLoss()
        criterion = criterion.to(self.device)

        self.model.train()

        max_dev_em, max_dev_f1 = -1, -1
        num_batches = len(self.data.train_iter)

        logging.info("Begin Training")

        self.model.zero_grad()

        loss = 0.0

        for epoch in range(self.args.Epoch):

            self.model.train()

            for i, batch in enumerate(self.data.train_iter):

                i += 1
                p1, p2 = self.model(batch)
                batch_loss = criterion(
                    p1, batch.start_idx.to(self.device)) + criterion(
                        p2, batch.end_idx.to(self.device))

                if APEX_AVAILABLE:
                    with amp.scale_loss(batch_loss,
                                        self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    batch_loss.backward()
                loss = batch_loss.item()

                self.optimizer.step()
                del p1, p2, batch_loss

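                # Fold the freshly optimized weights into the EMA shadow copies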
                for name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.ema.update(name, param.data)

                self.model.zero_grad()

                logging.info("Epoch [{}/{}] Step [{}/{}] Train Loss {}".format(
                    epoch + 1, self.args.Epoch, i, num_batches,
                    round(loss, 3)))

                if epoch > 7:
                    if i % 100 == 0:
                        dev_em, dev_f1 = self.evaluate()
                        logging.info("Epoch [{}/{}] Dev EM {} Dev F1 {}".format(
                            epoch + 1, self.args.Epoch, round(dev_em, 3),
                            round(dev_f1, 3)))
                        self.model.train()

                        if dev_f1 > max_dev_f1:
                            max_dev_f1 = dev_f1
                            max_dev_em = dev_em

            dev_em, dev_f1 = self.evaluate()
            logging.info("Epoch [{}/{}] Dev EM {} Dev F1 {}".format(
                epoch + 1, self.args.Epoch, round(dev_em, 3),
                round(dev_f1, 3)))
            self.model.train()

            if dev_f1 > max_dev_f1:
                max_dev_f1 = dev_f1
                max_dev_em = dev_em

        logging.info('Max Dev EM: {} Max Dev F1: {}'.format(
            round(max_dev_em, 3), round(max_dev_f1, 3)))

    def evaluate(self):

        logging.info("Evaluating on Dev Dataset")
        answers = dict()

        self.model.eval()

        temp_ema = EMA(0)

        for name, param in self.model.named_parameters():
            if param.requires_grad:
                temp_ema.register(name, param.data)
                param.data.copy_(self.ema.get(name))

        with torch.no_grad():
            for _, batch in enumerate(self.data.dev_iter):

                p1, p2 = self.model(batch)
                batch_size, _ = p1.size()

                _, s_idx = p1.max(dim=1)
                _, e_idx = p2.max(dim=1)

                for i in range(batch_size):
                    qid = batch.qid[i]
                    answer = batch.c_word[0][i][s_idx[i]:e_idx[i] + 1]
                    answer = ' '.join(
                        [self.data.WORD.vocab.itos[idx] for idx in answer])
                    answers[qid] = answer

            for name, param in self.model.named_parameters():
                if param.requires_grad:
                    param.data.copy_(temp_ema.get(name))

        results = evaluate(self.args, answers)

        return results['exact_match'], results['f1']
Example #6
BATCH_SIZE = 256
CKPT = 'byol.pt'
EPOCHS = 50

# %%
data = glob.glob('data/no_label/*.jpg')[:10000]
train_dataset = dataset.BYOLDataset(data)
train_dataloader = DataLoader(train_dataset,
                              shuffle=True,
                              num_workers=NUM_WORKERS,
                              batch_size=BATCH_SIZE)

# %%
byol_model = BYOL().to(DEVICE)
max_steps = len(train_dataloader)
ema = EMA(max_steps, tau=0.99)
optimizer = torch.optim.Adam(byol_model.parameters(), lr=1e-4)

# %%
for epoch in range(EPOCHS):
    print(f'Epoch {epoch+1}')
    t_loss = []
    t_loader = tqdm(train_dataloader)
    for batch in t_loader:
        optimizer.zero_grad()
        img_1, img_2 = batch[0].to(DEVICE), batch[1].to(DEVICE)
        _, _, loss = byol_model(img_1, img_2)

        loss.backward()
        optimizer.step()
Example #7
if __name__ == "__main__":
  # Set up model
  AE = AutoEncoders[args.mode]
  if args.adv_train in [0, 1]:
    ae = AE(args.e1, args.d, args.e2)
  elif args.adv_train == 2:
    ae = AE(args.e1, args.d, args.e2, args.t)
  elif args.adv_train in [3, 4]:
    ae = AE(args)
  ae.cuda()
  
  # Set up exponential moving average
  if args.adv_train == 3:
    ema_dec = []
    for di in range(1, args.num_dec+1):
      ema_dec.append(EMA(args.ema_factor))
      dec = eval("ae.d%s"  % di)
      for name, param in dec.named_parameters():
        if param.requires_grad:
          ema_dec[-1].register(name, param.data)
    ema_se = EMA(args.ema_factor)
    for name, param in ae.se.named_parameters():
      if param.requires_grad:
        ema_se.register(name, param.data)
        
  elif args.adv_train == 4:
    ema_dec = []; ema_se = []
    for di in range(1, args.num_dec+1):
      ema_dec.append(EMA(args.ema_factor))
      dec = eval("ae.d%s"  % di)
      for name, param in dec.named_parameters():
        if param.requires_grad:
          ema_dec[-1].register(name, param.data)
Example #8
# Set up directories and logs, etc.
TimeID, ExpID, rec_img_path, weights_path, log = set_up_dir(
    args.project_name, args.resume, args.CodeID)
logprint = LogPrint(log)
args.ExpID = ExpID

if __name__ == "__main__":
    # Set up model
    AE = AutoEncoders[args.mode]
    ae = AE(args).cuda()

    # Set up exponential moving average
    ema_dec = []
    ema_se = []
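    # One EMA tracker per decoder (ae.d1, ae.d2, ...) and one per se module (ae.se1, ae.se2, ...)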
    for di in range(1, args.num_dec + 1):
        ema_dec.append(EMA(args.ema_factor))
        dec = eval("ae.d%s" % di)
        for name, param in dec.named_parameters():
            if param.requires_grad:
                ema_dec[-1].register(name, param.data)

    for sei in range(1, args.num_se + 1):
        ema_se.append(EMA(args.ema_factor))
        se = eval("ae.se%s" % sei)
        for name, param in se.named_parameters():
            if param.requires_grad:
                ema_se[-1].register(name, param.data)

    # Prepare data
    train_loader, num_train, test_loader, num_test = set_up_data(
        args.dataset, args.batch_size)