Example #1
def evaluate():
    """Calculates loss and prediction accuracy given torch dataloader"""
    # Turn on evaluation mode which disables dropout.
    md.eval()
    avg_loss = RunningAverage()
    avg_acc = RunningAverage()

    with torch.no_grad():
        pbar = tqdm(test_dl, ascii=True, leave=False)
        for batch in pbar:
            # run model
            inp, target = batch
            inp, target = inp.to(device), target.to(device)
            out = md(inp.t())

            # calculate loss
            loss = criterion(out.view(-1), target.float())
            avg_loss.update(loss.item())

            # calculate accuracy
            pred = out.view(-1) > 0.5
            correct = pred == target.byte()
            avg_acc.update(torch.sum(correct).item() / len(correct))

            pbar.set_postfix(loss=f'{avg_loss():05.3f}',
                             acc=f'{avg_acc():05.2f}')

    return avg_loss(), avg_acc()
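All of the examples on this page track per-batch statistics with a RunningAverage helper: update() is called once per batch and the object itself is called to read the current mean. The class is not shown here; a minimal sketch, assuming only the interface these snippets use (update(), reset(), and call-to-read), could look like this:

class RunningAverage:
    """Keeps a running mean of every value passed to update()."""

    def __init__(self):
        self.total = 0.0
        self.steps = 0

    def reset(self):
        # Clear accumulated statistics (used between epochs, as in Example #9).
        self.total = 0.0
        self.steps = 0

    def update(self, value):
        # Add one observation, typically a per-batch loss or accuracy.
        self.total += value
        self.steps += 1

    def __call__(self):
        # Current average; 0.0 before the first update.
        return self.total / self.steps if self.steps else 0.0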
Example #2
def test(model, dataloader, params):
    val_data = tqdm(dataloader.data_iterator(data_type='test',
                                             batch_size=params.batch_size),
                    total=(dataloader.size()[0] // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()
    with torch.no_grad():
        model.eval()
        for data, labels in val_data:
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)

            batch_masks = data != 0

            loss, logits = model(data,
                                 attention_mask=batch_masks,
                                 labels=labels)

            predicted = logits.max(2)[1]
            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=batch_masks.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL',
                                 loss='{:05.3f}'.format(loss_avg()))
    metrics.loss = loss_avg()
    return metrics
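Examples #2, #5 and #8 also rely on a Metrics object with an update(batch_pred, batch_true, batch_mask) method, a writable loss attribute and an f1() reading. That class is likewise not shown; a rough sketch, assuming binary token labels and micro-averaged F1 (the project's real implementation may differ), might be:

import numpy as np

class Metrics:
    """Accumulates masked token-level counts and reports micro-averaged F1."""

    def __init__(self):
        self.tp = self.fp = self.fn = 0
        self.loss = None  # set by the caller after evaluation

    def update(self, batch_pred, batch_true, batch_mask):
        # Score only positions where the attention mask is set.
        keep = batch_mask.astype(bool)
        pred, true = batch_pred[keep], batch_true[keep]
        self.tp += int(np.sum((pred == 1) & (true == 1)))
        self.fp += int(np.sum((pred == 1) & (true == 0)))
        self.fn += int(np.sum((pred == 0) & (true == 1)))

    def f1(self):
        denom = 2 * self.tp + self.fp + self.fn
        return 2 * self.tp / denom if denom else 0.0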
Example #3
    def run_train(self, dataset, args):
        model, tokenizer = self.bert, self.tokenizer
        batch_size = args.batch_size
        model.train()
        train_examples = dataset.train_dataloader

        # Initialize Optimizer
        num_train_iters = args.epochs * len(
            train_examples) / batch_size / args.gradient_accumulation_steps
        self.init_optimizer(args, num_train_iters)

        train_avg_loss = RunningAverage()
        for epoch in range(args.epochs):
            print('Epoch {}'.format(epoch))
            train_bar = tqdm(
                enumerate(train_examples),
                total=len(train_examples),
                desc="Training",
            )
            for step, batch in train_bar:
                inputs = {k: v.to('cuda') for k, v in batch.items()}
                loss = model(inputs['input_ids'],
                             inputs['token_type_ids'],
                             labels=inputs['labels'])
                if args.n_gpus > 1:
                    loss = loss.mean()
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps
                loss.backward()
                train_avg_loss.update(loss.item())

                # Parameter update every `gradient_accumulation_steps` batches
                # (mirroring the fuller variant of this loop in Example #12).
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()
Example #4
    def one_epoch(self, mode, epoch_num):
        if mode not in ['train', 'test']:
            raise ValueError("Unknown value {} for mode".format(mode))
        print("{}ing... epoch: {}".format(mode, epoch_num))

        if mode == 'train':
            self.model.train()
            dl = self.train_data
            one_iter_function = self.one_train_iteration
        else:
            self.model.eval()
            dl = self.test_data
            one_iter_function = self.one_test_iteration

        acc_avg = RunningAverage()
        loss_avg = RunningAverage()
        with tqdm(total=len(dl)) as t:
            for n, (data, label) in enumerate(dl):
                if self.train_params['use_gpu']:
                    gpu_id = self.train_params['gpu_id']
                    data, label = data.cuda(gpu_id), label.cuda(gpu_id)
                # Variable is a no-op since PyTorch 0.4; tensors can be used directly.
                data = data.float()
                loss, acc = one_iter_function(data, label)
                loss_avg.update(loss)
                acc_avg.update(acc)
                t.set_postfix(
                    run_param="Epoch{} Loss:{:.2f} Acc:{:.2f}".format(
                        epoch_num, loss_avg(), acc_avg()))
                t.update()

        return acc_avg, loss_avg
Example #5
def validate(model, val_set, params):
    val_data = tqdm(DataLoader(val_set,
                               batch_size=params.batch_size,
                               collate_fn=KeyphraseData.collate_fn),
                    total=(len(val_set) // params.batch_size))
    metrics = Metrics()
    loss_avg = RunningAverage()
    with torch.no_grad():
        model.eval()
        for data, labels, mask in val_data:

            data = data.to(params.device)
            labels = labels.to(params.device)
            mask = mask.to(params.device)

            loss, logits = model(data, attention_mask=mask, labels=labels)

            predicted = logits.max(2)[1]
            metrics.update(batch_pred=predicted.cpu().numpy(),
                           batch_true=labels.cpu().numpy(),
                           batch_mask=mask.cpu().numpy())
            loss_avg.update(torch.mean(loss).item())
            val_data.set_postfix(type='VAL',
                                 loss='{:05.3f}'.format(loss_avg()))

    metrics.loss = loss_avg()
    return metrics
Example #6
def val(dataset, model, args, mode):
    model.eval()
    loader = DataLoader(dataset, batch_size=args.batch_size)
    dataloader_iter = iter(loader)
    state_h, state_c = model.init_state(args.sequence_length)
    loss_avg = RunningAverage()
    acc_avg = RunningAverage()
    while True:
        try:
            X, y = next(dataloader_iter)
        except RuntimeError:
            continue
        except StopIteration:
            break

        y_pred, (state_h,
                 state_c) = model(X.to(device),
                                  (state_h.to(device), state_c.to(device)))
        loss = criterion(y_pred.transpose(1, 2), y.long().to(device))
        loss_avg.update(loss.item())

        acc = accuracy(y_pred.transpose(1, 2), y.long().to(device))
        acc_avg.update(acc)

    # `epoch` (like `device`, `criterion` and `accuracy`) is expected to be
    # defined at module scope by the surrounding training script.
    print({
        'epoch': epoch,
        'val_loss': '{:05.4f}'.format(loss_avg()),
        'accuracy': '{:05.3f}'.format(acc_avg())
    })
Example #7
def evaluate():
    """Calculates loss and prediction accuracy given torch dataloader"""
    # Turn on evaluation mode which disables dropout.
    md.eval()
    avg_loss = RunningAverage()
    avg_acc = RunningAverage()
    T_P = 0
    F_P = 0
    T_N = 0
    F_N = 0
    F__P = 0
    with torch.no_grad():
        pbar = tqdm(test_dl, ascii=True, leave=False)
        for batch in pbar:
            # run model
            inp, target = batch
            inp, target = inp.to(device), target.to(device)
            out = md(inp.t())

            # calculate loss
            loss = criterion(out.view(-1), target.float())
            avg_loss.update(loss.item())

            # calculate accuracy
            pred = out.view(-1) > 0.5
            correct = pred == target.byte()
            t_p, f_p, t_n, f_n, f__p = confusion(pred, target.byte())
            T_P += t_p
            F_P += f_p
            T_N += t_n
            F_N += f_n
            F__P += f__p
            avg_acc.update(torch.sum(correct).item() / len(correct))

            pbar.set_postfix(loss=f'{avg_loss():05.3f}',
                             acc=f'{avg_acc():05.2f}')


    # print('False_Positive', F_P)
    # print('True_Positive', T_P)
    # print('False_Neg', F_N)
    # print('True_Neg', T_N)
    # print('Check', F__P)
    if (T_P == 0):
        avg_prec = 0.0
    else:
        avg_prec = T_P / (T_P + F_P)
    if (T_P == 0):
        avg_recall = 0.0
    else:
        avg_recall = T_P / (T_P + F_N)
    if (avg_prec + avg_recall == 0.0):
        f1_score = 0.0
    else:
        f1_score = 2 * (avg_prec * avg_recall) / (avg_prec + avg_recall)
    return avg_loss(), avg_acc(), avg_prec, avg_recall, f1_score
Example #8
def train(model, dataloader, optimizer, scheduler, params):
    print("Starting training...")
    best_val_loss = 100
    #print(params.save_dir, params.tag)
    stats = Stats(params.save_dir, params.tag)
    for epoch in range(params.epoch_num):
        loss_avg = RunningAverage()
        train_data = tqdm(dataloader.data_iterator(data_type='train',
                                                   batch_size=params.batch_size),
                          total=(dataloader.size()[0] // params.batch_size))
        optimizer.zero_grad()
        model.zero_grad()
        for data, labels in train_data:
            model.train()
            data = torch.tensor(data, dtype=torch.long).to(params.device)
            labels = torch.tensor(labels, dtype=torch.long).to(params.device)

            batch_masks = (data != 0)
            output = model(data, attention_mask=batch_masks, labels=labels)

            loss = torch.mean(output[0])
            loss.backward()

            # Gradient clipping is not in AdamW anymore (so you can use amp without issue)
            torch.nn.utils.clip_grad_norm_(model.parameters(), params.max_grad_norm)

            optimizer.step()
            scheduler.step()
            model.zero_grad()
            optimizer.zero_grad()
            # update the average loss
            loss_avg.update(loss.item())
            train_data.set_postfix(type='TRAIN', epoch=epoch,
                                   loss='{:05.3f}'.format(loss_avg()))

        metrics = validate(model, dataloader, params)
        print('After {} epochs: F1={}, Loss={}'.format(epoch, metrics.f1(), metrics.loss))
        stats.update(metrics, epoch, loss_avg())
        stats.save()
        if epoch % params.save_freq == 0 and params.save_checkpoints:
            save_checkpoint({'epoch': epoch,
                             'state_dict': model.state_dict(),
                             'optim_dict': optimizer.state_dict()},
                            is_best=False,
                            tag=params.tag,
                            epoch=epoch,
                            score=metrics.f1(),
                            checkpoint=params.save_dir)
        if metrics.loss < best_val_loss:
            best_val_loss = metrics.loss
            save_checkpoint({'epoch': epoch,
                             'state_dict': model.state_dict(),
                             'optim_dict': optimizer.state_dict()},
                            is_best=True,
                            tag=params.tag,
                            epoch='generic',
                            score='epic',
                            checkpoint=params.save_dir)
Example #9
    def train(self):
        set_logger(os.path.join(self.log_dir, 'train.log'), terminal=False)

        epochs = self.hps.num_epochs
        print_every = self.hps.print_every
        log_every = self.hps.log_summary_every
        lr = self.hps.learning_rate

        loss_avg = RunningAverage()
        summary_writer = SummaryWriter(log_dir=self.summ_dir)
        current_best_loss = 1e3

        encoder_optimizer = optim.Adam(self.encoder.parameters(), lr=lr)
        decoder_optimizer = optim.Adam(self.decoder.parameters(), lr=lr)

        training_pairs = self.dl

        criterion = nn.NLLLoss(reduction='none')  # `reduce=False` is deprecated

        if self.hps.resume:
            log('- load ckpts...')
            self.load_state_dict()

        for epoch in trange(epochs, desc='epochs'):
            loss_avg.reset()
            with tqdm(total=len(training_pairs)) as progress_bar:
                for language_pair, mask_pair in training_pairs:
                    language_pair, mask_pair = language_pair.to(
                        self.device), mask_pair.to(self.device)
                    loss = self.train_single(language_pair, mask_pair,
                                             encoder_optimizer,
                                             decoder_optimizer, criterion)
                    loss_avg.update(loss.item())
                    self.global_step += 1
                    if self.global_step % log_every == 0:
                        summary_writer.add_scalar('loss_value',
                                                  loss,
                                                  global_step=self.global_step)
                    if self.global_step % print_every == 0:
                        log('global step: {}, loss average: {:.3f}'.format(
                            self.global_step, loss_avg()))

                    progress_bar.set_postfix(loss_avg=loss_avg())
                    progress_bar.update()
            if loss_avg() < current_best_loss:
                log('new best loss average found, saving modules...')
                current_best_loss = loss_avg()
                state = {
                    'encoder': self.encoder.state_dict(),
                    'decoder': self.decoder.state_dict(),
                    'global_step': self.global_step,
                    'epoch': epoch,
                    'loss_avg': loss_avg()
                }
                torch.save(state, os.path.join(self.ckpt_dir, 'best.pth.tar'))
Example #10
def train():
    # Turn on training mode which enables dropout.
    md.train()
    avg_loss = RunningAverage()
    avg_acc = RunningAverage()
    avg_prec = RunningAverage()
    avg_recall = RunningAverage()
    sparsity = 0.0
    info = {
        'loss': None,
        'acc': None,
    }

    pbar = tqdm(train_dl, ascii=True, leave=False)

    for batch in pbar:
        inp, target = batch
        inp, target = inp.to(device), target.to(device)
        # run model
        md.zero_grad()
        out = md(inp.t())
        loss = criterion(out.view(-1), target.float())
        loss.backward()

        torch.nn.utils.clip_grad_norm_(md.parameters(), args.clip)
        optimizer.step()
        if args.prune:
            pruner.step()

        # update stats
        avg_loss.update(loss.item())
        pred = out.view(-1) > 0.5

        correct = pred == target.byte()
        avg_acc.update(torch.sum(correct).item() / len(correct))

        #         avg_prec.update(t_p/(t_p+f_p))
        #         avg_recall.update(t_p/(t_p+f_n))
        info['loss'] = f'{avg_loss():05.3f}'
        info['acc'] = f'{avg_acc():05.2f}'
        #         info['prec'] = f'{avg_prec():05.2f}'
        #         info['recall'] = f'{avg_recall():05.2f}'
        if args.prune:
            sparsity = pruner.log()
            info['spar'] = f'{sparsity:.2f}'

        pbar.set_postfix(**info)

    return avg_loss(), avg_acc(), sparsity
Example #11
def train_one_epoch(model, datagen, loss_fn, optimizer):
    model.train()
    loss_avg = RunningAverage()
    with tqdm(total=len(datagen)) as t:
        for imgsA, imgsB, labels in datagen:
            imgsA, imgsB, labels = imgsA.to(DEVICE), imgsB.to(
                DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            out = model(imgsA, imgsB)
            loss = loss_fn(out, labels)
            loss.backward()
            optimizer.step()

            t.set_postfix(loss=loss.cpu().item())
            t.update()
            loss_avg.update(loss.cpu().item())
    return loss_avg()
Example #12
    def run_train(self, dataset, ontology, args):
        model, tokenizer = self.bert, self.tokenizer
        batch_size = args.batch_size
        self.train()

        # Generate training examples
        turns = list(dataset['train'].iter_turns())
        train_examples = [
            turn_to_examples(t, ontology, tokenizer) for t in turns
        ]
        train_examples = list(itertools.chain.from_iterable(train_examples))
        print('Generated training examples')

        # Random Oversampling
        # Note that: Most of the constructed examples are negative
        if args.random_oversampling:
            negative_examples, positive_examples = [], []
            for example in train_examples:
                if example[-1] == 0: negative_examples.append(example)
                if example[-1] == 1: positive_examples.append(example)
            nb_negatives, nb_positives = len(negative_examples), len(
                positive_examples)
            sampled_positive_examples = random.choices(positive_examples,
                                                       k=int(nb_negatives / 8))
            train_examples = sampled_positive_examples + negative_examples
            print('Did Random Oversampling')
            print('Number of positive examples increased from {} to {}'.format(
                nb_positives, len(sampled_positive_examples)))

        # Initialize Optimizer
        num_train_iters = args.epochs * len(
            train_examples) / batch_size / args.gradient_accumulation_steps
        self.init_optimizer(args, num_train_iters)

        # Main training loop
        iterations = 0
        best_dev_joint_goal = 0.0
        train_avg_loss = RunningAverage()
        for epoch in range(args.epochs):
            print('Epoch {}'.format(epoch))

            random.shuffle(train_examples)
            pbar = tqdm(range(0, len(train_examples), batch_size))
            for i in pbar:
                iterations += 1

                # Next training batch
                batch = train_examples[i:i + batch_size]
                _, _, input_ids, token_type_ids, labels = list(zip(*batch))

                # Padding and Convert to Torch Tensors
                input_ids, input_masks = pad(input_ids, args.device)
                token_type_ids = pad(token_type_ids, args.device)[0]
                labels = torch.LongTensor(labels).to(args.device)

                # Calculate loss
                loss = model(input_ids,
                             token_type_ids,
                             input_masks,
                             labels=labels)
                if args.n_gpus > 1:
                    loss = loss.mean()
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps
                loss.backward()
                train_avg_loss.update(loss.item())

                # Update the progress bar postfix (iterating over the tqdm
                # object already advances the bar, so no extra update(1) is needed).
                pbar.set_postfix_str(f'Train Loss: {train_avg_loss()}')

                # parameters update
                if iterations % args.gradient_accumulation_steps == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()

            # Evaluate on the dev set and the test set
            dev_results = self.run_dev(dataset, ontology, args)
            test_results = self.run_test(dataset, ontology, args)

            print('Evaluations after epoch {}'.format(epoch))
            print(dev_results)
            print(test_results)
            if dev_results['joint_goal'] > best_dev_joint_goal:
                best_dev_joint_goal = dev_results['joint_goal']
                self.save(args.output_dir)
                print('Saved the model')
Example #13
def train(config_name, gene_variant=None):
    # Prepare tokenizer, dataset, and model
    configs = get_configs(config_name, verbose=False)
    if configs['use_gene_features']:
        assert gene_variant is not None
        configs['gene_variant'] = gene_variant
    tokenizer = BertTokenizer.from_pretrained(configs['transformer'], do_basic_tokenize=False)
    train_set, dev_set, test_set = load_oneie_dataset(configs['base_dataset_path'], tokenizer)
    model = BasicCorefModel(configs)

    # Initialize the optimizer
    num_train_docs = len(train_set)
    epoch_steps = int(math.ceil(num_train_docs / configs['batch_size']))
    num_train_steps = int(epoch_steps * configs['epochs'])
    num_warmup_steps = int(num_train_steps * 0.1)
    optimizer = model.get_optimizer(num_warmup_steps, num_train_steps)
    print('Initialized optimizer')

    # Main training loop
    best_dev_score, iters, batch_loss = 0.0, 0, 0
    for epoch in range(configs['epochs']):
        #print('Epoch: {}'.format(epoch))
        print('\n')
        progress = tqdm.tqdm(total=epoch_steps, ncols=80,
                             desc='Train {}'.format(epoch))
        accumulated_loss = RunningAverage()

        train_indices = list(range(num_train_docs))
        random.shuffle(train_indices)
        for train_idx in train_indices:
            iters += 1
            inst = train_set[train_idx]
            iter_loss = model(inst, is_training=True)[0]
            iter_loss /= configs['batch_size']
            iter_loss.backward()
            batch_loss += iter_loss.data.item()
            if iters % configs['batch_size'] == 0:
                accumulated_loss.update(batch_loss)
                torch.nn.utils.clip_grad_norm_(model.parameters(), configs['max_grad_norm'])
                optimizer.step()
                optimizer.zero_grad()
                batch_loss = 0
                # Update progress bar
                progress.update(1)
                progress.set_postfix_str('Average Train Loss: {}'.format(accumulated_loss()))
        progress.close()

        # Evaluation after each epoch
        print('Evaluation on the dev set', flush=True)
        dev_score = evaluate(model, dev_set, configs)['avg']

        # Save model if it has better dev score
        if dev_score > best_dev_score:
            best_dev_score = dev_score
            # Evaluation on the test set
            print('Evaluation on the test set', flush=True)
            evaluate(model, test_set, configs)
            # Save the model
            save_path = os.path.join(configs['saved_path'], 'model.pt')
            torch.save({'model_state_dict': model.state_dict()}, save_path)
            print('Saved the model', flush=True)
Example #14
for i in range(EPOCHS):
    log.info('epoch {}'.format(i))
    model.train()
    loss_trn_avg = RunningAverage()
    with tqdm(total=len(trn_loader)) as t:
        for imgs, labels in trn_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            out = model(imgs)
            loss = loss_fn(out, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            t.set_postfix(loss=loss.cpu().item())
            t.update()
            loss_trn_avg.update(loss.cpu().item())

    model.eval()
    loss_val_avg = RunningAverage()
    iou_val_avg = RunningAverage()
    with torch.no_grad():
        for imgs, labels in val_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            out = model(imgs)
            loss = loss_fn(out, labels)
            loss_val_avg.update(loss.cpu().item())
            iou_val_avg.update(
                iou_sim(labels.cpu().detach().numpy(),
                        out.cpu().detach().numpy()))

    log.info('trn:{:05.3f}, val:{:05.3f}, iou_val: {:.3f}'.format(
        loss_trn_avg(), loss_val_avg(), iou_val_avg()))