Example #1
def evaluate(args, model, eval_dataloader, metrics):
    # Eval!
    logger.info("  Num examples = %d", len(eval_dataloader))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = AverageMeter()
    metrics.reset()
    preds = []
    targets = []
    pbar = ProgressBar(n_total=len(eval_dataloader), desc='Evaluating')
    model.eval()
    for bid, batch in enumerate(eval_dataloader):
        batch = tuple(t.to(args.device) for t in batch)
        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                      'attention_mask': batch[1],
                      'token_type_ids': batch[2],
                      'labels': batch[3]}
            outputs = model(**inputs)
            loss, logits = outputs[:2]
            eval_loss.update(loss.item(), n=1)
        preds.append(logits.detach().cpu())
        targets.append(inputs['labels'].detach().cpu())
        pbar(bid)
    preds = torch.cat(preds, dim=0)
    targets = torch.cat(targets, dim=0)
    metrics(preds, targets)
    eval_log = {"eval_acc": metrics.value(),
                'eval_loss': eval_loss.avg}
    return eval_log
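
None of these snippets define AverageMeter itself. A minimal sketch consistent with how it is used throughout — bare AverageMeter(), AverageMeter('Time', ':.3f'), AverageMeter(name='Loss'), with val/avg/sum/count fields — though each repository's real class may differ:

class AverageMeter:
    """Tracks the latest value, running sum, count, and mean."""

    def __init__(self, name='', fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        # n is the weight of this update, e.g. the batch size
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(name=self.name, val=self.val, avg=self.avg)
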
Example #2
 def init_losses(self):
     # Five losses: total loss, localization (GIoU) loss, confidence loss,
     # class loss, and the per-detection-head losses. The total loss is the
     # sum of the localization, confidence, and class losses; every meter
     # tracks a running average over a window of steps. The per-head losses
     # correspond, in order, to strides 32, 16, and 8.
     self.losses = {
         'loss': AverageMeter(),
         'giou_loss': AverageMeter(),
         'conf_loss': AverageMeter(),
         'class_loss': AverageMeter(),
         'loss_per_branch': [AverageMeter() for _ in range(3)],
     }
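
A hypothetical update step consistent with this structure, inside the trainer's loop (total_loss, giou, conf, cls, and branch_losses are assumed names for the criterion's outputs):

self.losses['loss'].update(total_loss.item())
self.losses['giou_loss'].update(giou.item())
self.losses['conf_loss'].update(conf.item())
self.losses['class_loss'].update(cls.item())
# one meter per detection head, in stride order 32, 16, 8
for meter, branch_loss in zip(self.losses['loss_per_branch'], branch_losses):
    meter.update(branch_loss.item())
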
Example #3
def eval(val_loader, model, criterion, device, out_file):
    '''
    Run evaluation
    '''
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (x, target) in enumerate(val_loader):

            x = x.to(device)
            target = target.to(device)

            # Forward pass
            logits = model(x)
            loss = criterion(logits, target)

            # measure accuracy and record loss
            acc = accuracy_topk(logits.data, target)
            accs.update(acc.item(), x.size(0))
            losses.update(loss.item(), x.size(0))

    text = '\n Test\t Loss ({loss.avg:.4f})\t Accuracy ({prec.avg:.3f})\n'.format(
        loss=losses, prec=accs)
    print(text)
    with open(out_file, 'a') as f:
        f.write(text)
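
accuracy_topk is not shown here; judging by the call site (logits and targets in, one scalar tensor out), a minimal single-k sketch could look like this (returning a percentage is an assumption, matching the {prec.avg:.3f} display):

def accuracy_topk(output, target, k=1):
    # fraction of rows whose true label appears among the top-k logits
    _, pred = output.topk(k, dim=1)
    correct = pred.eq(target.view(-1, 1).expand_as(pred)).any(dim=1)
    return correct.float().mean().mul(100.0)
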
Example #4
def validate(val_loader, backbone, model, acc_prefixes, args):
    batch_time = AverageMeter('Time', ':.3f')

    # switch to evaluate mode
    model.eval()

    # TODO: Aniruddha
    pred_var_stack = [torch.Tensor() for _ in range(5)]
    labels_var_stack = torch.Tensor()
    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            features = backbone(images)
            outputs = model(features)

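            # first batch: build one accuracy meter per output head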
            if i == 0:
                acc_meters = [
                    NoBatchAverageMeter('', ':11.2f')
                    for _ in range(len(outputs))
                ]
                progress = NoTabProgressMeter(
                    len(val_loader),
                    [batch_time, *acc_meters],
                    prefix='Test: ')

            # measure accuracy
            for output, acc_meter in zip(outputs, acc_meters):
                acc1, _ = accuracy(output, target, topk=(1, 5))
                acc_meter.update(acc1[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0 or i == len(val_loader)-1:
                line = progress.display(i)
                len_prefixes = len(acc_prefixes) * len(acc_prefixes[0])
                prefix_line = ' ' * (len(line) - len_prefixes)
                prefix_line += ''.join(acc_prefixes)
                logger.info(prefix_line)
                logger.info(line)
            
            for layer_id in range(5):
                pred_var_stack[layer_id] = torch.cat((pred_var_stack[layer_id], outputs[layer_id].cpu()), dim=0)
            labels_var_stack = torch.cat((labels_var_stack, target.cpu()), dim=0)

    return acc_meters, pred_var_stack, labels_var_stack
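
The accuracy helper called with topk=(1, 5) matches the signature of the standard PyTorch ImageNet-example utility; for reference:

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
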
Example #5
def train(train_loader):
    pbar = ProgressBar(n_batch=len(train_loader))
    train_loss = AverageMeter()
    train_acc = AverageMeter()
    count = 0
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output, loss = model(data, y=target, loss_fn=nn.CrossEntropyLoss())
        pred = output.argmax(
            dim=1, keepdim=True)  # get the index of the max log-probability
        correct = pred.eq(target.view_as(pred)).sum().item()
        loss.backward()
        optimizer.step()
        count += data.size(0)
        train_acc.update(correct, n=1)
        pbar.batch_step(batch_idx=batch_idx,
                        info={
                            'loss': loss.item(),
                            'acc': correct / data.size(0)
                        },
                        bar_type='Training')
        train_loss.update(loss.item(), n=1)
    print(' ')
    return {'loss': train_loss.avg, 'acc': train_acc.sum / count}
Example #6
def validation(opt, val_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    model.val_start()

    end = time.time()
    loss = 0
    for i, val_data in enumerate(val_loader):
        data_time.update(time.time() - end)

        model.logger = train_logger

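        # incremental running mean of the validation loss across batches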
        loss = (model.val_forward(*val_data) + loss * i) / (i + 1)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    # Print log info
    logging.info('Epoch: [{0}]\t'
                 '{e_log}\t'
                 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                     epoch,
                     batch_time=batch_time,
                     data_time=data_time,
                     e_log=str(model.logger)))
    return loss
Example #7
def train(train_loader):
    pbar = ProgressBar(n_total=len(train_loader), desc='Training')
    train_loss = AverageMeter()
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        pbar(step=batch_idx, info={'loss': loss.item()})
        train_loss.update(loss.item(), n=1)
    return {'loss': train_loss.avg}
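
ProgressBar is a project-local utility, not part of torch; the snippets use both a callable form (n_total/desc, invoked as pbar(step=..., info=...)) and a batch_step form (n_batch). A minimal sketch of the callable variant, assuming it only renders a bar plus the info dict:

import sys

class ProgressBar:
    def __init__(self, n_total, desc='', width=30):
        self.n_total = n_total
        self.desc = desc
        self.width = width

    def __call__(self, step, info=None):
        filled = int(self.width * (step + 1) / self.n_total)
        bar = '#' * filled + '-' * (self.width - filled)
        extras = ' '.join(f'{k}: {v:.4f}' for k, v in (info or {}).items())
        sys.stdout.write(f'\r{self.desc} [{bar}] {step + 1}/{self.n_total} {extras}')
        sys.stdout.flush()
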
Example #8
def train(train_loader, lr_scheduler=None):
    pbar = ProgressBar(n_batch=len(train_loader))
    train_loss = AverageMeter()
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        optimizer.step()
        if lr_scheduler is not None:
            lr_scheduler.step()
        pbar.batch_step(batch_idx=batch_idx,
                        info={'loss': loss.item()},
                        bar_type='Training')
        train_loss.update(loss.item(), n=1)
    return {'loss': train_loss.avg}
Example #9
def train(dataloader):
    pbar = ProgressBar(n_total=len(dataloader), desc='Training')
    train_loss = AverageMeter()
    model.train()
    for batch_idx, batch in enumerate(dataloader):
        b_features, b_target, b_idx = batch['features'].to(
            DEVICE), batch['target'].to(DEVICE), batch['idx'].to(DEVICE)
        optimizer.zero_grad()
        with autocast():
            logits, probs = model(b_features)
            loss = F.cross_entropy(logits, b_target)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        pbar(step=batch_idx, info={'loss': loss.item()})
        train_loss.update(loss.item(), n=1)
    return {'loss': train_loss.avg}
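
This mixed-precision example assumes model, optimizer, scaler, and scheduler live in the enclosing scope. A plausible setup (the optimizer and scheduler choices are assumptions; any per-step scheduler fits the scheduler.step() inside the loop):

import torch
from torch.cuda.amp import GradScaler, autocast

NUM_EPOCHS = 10  # assumed
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
scaler = GradScaler()  # rescales the loss so fp16 gradients do not underflow
scheduler = torch.optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, total_steps=len(dataloader) * NUM_EPOCHS)
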
Example #10
    def eval(self, loader):
        self.model.eval()
        losses = AverageMeter()
        correct = 0
        with torch.no_grad():
            pbar = ImProgressBar(len(loader))
            for i, (imgs, targets) in enumerate(loader):
                imgs, targets = imgs.cuda(), targets.cuda()
                outputs = self.model(imgs)

                _, predicted = torch.max(outputs.data, dim=1)
                correct += (predicted == targets).sum().item()

                loss = self.criterion(outputs, targets)
                losses.update(loss.item(), 1)

                pbar.update(i)
            pbar.finish()
        return losses.avg, correct / len(loader.dataset)
Example #11
def train(dataloader):
    pbar = ProgressBar(n_total=len(dataloader), desc='Training')
    train_loss = AverageMeter()
    for batch_idx, batch in enumerate(dataloader):
        # forward
        probas = model.forward(batch['features'])
        # backward
        grad_w, grad_b = model.backward(batch['features'], batch['target'],
                                        probas)
        # manual regularization -- account for mini-batches
        l2_reg = model.LAMBDA * model.weights / len(dataloader)
        # update weights
        model.weights -= learning_rate * (grad_w + l2_reg)
        model.bias -= learning_rate * grad_b
        # record loss
        loss = model._logit_cost(batch['target'], probas)
        # update meter
        train_loss.update(loss.item(), n=1)
        # update progress bar
        pbar(step=batch_idx, info={'batch_loss': loss.item()})
    return {'train_loss': train_loss.avg}
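
This example (and the matching test() later) drives a hand-rolled logistic-regression model through forward/backward/_logit_cost, none of which are shown. A hypothetical minimal model matching that interface, with all names inferred from the call sites:

import torch

class ManualLogisticRegression:
    LAMBDA = 0.1  # L2 strength; value assumed

    def __init__(self, num_features):
        self.weights = torch.zeros(num_features)
        self.bias = torch.zeros(1)

    def forward(self, x):
        # class-1 probabilities via the logistic sigmoid
        return torch.sigmoid(x @ self.weights + self.bias)

    def backward(self, x, y, probas):
        # gradients of the mean negative log-likelihood
        errors = probas - y.float()
        grad_w = x.t() @ errors / x.size(0)
        grad_b = errors.mean()
        return grad_w, grad_b

    def _logit_cost(self, y, probas):
        eps = 1e-7  # keeps log() finite
        y = y.float()
        return -(y * torch.log(probas + eps)
                 + (1 - y) * torch.log(1 - probas + eps)).mean()
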
Example #12
    def train(self):
        self.model.train()
        losses = AverageMeter()
        correct = 0
        pbar = ImProgressBar(len(self.train_loader))
        for i, (imgs, targets) in enumerate(self.train_loader):
            imgs, targets = imgs.cuda(), targets.cuda()
            outputs = self.model(imgs)

            _, predicted = torch.max(outputs.data, dim=1)
            correct += (predicted == targets).sum().item()

            loss = self.criterion(outputs, targets)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            losses.update(loss.item(), 1)
            pbar.update(i)
        pbar.finish()
        return losses.avg, correct / len(self.train_loader.dataset)
Example #13
def train(train_loader, backbone, model, optimizer, acc_prefixes, epoch, args):
    batch_time = AverageMeter('B', ':.2f')
    data_time = AverageMeter('D', ':.2f')

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        images = images.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)

        with torch.no_grad():
            features = backbone(images)
        outputs = model(features)

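        # first batch: build one accuracy meter per output head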
        if i == 0:
            acc_meters = [
                NoBatchAverageMeter('', ':>11.2f')
                for _ in range(len(outputs))
            ]
            progress = NoTabProgressMeter(
                len(train_loader),
                [batch_time, data_time, *acc_meters],
                prefix="Epoch: [{}]".format(epoch))

        # backprop each head's loss and measure its accuracy
        optimizer.zero_grad()
        for output, acc_meter in zip(outputs, acc_meters):
            loss = F.cross_entropy(output, target)
            loss.backward()
            acc1, _ = accuracy(output, target, topk=(1, 5))
            acc_meter.update(acc1[0], images.size(0))
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            line = progress.display(i)
            len_prefixes = len(acc_prefixes) * len(acc_prefixes[0])
            prefix_line = ' ' * (len(line) - len_prefixes)
            prefix_line += ''.join(acc_prefixes)
            logger.info(prefix_line)
            logger.info(line)
Example #14
def train(dataloader):
    pbar = ProgressBar(n_total=len(dataloader), desc='Training')
    train_loss = AverageMeter()
    for batch_idx, batch in enumerate(dataloader):
        # forward
        y_hat = model.forward(batch['features'].float())
        # backward
        grad_w, grad_b = model.backward(batch['features'], batch['target'],
                                        y_hat)
        # manual regularization
        l2_reg = model.LAMBDA * model.weights
        l2_reg = l2_reg.reshape(2, 1)
        # update weights
        model.weights -= learning_rate * (grad_w + l2_reg).view(-1)
        model.bias -= (learning_rate * grad_b).view(-1)
        # record loss
        loss = model.loss(batch['target'], y_hat)
        # update meter
        train_loss.update(loss.item(), n=1)
        # update progress bar
        pbar(step=batch_idx, info={'batch_loss': loss.item()})
    return {'train_loss': train_loss.avg}
Example #15
def train(train_loader,
          model,
          criterion,
          optimizer,
          epoch,
          device,
          print_freq=25):
    '''
    Run one train epoch
    '''
    losses = AverageMeter()

    # switch to train mode
    model.train()

    for i, (id, mask, target) in enumerate(train_loader):

        id = id.to(device)
        mask = mask.to(device)
        target = target.to(device)

        # Forward pass
        logits = model(id, mask)
        loss = criterion(logits, target)

        # Backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        losses.update(loss.item(), id.size(0))

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})'.format(
                      epoch, i, len(train_loader), loss=losses))
Example #16
def train(dataloader):
    pbar = ProgressBar(n_total=len(dataloader), desc='Training')
    train_loss = AverageMeter()
    model.train()
    for batch_idx, batch in enumerate(dataloader):
        b_features, b_target, b_idx = batch['features'].to(
            DEVICE), batch['target'].to(DEVICE), batch['idx'].to(DEVICE)
        optimizer.zero_grad()
        with autocast():
            logits, probs = model(b_features)
            loss = F.cross_entropy(logits, b_target)
            # regularize loss -- but not the intercept
            LAMBDA, L2 = 2, 0.
            for name, p in model.named_parameters():
                if 'weight' in name:
                    L2 = L2 + (p**2).sum()
            loss = loss + 2. / b_target.size(0) * LAMBDA * L2
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        scheduler.step()
        pbar(step=batch_idx, info={'loss': loss.item()})
        train_loss.update(loss.item(), n=1)
    return {'loss': train_loss.avg}
Example #17
def eval(val_loader, model, criterion, device):
    '''
    Run evaluation
    '''
    losses = AverageMeter()

    # switch to eval mode
    model.eval()

    with torch.no_grad():
        for i, (id, mask, target) in enumerate(val_loader):

            id = id.to(device)
            mask = mask.to(device)
            target = target.to(device)

            # Forward pass
            logits = model(id, mask)
            loss = criterion(logits, target)

            # measure accuracy and record loss
            losses.update(loss.item(), id.size(0))

    print('Test\t Loss ({loss.avg:.4f})\n'.format(loss=losses))
Example #18
 def train_epoch(dataset, keep_prob=0.5, batch_size_value=2048):
     _accs = AverageMeter()
     _losses = AverageMeter()
     sampleNum = dataset.sample_number
     for batch in tqdm(range(sampleNum // batch_size_value)):
         X_batch, y_batch = dataset.next_batch(batch_size=batch_size_value)
         feed_dict = {
             X_input: X_batch,
             y_input: y_batch,
             keepProb: keep_prob,
             # `batch_size` is the graph placeholder; the Python value comes
             # in via batch_size_value so the placeholder is not shadowed
             batch_size: batch_size_value
         }
         fetches = [acc, loss, train_op]
         _acc, _loss, _ = sess.run(fetches, feed_dict)
         _accs.update(_acc)
         _losses.update(_loss)
     return _accs, _losses
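
This is TF1-style graph code; X_input, y_input, keepProb, the batch_size placeholder, acc, loss, train_op, and sess are assumed to be built elsewhere, roughly along these lines (shapes are placeholders):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

NUM_FEATURES = 128  # assumed
X_input = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_input = tf.placeholder(tf.int64, [None])
keepProb = tf.placeholder(tf.float32)
batch_size = tf.placeholder(tf.int64)
# acc, loss, and train_op would be the graph's accuracy op, loss op, and
# optimizer step; sess = tf.Session() runs them in train_epoch above.
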
Example #19
def train(opt, train_loader, model, epoch):
    # average meters to record the training statistics
    batch_time = AverageMeter()
    data_time = AverageMeter()
    train_logger = LogCollector()

    # switch to train mode
    model.train_start()

    end = time.time()
    for i, train_data in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # make sure train logger is used
        model.logger = train_logger

        # Update the model
        model.train_forward(*train_data)
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Print log info
        if model.Eiters % opt.log_step == 0:
            logging.info(
                'Epoch: [{0}][{1}/{2}]\t'
                '{e_log}\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    epoch,
                    i,
                    len(train_loader),
                    batch_time=batch_time,
                    data_time=data_time,
                    e_log=str(model.logger)))

        # Record logs in tensorboard
        tb_logger.log_value('epoch', epoch, step=model.Eiters)
        tb_logger.log_value('step', i, step=model.Eiters)
        tb_logger.log_value('batch_time', batch_time.val, step=model.Eiters)
        tb_logger.log_value('data_time', data_time.val, step=model.Eiters)
        model.logger.tb_log(tb_logger, step=model.Eiters)
Example #20
def test(test_loader):
    pbar = ProgressBar(n_batch=len(test_loader))
    valid_loss = AverageMeter()
    valid_acc = AverageMeter()
    model.eval()
    count = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output, loss = model(data, y=target, loss_fn=nn.CrossEntropyLoss())
            pred = output.argmax(
                dim=1,
                keepdim=True)  # get the index of the max log-probability
            correct = pred.eq(target.view_as(pred)).sum().item()
            valid_loss.update(loss.item(), n=data.size(0))
            valid_acc.update(correct, n=1)
            count += data.size(0)
            pbar.batch_step(batch_idx=batch_idx, info={}, bar_type='Testing')
    return {'valid_loss': valid_loss.avg, 'valid_acc': valid_acc.sum / count}
Example #21
def test(test_loader):
    pbar = ProgressBar(n_total=len(test_loader), desc='Testing')
    valid_loss = AverageMeter()
    valid_acc = AverageMeter()
    model.eval()
    count = 0
    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(test_loader):
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = loss_fn(output, target).item()  # scalar batch loss
            pred = output.argmax(dim=1, keepdim=True)  # index of the max log-probability
            correct = pred.eq(target.view_as(pred)).sum().item()
            valid_loss.update(loss, n=data.size(0))
            valid_acc.update(correct, n=1)
            count += data.size(0)
            pbar(step=batch_idx)
    return {'valid_loss': valid_loss.avg,
            'valid_acc': valid_acc.sum / count}
Example #22
def train(train_loader,
          model,
          criterion,
          optimizer,
          epoch,
          device,
          out_file,
          print_freq=1):
    '''
    Run one train epoch
    '''
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to train mode
    model.train()

    for i, (x, target) in enumerate(train_loader):

        x = x.to(device)
        target = target.to(device)

        # Forward pass
        logits = model(x)
        loss = criterion(logits, target)

        # Backward pass and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure accuracy and record loss
        acc = accuracy_topk(logits.data, target)
        accs.update(acc.item(), x.size(0))
        losses.update(loss.item(), x.size(0))

        if i % print_freq == 0:
            text = '\n Epoch: [{0}][{1}/{2}]\t Loss {loss.val:.4f} ({loss.avg:.4f})\t Accuracy {prec.val:.3f} ({prec.avg:.3f})'.format(
                epoch, i, len(train_loader), loss=losses, prec=accs)
            print(text)
            with open(out_file, 'a') as f:
                f.write(text)
Example #23
def test(dataloader):
    pbar = ProgressBar(n_total=len(dataloader), desc='Testing')
    valid_loss = AverageMeter()
    valid_acc = AverageMeter()
    count = 0
    for batch_idx, batch in enumerate(dataloader):
        # forward -- skip backward prop
        probas = model.forward(batch['features'])
        # record loss
        loss = model._logit_cost(batch['target'], probas)
        # get predictions
        prediction = torch.where(probas > 0.5, torch.tensor(1, device=device),
                                 torch.tensor(0, device=device)).view(-1)
        # compare
        correct = prediction.eq(batch['target']).sum().item()
        valid_loss.update(loss.item(), n=batch['features'].size(0))
        valid_acc.update(correct, n=1)
        count += batch['features'].size(0)
        pbar(step=batch_idx)
    return {'valid_loss': valid_loss.avg, 'valid_acc': valid_acc.sum / count}
Example #24
def main(args):
    train_info = []
    best_epoch = np.zeros(5)
    for val_folder_index in range(5):
        best_balance_acc = 0
        whole_train_list = ['D8E6', '117E', '676F', 'E2D7', 'BE52']
        val_WSI_list = whole_train_list[val_folder_index]
        train_WSI_list = whole_train_list.copy()
        train_WSI_list.pop(val_folder_index)
        train_directory = '../data/finetune/1percent/'
        valid_directory = '../data/finetune/1percent/'
        dataset = {}
        train_datasets = [
            datasets.ImageFolder(root=train_directory + wsi,
                                 transform=get_aug(train=False,
                                                   train_classifier=True,
                                                   **args.aug_kwargs))
            for wsi in train_WSI_list
        ]
        dataset['valid'] = datasets.ImageFolder(
            root=valid_directory + val_WSI_list,
            transform=get_aug(train=False,
                              train_classifier=False,
                              **args.aug_kwargs))
        dataset['train'] = data.ConcatDataset(train_datasets)

        train_loader = torch.utils.data.DataLoader(
            dataset=dataset['train'],
            batch_size=args.eval.batch_size,
            shuffle=True,
            **args.dataloader_kwargs)
        test_loader = torch.utils.data.DataLoader(
            dataset=dataset['valid'],
            batch_size=args.eval.batch_size,
            shuffle=False,
            **args.dataloader_kwargs)

        model = get_backbone(args.model.backbone)
        classifier = nn.Linear(in_features=model.output_dim,
                               out_features=9,
                               bias=True).to(args.device)

        assert args.eval_from is not None
        save_dict = torch.load(args.eval_from, map_location='cpu')
        msg = model.load_state_dict(
            {
                k[9:]: v
                for k, v in save_dict['state_dict'].items()
                if k.startswith('backbone.')
            },
            strict=True)

        # print(msg)
        model = model.to(args.device)
        model = torch.nn.DataParallel(model)

        classifier = torch.nn.DataParallel(classifier)
        # define optimizer
        optimizer = get_optimizer(
            args.eval.optimizer.name,
            classifier,
            lr=args.eval.base_lr * args.eval.batch_size / 256,
            momentum=args.eval.optimizer.momentum,
            weight_decay=args.eval.optimizer.weight_decay)

        # define lr scheduler
        lr_scheduler = LR_Scheduler(
            optimizer,
            args.eval.warmup_epochs,
            args.eval.warmup_lr * args.eval.batch_size / 256,
            args.eval.num_epochs,
            args.eval.base_lr * args.eval.batch_size / 256,
            args.eval.final_lr * args.eval.batch_size / 256,
            len(train_loader),
        )

        loss_meter = AverageMeter(name='Loss')
        acc_meter = AverageMeter(name='Accuracy')

        # Start training
        global_progress = tqdm(range(0, args.eval.num_epochs),
                               desc=f'Evaluating')
        for epoch in global_progress:
            loss_meter.reset()
            model.eval()
            classifier.train()
            local_progress = tqdm(train_loader,
                                  desc=f'Epoch {epoch}/{args.eval.num_epochs}',
                                  disable=True)

            for idx, (images, labels) in enumerate(local_progress):
                classifier.zero_grad()
                with torch.no_grad():
                    feature = model(images.to(args.device))

                preds = classifier(feature)

                loss = F.cross_entropy(preds, labels.to(args.device))

                loss.backward()
                optimizer.step()
                loss_meter.update(loss.item())
                lr = lr_scheduler.step()
                local_progress.set_postfix({
                    'lr': lr,
                    "loss": loss_meter.val,
                    'loss_avg': loss_meter.avg
                })

            writer.add_scalar('Valid/Loss', loss_meter.avg, epoch)
            writer.add_scalar('Valid/Lr', lr, epoch)
            writer.flush()

            PATH = 'checkpoint/exp_0228_triple_1percent/' + val_WSI_list + '/' + val_WSI_list + '_tunelinear_' + str(
                epoch) + '.pth'

            torch.save(classifier, PATH)

            classifier.eval()
            correct, total = 0, 0
            acc_meter.reset()

            pred_label_for_f1 = np.array([])
            true_label_for_f1 = np.array([])
            for idx, (images, labels) in enumerate(test_loader):
                with torch.no_grad():
                    feature = model(images.to(args.device))
                    preds = classifier(feature).argmax(dim=1)
                    correct = (preds == labels.to(args.device)).sum().item()

                    preds_arr = preds.cpu().detach().numpy()
                    labels_arr = labels.cpu().detach().numpy()
                    pred_label_for_f1 = np.concatenate(
                        [pred_label_for_f1, preds_arr])
                    true_label_for_f1 = np.concatenate(
                        [true_label_for_f1, labels_arr])
                    acc_meter.update(correct / preds.shape[0])

            f1 = f1_score(true_label_for_f1,
                          pred_label_for_f1,
                          average='macro')
            balance_acc = balanced_accuracy_score(true_label_for_f1,
                                                  pred_label_for_f1)
            print('Epoch:  ', str(epoch),
                  f'Accuracy = {acc_meter.avg * 100:.2f}')
            print('F1 score =  ', f1, 'balance acc:  ', balance_acc)
            if balance_acc > best_balance_acc:
                best_epoch[val_folder_index] = epoch
                best_balance_acc = balance_acc
            train_info.append([val_WSI_list, epoch, f1, balance_acc])

    with open('checkpoint/exp_0228_triple_1percent/train_info.csv', 'w') as f:
        # using csv.writer method from CSV package
        write = csv.writer(f)
        write.writerows(train_info)
    print(best_epoch)
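
Example #24 additionally leans on these imports, inferred from the call sites (get_aug, get_backbone, get_optimizer, LR_Scheduler, and writer are project-local):

import csv
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, balanced_accuracy_score
from torch.utils import data
from torchvision import datasets
from tqdm import tqdm
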
Example #25
    def train_step(self, epoch, trainloader):
        """Training step for each epoch.
        Args:
         epoch: current epoch
         trainloader: dataloader for train set
        Return:
         None
        """
        self.model.train()
        epoch_loss = AverageMeter()

        batch_time = AverageMeter()
        data_time = AverageMeter()

        metrics_meter = dict()
        for k in self.measures.train.keys():
            metrics_meter[k] = AverageMeter()

        bar = Bar('Processing', max=len(trainloader))
        end = time.time()

        for batch_idx, (data, targets) in enumerate(trainloader):

            data_time.update(time.time() - end)
            if self.config.cuda:
                data, targets = data.cuda(), targets.cuda()

            preds = self.model(data)

            loss = self.model.loss_function(preds, targets)

            # backward
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # calculate measure statistics
            batch_measure = dict()
            for k, func in self.metrics_func.items():
                if k.startswith('top'):
                    batch_measure[k] = func(preds, targets)[0]
                else:
                    batch_measure[k] = func(preds, targets)
                if isinstance(batch_measure[k], t.autograd.Variable):
                    batch_measure[k] = batch_measure[k].item()
                metrics_meter[k].update(batch_measure[k], data.size(0))

            # record statistics
            self.stats.batch_loss.append(loss.item())
            epoch_loss.update(loss.item())

            batch_time.update(time.time() - end)
            end = time.time()

            #plot progress
            measure_bar = ' | '.join(
                ['%s : %.4f' % (k, v.avg) for k, v in metrics_meter.items()])
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '.format(
                batch=batch_idx + 1,
                size=len(trainloader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=epoch_loss.avg) + measure_bar
            bar.next()

        bar.finish()
        for k in metrics_meter.keys():
            self.measures.train[k] = metrics_meter[k].avg
        # plot on tensorboard
        '''
        for k, v in metrics_meter.items():
            self.metrics[k].append(v.avg)
            log_value('train %s' % k, v.avg, epoch)
        '''
        self.stats.train_epoch_loss.append(epoch_loss.avg)
        # log_value('epoch_loss', epoch_loss.avg, epoch)

        logger.info(
            ('%02i - ' % (epoch + 1)) +
            ' / '.join(['train loss %.5f' % epoch_loss.avg] +
                       [k + ' %.5f' % v.avg
                        for k, v in metrics_meter.items()]))
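
This method and the validate() in the next example format their progress line with the progress package's Bar and alias torch as t; presumably:

import json
import time

import torch as t
from progress.bar import Bar  # pip install progress
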
Example #26
    def validate(self, epoch, valdataloader):
        self.model.eval()

        epoch_loss = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        metrics_meter = dict()
        for k in self.measures.val.keys():
            metrics_meter[k] = AverageMeter()

        end = time.time()
        bar = Bar('Processing', max=len(valdataloader))
        with t.no_grad():
            for batch_idx, (data, targets) in enumerate(valdataloader):
                data_time.update(time.time() - end)
                if self.config.cuda:
                    data, targets = data.cuda(), targets.cuda()

                preds = self.model(data)
                loss = self.model.loss_function(preds, targets)

                # calculate measure statistics
                batch_measure = dict()
                for k, func in self.metrics_func.items():
                    if k.startswith('top'):
                        batch_measure[k] = func(preds, targets)[0]
                    else:
                        batch_measure[k] = func(preds, targets)
                    if isinstance(batch_measure[k], t.autograd.Variable):
                        batch_measure[k] = batch_measure[k].item()
                    metrics_meter[k].update(batch_measure[k], data.size(0))
                self.stats.batch_loss.append(loss.item())
                epoch_loss.update(loss.item())

                batch_time.update(time.time() - end)
                end = time.time()

                #plot progress
                measure_bar = ' | '.join([
                    '%s : %.4f' % (k, v.avg) for k, v in metrics_meter.items()
                ])
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '.format(
                    batch=batch_idx + 1,
                    size=len(valdataloader),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    loss=epoch_loss.avg) + measure_bar
                bar.next()

        bar.finish()
        for k in metrics_meter.keys():
            self.measures.val[k] = metrics_meter[k].avg

        self.stats.eval_epoch_loss.append(epoch_loss.avg)
        #log_value('val loss', epoch_loss.avg, epoch)
        #for k, v in metrics_meter.items():
        #    self.metrics[k].append(v.avg)
        #    log_value('val %s' % k, v.avg, epoch)

        to_log = dict([('epoch', epoch)] + [(k, v.avg)
                                            for k, v in metrics_meter.items()])

        logger.debug("__log__:%s" % json.dumps(to_log))

        return to_log
Example #27
def main(args):

    train_set = get_dataset(
        args.dataset, 
        args.data_dir, 
        transform=get_aug(args.model, args.image_size, train=False, train_classifier=True), 
        train=True, 
        download=args.download, # default is False
        debug_subset_size=args.batch_size if args.debug else None
    )
    test_set = get_dataset(
        args.dataset, 
        args.data_dir, 
        transform=get_aug(args.model, args.image_size, train=False, train_classifier=False), 
        train=False, 
        download=args.download, # default is False
        debug_subset_size=args.batch_size if args.debug else None
    )


    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )
    test_loader = torch.utils.data.DataLoader(
        dataset=test_set,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )
    model = get_backbone(args.backbone)
    classifier = nn.Linear(in_features=model.output_dim, out_features=10, bias=True).to(args.device)

    assert args.eval_from is not None
    save_dict = torch.load(args.eval_from, map_location='cpu')
    msg = model.load_state_dict({k[9:]:v for k, v in save_dict['state_dict'].items() if k.startswith('backbone.')}, strict=True)
    
    # print(msg)
    model = model.to(args.device)
    model = torch.nn.DataParallel(model)

    # if torch.cuda.device_count() > 1: classifier = torch.nn.SyncBatchNorm.convert_sync_batchnorm(classifier)
    classifier = torch.nn.DataParallel(classifier)
    # define optimizer
    optimizer = get_optimizer(
        args.optimizer, classifier, 
        lr=args.base_lr*args.batch_size/256, 
        momentum=args.momentum, 
        weight_decay=args.weight_decay)

    # define lr scheduler
    lr_scheduler = LR_Scheduler(
        optimizer,
        args.warmup_epochs, args.warmup_lr*args.batch_size/256, 
        args.num_epochs, args.base_lr*args.batch_size/256, args.final_lr*args.batch_size/256, 
        len(train_loader),
    )

    loss_meter = AverageMeter(name='Loss')
    acc_meter = AverageMeter(name='Accuracy')

    # Start training
    global_progress = tqdm(range(0, args.num_epochs), desc=f'Evaluating')
    for epoch in global_progress:
        loss_meter.reset()
        model.eval()
        classifier.train()
        local_progress = tqdm(train_loader, desc=f'Epoch {epoch}/{args.num_epochs}', disable=args.hide_progress)
        
        for idx, (images, labels) in enumerate(local_progress):

            classifier.zero_grad()
            with torch.no_grad():
                feature = model(images.to(args.device))

            preds = classifier(feature)

            loss = F.cross_entropy(preds, labels.to(args.device))

            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            lr = lr_scheduler.step()
            local_progress.set_postfix({'lr':lr, "loss":loss_meter.val, 'loss_avg':loss_meter.avg})
        

        if args.head_tail_accuracy and epoch != 0 and (epoch+1) != args.num_epochs: continue

        local_progress=tqdm(test_loader, desc=f'Test {epoch}/{args.num_epochs}', disable=args.hide_progress)
        classifier.eval()
        correct, total = 0, 0
        acc_meter.reset()
        for idx, (images, labels) in enumerate(local_progress):
            with torch.no_grad():
                feature = model(images.to(args.device))
                preds = classifier(feature).argmax(dim=1)
                correct = (preds == labels.to(args.device)).sum().item()
                acc_meter.update(correct/preds.shape[0])
                local_progress.set_postfix({'accuracy': acc_meter.avg})
        
        global_progress.set_postfix({"epoch":epoch, 'accuracy':acc_meter.avg*100})
Example #28
def main(args, model=None):
    assert args.eval_from is not None or model is not None
    train_set = get_dataset(
        args.dataset,
        args.data_dir,
        transform=get_aug(args.model,
                          args.image_size,
                          train=False,
                          train_classifier=True),
        train=True,
        download=args.download,  # default is False
        debug_subset_size=args.batch_size
        if args.debug else None  # Use a subset of dataset for debugging.
    )
    test_set = get_dataset(
        args.dataset,
        args.data_dir,
        transform=get_aug(args.model,
                          args.image_size,
                          train=False,
                          train_classifier=False),
        train=False,
        download=args.download,  # default is False
        debug_subset_size=args.batch_size if args.debug else None)

    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              pin_memory=True,
                                              drop_last=True)

    # reuse the passed-in backbone if one was given; otherwise it is loaded below

    if args.local_rank >= 0 and not torch.distributed.is_initialized():
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    if model is None:
        model = get_backbone(args.backbone).to(args.device)
        save_dict = torch.load(args.eval_from, map_location=args.device)
        model.load_state_dict(
            {
                k[9:]: v
                for k, v in save_dict['state_dict'].items()
                if k.startswith('backbone.')
            },
            strict=True)

    output_dim = model.output_dim
    if args.local_rank >= 0:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    classifier = nn.Linear(in_features=output_dim,
                           out_features=len(train_set.classes),
                           bias=True).to(args.device)
    if args.local_rank >= 0:
        classifier = torch.nn.parallel.DistributedDataParallel(
            classifier,
            device_ids=[args.local_rank],
            output_device=args.local_rank)

    # define optimizer
    optimizer = get_optimizer(args.optimizer,
                              classifier,
                              lr=args.base_lr * args.batch_size / 256,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)

    # TODO: linear lr warm up for byol simclr swav
    # args.warm_up_epochs
    # define lr scheduler
    lr_scheduler = LR_Scheduler(optimizer, args.warmup_epochs,
                                args.warmup_lr * args.batch_size / 256,
                                args.num_epochs,
                                args.base_lr * args.batch_size / 256,
                                args.final_lr * args.batch_size / 256,
                                len(train_loader))

    loss_meter = AverageMeter(name='Loss')
    acc_meter = AverageMeter(name='Accuracy')

    # Start training
    global_progress = tqdm(range(0, args.num_epochs), desc=f'Evaluating')
    for epoch in global_progress:
        loss_meter.reset()
        model.eval()
        classifier.train()
        local_progress = tqdm(train_loader,
                              desc=f'Epoch {epoch}/{args.num_epochs}',
                              disable=args.hide_progress)

        for idx, (images, labels) in enumerate(local_progress):

            classifier.zero_grad()
            with torch.no_grad():
                feature = model(images.to(args.device))

            preds = classifier(feature)

            loss = F.cross_entropy(preds, labels.to(args.device))

            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            lr = lr_scheduler.step()
            local_progress.set_postfix({
                'lr': lr,
                "loss": loss_meter.val,
                'loss_avg': loss_meter.avg
            })

        if args.head_tail_accuracy and epoch != 0 and (epoch +
                                                       1) != args.num_epochs:
            continue

        local_progress = tqdm(test_loader,
                              desc=f'Test {epoch}/{args.num_epochs}',
                              disable=args.hide_progress)
        classifier.eval()
        correct, total = 0, 0
        acc_meter.reset()
        for idx, (images, labels) in enumerate(local_progress):
            with torch.no_grad():
                feature = model(images.to(args.device))
                preds = classifier(feature).argmax(dim=1)
                correct = (preds == labels.to(args.device)).sum().item()
                acc_meter.update(correct / preds.shape[0])
                local_progress.set_postfix({'accuracy': acc_meter.avg})

        global_progress.set_postfix({
            "epoch": epoch,
            'accuracy': acc_meter.avg * 100
        })
Example #29
def main(args):

    train_set = get_dataset(
        args.dataset,
        args.data_dir,
        transform=get_aug(args.model,
                          args.image_size,
                          train=False,
                          train_classifier=True),
        train=True,
        download=args.download  # default is False
    )
    test_set = get_dataset(
        args.dataset,
        args.data_dir,
        transform=get_aug(args.model,
                          args.image_size,
                          train=False,
                          train_classifier=False),
        train=False,
        download=args.download  # default is False
    )

    if args.debug:
        args.batch_size = 20
        args.num_epochs = 2
        args.num_workers = 0
        train_set = torch.utils.data.Subset(train_set, range(
            0, args.batch_size))  # take only one batch
        test_set = torch.utils.data.Subset(test_set, range(0, args.batch_size))

    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=True,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=args.num_workers,
                                              pin_memory=True,
                                              drop_last=True)

    # define model
    # model = get_model(args.model, args.backbone)
    backbone = get_backbone(args.backbone, castrate=False)
    in_features = backbone.fc.in_features
    backbone.fc = nn.Identity()
    model = backbone
    assert args.eval_from is not None
    save_dict = torch.load(args.eval_from, map_location='cpu')
    msg = model.load_state_dict(
        {
            k[9:]: v
            for k, v in save_dict['state_dict'].items()
            if k.startswith('backbone.')
        },
        strict=True)
    print(msg)
    model = model.to(args.device)
    model = torch.nn.DataParallel(model)
    # if torch.cuda.device_count() > 1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    classifier = nn.Linear(in_features=in_features, out_features=10,
                           bias=True).to(args.device)
    classifier = torch.nn.DataParallel(classifier)
    # breakpoint()

    # define optimizer
    optimizer = get_optimizer(args.optimizer,
                              classifier,
                              lr=args.base_lr * args.batch_size / 256,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)

    # TODO: linear lr warm up for byol simclr swav
    # args.warm_up_epochs

    # define lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              args.num_epochs,
                                                              eta_min=0)

    loss_meter = AverageMeter(name='Loss')
    acc_meter = AverageMeter(name='Accuracy')
    # Start training
    for epoch in tqdm(range(0, args.num_epochs), desc=f'Evaluating'):
        loss_meter.reset()
        model.eval()
        classifier.train()
        p_bar = tqdm(train_loader, desc=f'Epoch {epoch}/{args.num_epochs}')

        for idx, (images, labels) in enumerate(p_bar):
            # breakpoint()
            classifier.zero_grad()
            with torch.no_grad():
                feature = model(images.to(args.device))
            # breakpoint()
            preds = classifier(feature)

            loss = F.cross_entropy(preds, labels.to(args.device))
            # loss = model.forward(images1.to(args.device), images2.to(args.device))
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            p_bar.set_postfix({
                "loss": loss_meter.val,
                'loss_avg': loss_meter.avg
            })

        lr_scheduler.step()

        p_bar = tqdm(test_loader, desc=f'Test {epoch}/{args.num_epochs}')
        classifier.eval()
        correct, total = 0, 0
        acc_meter.reset()
        for idx, (images, labels) in enumerate(p_bar):
            with torch.no_grad():
                feature = model(images.to(args.device))
                preds = classifier(feature).argmax(dim=1)
                correct = (preds == labels.to(args.device)).sum().item()
                acc_meter.update(correct / preds.shape[0])
                p_bar.set_postfix({'accuracy': acc_meter.avg})
Example #30
def main(args):

    train_loader = torch.utils.data.DataLoader(dataset=get_dataset(
        transform=get_aug(train=False,
                          train_classifier=True,
                          **args.aug_kwargs),
        train=True,
        **args.dataset_kwargs),
                                               batch_size=args.eval.batch_size,
                                               shuffle=True,
                                               **args.dataloader_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset=get_dataset(
        transform=get_aug(train=False,
                          train_classifier=False,
                          **args.aug_kwargs),
        train=False,
        **args.dataset_kwargs),
                                              batch_size=args.eval.batch_size,
                                              shuffle=False,
                                              **args.dataloader_kwargs)

    model = get_backbone(args.model.backbone)
    classifier = nn.Linear(in_features=model.output_dim,
                           out_features=10,
                           bias=True).to(args.device)

    assert args.eval_from is not None
    save_dict = torch.load(args.eval_from, map_location='cpu')
    msg = model.load_state_dict(
        {
            k[9:]: v
            for k, v in save_dict['state_dict'].items()
            if k.startswith('backbone.')
        },
        strict=True)

    # print(msg)
    model = model.to(args.device)
    model = torch.nn.DataParallel(model)

    # if torch.cuda.device_count() > 1: classifier = torch.nn.SyncBatchNorm.convert_sync_batchnorm(classifier)
    classifier = torch.nn.DataParallel(classifier)
    # define optimizer
    optimizer = get_optimizer(args.eval.optimizer.name,
                              classifier,
                              lr=args.eval.base_lr * args.eval.batch_size /
                              256,
                              momentum=args.eval.optimizer.momentum,
                              weight_decay=args.eval.optimizer.weight_decay)

    # define lr scheduler
    lr_scheduler = LR_Scheduler(
        optimizer,
        args.eval.warmup_epochs,
        args.eval.warmup_lr * args.eval.batch_size / 256,
        args.eval.num_epochs,
        args.eval.base_lr * args.eval.batch_size / 256,
        args.eval.final_lr * args.eval.batch_size / 256,
        len(train_loader),
    )

    loss_meter = AverageMeter(name='Loss')
    acc_meter = AverageMeter(name='Accuracy')

    # Start training
    global_progress = tqdm(range(0, args.eval.num_epochs), desc=f'Evaluating')
    for epoch in global_progress:
        loss_meter.reset()
        model.eval()
        classifier.train()
        local_progress = tqdm(train_loader,
                              desc=f'Epoch {epoch}/{args.eval.num_epochs}',
                              disable=True)

        for idx, (images, labels) in enumerate(local_progress):
            # If the loader yields a list of [C x H x W] images, unsqueeze
            # each to [1 x C x H x W] and concatenate along the batch
            # dimension into a single [N x C x H x W] tensor.
            if isinstance(images, list):
                images = torch.cat(
                    [image.unsqueeze(dim=0) for image in images], dim=0)

            classifier.zero_grad()
            with torch.no_grad():
                feature = model(images.to(args.device))

            preds = classifier(feature)

            loss = F.cross_entropy(preds, labels.to(args.device))

            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            lr = lr_scheduler.step()
            local_progress.set_postfix({
                'lr': lr,
                "loss": loss_meter.val,
                'loss_avg': loss_meter.avg
            })

    classifier.eval()
    correct, total = 0, 0
    acc_meter.reset()
    for idx, (images, labels) in enumerate(test_loader):
        with torch.no_grad():
            feature = model(images.to(args.device))
            preds = classifier(feature).argmax(dim=1)
            correct = (preds == labels.to(args.device)).sum().item()
            acc_meter.update(correct / preds.shape[0])
    print(f'Accuracy = {acc_meter.avg*100:.2f}')