Example #1
    def train(epoch):
        model.train()
        train_loss = AverageMeter()
        pbar = tqdm(total=len(train_loader))

        for batch_idx, (index, response, _, mask) in enumerate(train_loader):
            mb = response.size(0)
            index = index.to(device)
            response = response.to(device)
            mask = mask.long().to(device)
            annealing_factor = get_annealing_factor(epoch, batch_idx)
        
            optimizer.zero_grad()
            outputs = model(index, response, mask)
            loss = model.elbo(*outputs, annealing_factor=annealing_factor)
            loss.backward()
            optimizer.step()

            train_loss.update(loss.item(), mb)

            pbar.update()
            pbar.set_postfix({'Loss': train_loss.avg})

        pbar.close()
        print('====> Train Epoch: {} Loss: {:.4f}'.format(epoch, train_loss.avg))

        return train_loss.avg
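Every example on this page relies on an AverageMeter helper that the snippets themselves do not define. A minimal sketch, assuming the usual PyTorch ImageNet-example meter (the optional name/fmt arguments are only used by a couple of the snippets below):

class AverageMeter(object):
    """Tracks the latest value, running sum, count, and average of a scalar metric."""
    def __init__(self, name='', fmt=':f'):
        self.name = name      # label used when formatting, e.g. 'Loss'
        self.fmt = fmt        # printf-style format for val/avg
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # n is the number of samples the value was averaged over
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count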
Example #2
def val(val_loader, model):
    val_nmi = AverageMeter()
    model.eval()

    start_idx = 0
    with torch.no_grad():
        for it, (idx, inputs, labels) in enumerate(val_loader):

            # ============ multi-res forward passes ... ============
            emb, output = model(inputs)
            emb = emb.detach()
            bs = inputs[0].size(0)

            # ============ deepcluster-v2 val nmi ... ============
            nmi = 0
            for h in range(len(args.nmb_prototypes)):
                scores = output[h] / args.temperature
                _, cluster_assignments = scores.max(1)
                nmi += normalized_mutual_info_score(
                    labels.repeat(sum(args.nmb_crops)).cpu().numpy(),
                    cluster_assignments.cpu().numpy())
            nmi /= len(args.nmb_prototypes)

            # ============ misc ... ============
            val_nmi.update(nmi)

    return val_nmi.avg
Example #3
    def get_log_marginal_density(loader):
        model.eval()
        meter = AverageMeter()
        pbar = tqdm(total=len(loader))

        with torch.no_grad():
            for _, response, _, mask in loader:
                mb = response.size(0)
                response = response.to(device)
                mask = mask.long().to(device)

                marginal = model.log_marginal(
                    response, 
                    mask, 
                    num_samples = args.num_posterior_samples,
                )
                marginal = torch.mean(marginal)
                meter.update(marginal.item(), mb)

                pbar.update()
                pbar.set_postfix({'Marginal': meter.avg})
        
        pbar.close()
        print('====> Marginal: {:.4f}'.format(meter.avg))

        return meter.avg
Example #4
    def get_log_marginal_density(loader):
        model.eval()
        meter = AverageMeter()
        pbar = tqdm(total=len(loader))

        with torch.no_grad():
            for _, response, _, mask in loader:
                mb = response.size(0)
                response = response.to(device)
                mask = mask.long().to(device)

                posterior = Importance(
                    model.model,
                    guide=model.guide,
                    num_samples=args.num_posterior_samples,
                )
                posterior = posterior.run(response, mask)
                log_weights = torch.stack(posterior.log_weights)
                marginal = torch.logsumexp(log_weights, 0) - math.log(
                    log_weights.size(0))
                meter.update(marginal.item(), mb)

                pbar.update()
                pbar.set_postfix({'Marginal': meter.avg})

        pbar.close()
        print('====> Marginal: {:.4f}'.format(meter.avg))

        return meter.avg
Example #5
    def meta_val(self, model, meta_val_way, meta_val_shot, disable_tqdm,
                 callback, epoch):
        top1 = AverageMeter()
        model.eval()

        with torch.no_grad():
            tqdm_test_loader = warp_tqdm(self.val_loader, disable_tqdm)
            for i, (inputs, target, _) in enumerate(tqdm_test_loader):
                inputs, target = inputs.to(self.device), target.to(
                    self.device, non_blocking=True)
                output = model(inputs, feature=True)[0].cuda(0)
                train_out = output[:meta_val_way * meta_val_shot]
                train_label = target[:meta_val_way * meta_val_shot]
                test_out = output[meta_val_way * meta_val_shot:]
                test_label = target[meta_val_way * meta_val_shot:]
                train_out = train_out.reshape(meta_val_way, meta_val_shot,
                                              -1).mean(1)
                train_label = train_label[::meta_val_shot]
                prediction = self.metric_prediction(train_out, test_out,
                                                    train_label)
                acc = (prediction == test_label).float().mean()
                top1.update(acc.item())
                if not disable_tqdm:
                    tqdm_test_loader.set_description('Acc {:.2f}'.format(
                        top1.avg * 100))

        if callback is not None:
            callback.scalar('val_acc', epoch + 1, top1.avg, title='Val acc')
        return top1.avg
Example #6
    def train(epoch):
        model.train()
        train_loss = AverageMeter()
        pbar = tqdm(total=len(train_loader))

        for batch_idx, (index, response, _, mask) in enumerate(train_loader):
            mb = response.size(0)
            index = index.to(device)
            response = response.to(device)
            mask = mask.long().to(device)
        
            optimizer.zero_grad()
            response_mu = model(index, response, mask)
            loss = F.binary_cross_entropy(response_mu, response.float(), reduction='none')
            loss = loss * mask
            loss = loss.mean()
            loss.backward()
            optimizer.step()

            train_loss.update(loss.item(), mb)

            pbar.update()
            pbar.set_postfix({'Loss': train_loss.avg})

        pbar.close()
        print('====> Train Epoch: {} Loss: {:.4f}'.format(epoch, train_loss.avg))

        return train_loss.avg
Example #7
def test(model, criterion, test_loader, run_config):
    device = torch.device(run_config['device'])

    model.eval()

    loss_meter = AverageMeter()
    correct_meter = AverageMeter()
    start = time.time()
    with torch.no_grad():
        for step, (data, targets) in enumerate(test_loader):
            data = data.to(device)
            targets = targets.to(device)

            outputs = model(data)
            loss = criterion(outputs, targets)

            _, preds = torch.max(outputs, dim=1)

            loss_ = loss.item()
            correct_ = preds.eq(targets).sum().item()
            num = data.size(0)

            loss_meter.update(loss_, num)
            correct_meter.update(correct_, 1)

        accuracy = correct_meter.sum / len(test_loader.dataset)

        elapsed = time.time() - start

    test_log = collections.OrderedDict({
        'loss': loss_meter.avg,
        'accuracy': accuracy,
        'time': elapsed
    })
    return test_log
Example #8
def validate_with_softmax(val_loader, model, criterion, epoch, writer=None, threshold=0.5):

    # switch to evaluate mode
    model.eval()

    losses = AverageMeter('Loss', ":.4e")
    top1 = AverageMeter('Acc@1', ':6.2f')

    pbar = tqdm(val_loader)
    with torch.no_grad():
        for i, (images, target) in enumerate(pbar):
            if torch.cuda.is_available():
                images = images.cuda()
                target = target.cuda()
        
            # compute output
            output = model(images)
            loss = criterion(output, target)

            acc1 = accuracy(output, target, topk=(1,))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0][0], images.size(0))

            pbar.set_description('Validation')
            
        print(" * Acc@1 {top1.avg:.3f}".format(top1=top1))
        if writer:
            writer.add_scalar('Test/Loss', losses.avg, epoch)
            writer.add_scalar('Test/Top1_acc', top1.avg, epoch)

    return top1.avg
Example #9
    def test(epoch):
        model.eval()
        test_loss = AverageMeter()
        pbar = tqdm(total=len(test_loader))

        with torch.no_grad():
            for _, response, _, mask in test_loader:
                mb = response.size(0)
                response = response.to(device)
                mask = mask.long().to(device)

                if args.n_norm_flows > 0:
                    (
                        response,
                        mask,
                        response_mu,
                        ability_k,
                        ability,
                        ability_mu,
                        ability_logvar,
                        ability_logabsdetjac,
                        item_feat_k,
                        item_feat,
                        item_feat_mu,
                        item_feat_logvar,
                        item_feat_logabsdetjac,
                    ) = model(response, mask)
                    loss = model.elbo(
                        response,
                        mask,
                        response_mu,
                        ability,
                        ability_mu,
                        ability_logvar,
                        item_feat,
                        item_feat_mu,
                        item_feat_logvar,
                        use_kl_divergence=False,
                        ability_k=ability_k,
                        item_feat_k=item_feat_k,
                        ability_logabsdetjac=ability_logabsdetjac,
                        item_logabsdetjac=item_feat_logabsdetjac,
                    )
                else:
                    outputs = model(response, mask)
                    loss = model.elbo(*outputs)
                test_loss.update(loss.item(), mb)

                pbar.update()
                pbar.set_postfix({'Loss': test_loss.avg})

        pbar.close()
        print('====> Test Epoch: {} Loss: {:.4f}'.format(epoch, test_loss.avg))

        return test_loss.avg
Example #10
    def train(epoch):
        model.train()
        train_loss = AverageMeter()
        pbar = tqdm(total=len(train_loader))

        for batch_idx, (_, response, _, _) in enumerate(train_loader):
            mb = response.size(0)
            item_index = torch.arange(num_item).to(device)
            response = response.to(device)

            if mb != args.batch_size:
                pbar.update()
                continue

            with torch.no_grad():
                item_index = item_index.unsqueeze(0).repeat(mb, 1)
                item_index[(response == -1).squeeze(2)] = -1

                # build what dkvmn_irt expects
                q_data = item_index.clone()
                a_data = response.clone().squeeze(2)
                # ??? https://github.com/ckyeungac/DeepIRT/blob/master/load_data.py
                qa_data = q_data + a_data * num_item
                qa_data[(response == -1).squeeze(2)] = -1

                # map q_data and qa_data to 0 to N+1
                q_data = q_data + 1
                qa_data = qa_data + 1
                label = response.clone().squeeze(2)

            optimizer.zero_grad()
            pred_zs, student_abilities, question_difficulties = \
                model(q_data, qa_data, label)
            loss = model.get_loss(
                pred_zs,
                student_abilities,
                question_difficulties,
                label,
            )
            loss.backward()
            # https://github.com/ckyeungac/DeepIRT/blob/master/configs.py
            nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
            optimizer.step()

            train_loss.update(loss.item(), mb)

            pbar.update()
            pbar.set_postfix({'Loss': train_loss.avg})

        pbar.close()
        print('====> Train Epoch: {} Loss: {:.4f}'.format(
            epoch, train_loss.avg))

        return train_loss.avg
Example #11
def save_json(args, model, reglog, optimizer, loader):
    pred_label = []
    log_top1 = AverageMeter()

    for iter_epoch, (inp, target) in enumerate(loader):
        # measure data loading time

        learning_rate_decay(optimizer, len(loader) * args.epoch + iter_epoch, args.lr)

        # start at iter start_iter
        if iter_epoch < args.start_iter:
            continue

        # move to gpu
        inp = inp.cuda(non_blocking=True)
        target = target.cuda(non_blocking=True)
        if 'VOC2007' in args.data_path:
            target = target.float()

        # forward
        with torch.no_grad():
            output = model(inp)

        output = reglog(output)
        _, pred = output.topk(1, 1, True, True)
        pred = pred.t()

        pred_var = pred.data.cpu().numpy().reshape(-1) 
        for i in range(len(pred_var)):
            pred_label.append(pred_var[i])
  
        prec1 = accuracy(args, output, target)
        log_top1.update(prec1.item(), output.size(0)) 


    def load_json(file_path):
        assert os.path.exists(file_path), "{} does not exist".format(file_path)
        with open(file_path, 'r') as fp:
            data = json.load(fp)
        img_names = list(data.keys())
        return img_names
    
    json_predictions,img_names = {}, []
    img_names = load_json('./val_targets.json')

    for idx in range(len(pred_label)):
        json_predictions[img_names[idx]] = int(pred_label[idx])
    output_file = os.path.join(args.json_save_path, args.json_save_name)
 
    with open(output_file, 'w') as fp:
        json.dump(json_predictions, fp)   

    return log_top1.avg
Example #12
def val(val_loader, model, queue):
    norm_mut_info = AverageMeter()
    use_the_queue = False

    model.eval()
    end = time.time()
    with torch.no_grad():
        for it, (inputs, labels) in enumerate(val_loader):
            # normalize the prototypes
            with torch.no_grad():
                w = model.module.prototypes.weight.data.clone()
                w = nn.functional.normalize(w, dim=1, p=2)
                model.module.prototypes.weight.copy_(w)

            # ============ multi-res forward passes ... ============
            embedding, output = model(inputs)
            embedding = embedding.detach()
            bs = inputs[0].size(0)

            # ============ swav loss ... ============
            loss = 0
            for i, crop_id in enumerate(args.crops_for_assign):
                with torch.no_grad():
                    out = output[bs * crop_id:bs * (crop_id + 1)].detach()

                    # time to use the queue
                    if queue is not None:
                        if use_the_queue or not torch.all(queue[i,
                                                                -1, :] == 0):
                            use_the_queue = True
                            out = torch.cat(
                                (torch.mm(queue[i],
                                          model.module.prototypes.weight.t()),
                                 out))
                        # fill the queue
                        queue[i, bs:] = queue[i, :-bs].clone()
                        queue[i, :bs] = embedding[crop_id * bs:(crop_id + 1) *
                                                  bs]

                    # get assignments
                    q = distributed_sinkhorn(out)[-bs:]

            score, cluster_assignments = q.max(1)
            cluster_assignments = cluster_assignments.cpu().numpy()
            nmi = normalized_mutual_info_score(labels.cpu().numpy(),
                                               cluster_assignments)

            # ============ misc ... ============
            norm_mut_info.update(nmi)

    return norm_mut_info.avg
Example #13
    def validate(self, epoch):
        self.model.eval()

        val_loss = AverageMeter()
        val_acc = AverageMeter()
        val_acc_cls = AverageMeter()
        val_mean_iu = AverageMeter()
        # inputs_all, gts_all, predictions_all = [], [], []

        for i, (inputs, gts) in enumerate(self.val_loader):
            N = inputs.size(0)
            inputs = inputs.to(self.device)
            gts = gts.to(self.device)
            # gts = gts.to(self.device, dtype=torch.float32)

            outputs = self.model(inputs)
            preds = torch.argmax(outputs, dim=1)
            # gts = F.upsample(torch.unsqueeze(gts, 0), outputs.size()[2:], mode='nearest')
            # gts = torch.squeeze(gts, 0).to(torch.int64)
            val_loss.update(self.criterion(outputs, gts).item(), N)
            val_metric = evaluate(preds.detach(), gts.detach(),
                                  self.num_classes)
            val_acc.update(val_metric[0])
            val_acc_cls.update(val_metric[1])
            val_mean_iu.update(val_metric[2])

        return val_loss, val_acc, val_acc_cls, val_mean_iu
Example #14
    def train(self, epoch):
        self.model.train()

        train_loss = AverageMeter()
        train_acc = AverageMeter()
        train_acc_cls = AverageMeter()
        train_mean_iu = AverageMeter()

        for i, (inputs, targets) in enumerate(self.train_loader):
            inputs = inputs.to(self.device)
            targets = targets.to(self.device)
            # targets = targets.to(self.device, dtype=torch.float32)
            self.optim.zero_grad()
            outputs = self.model(inputs)
            preds = torch.argmax(outputs, dim=1)

            # targets = F.upsample(torch.unsqueeze(targets, 0), outputs.size()[2:], mode='nearest')
            # targets = torch.squeeze(targets, 0).to(torch.int64)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optim.step()

            train_loss.update(loss.item(), inputs.size(0))
            train_metric = evaluate(preds.detach(), targets.detach(),
                                    self.num_classes)
            train_acc.update(train_metric[0])
            train_acc_cls.update(train_metric[1])
            train_mean_iu.update(train_metric[2])

            if epoch == 0 and i == 1:
                print('iteration is started on {}'.format(self.device))

        return train_loss, train_acc, train_acc_cls, train_mean_iu
Example #15
    def test(epoch):
        model.eval()
        test_loss = AverageMeter()
        pbar = tqdm(total=len(test_loader))

        with torch.no_grad():
            for _, response, _, _ in test_loader:
                mb = response.size(0)
                item_index = torch.arange(num_item).to(device)
                response = response.to(device)

                if mb != args.batch_size:
                    pbar.update()
                    continue

                with torch.no_grad():
                    item_index = item_index.unsqueeze(0).repeat(mb, 1)
                    item_index[(response == -1).squeeze(2)] = -1

                    # build what dkvmn_irt expects
                    q_data = item_index.clone()
                    a_data = response.clone().squeeze(2)
                    # ??? https://github.com/ckyeungac/DeepIRT/blob/master/load_data.py
                    qa_data = q_data + a_data * num_item
                    qa_data[(response == -1).squeeze(2)] = -1

                    # map q_data and qa_data to 0 to N+1
                    q_data = q_data + 1
                    qa_data = qa_data + 1
                    label = response.clone().squeeze(2)

                pred_zs, student_abilities, question_difficulties = \
                    model(q_data, qa_data, label)
                loss = model.get_loss(
                    pred_zs,
                    student_abilities,
                    question_difficulties,
                    label,
                )
                test_loss.update(loss.item(), mb)

                pbar.update()
                pbar.set_postfix({'Loss': test_loss.avg})

        pbar.close()
        print('====> Test Epoch: {} Loss: {:.4f}'.format(epoch, test_loss.avg))

        return test_loss.avg
Example #16
    def train(epoch):
        model.train()
        train_loss = AverageMeter()
        pbar = tqdm(total=len(train_loader))

        for batch_idx, (_, response, _, mask) in enumerate(train_loader):
            mb = response.size(0)
            response = response.to(device)
            mask = mask.long().to(device)
            annealing_factor = get_annealing_factor(epoch, batch_idx)
        
            optimizer.zero_grad()
            if args.n_norm_flows > 0:
                (
                    response, mask, response_mu, 
                    ability_k, ability, 
                    ability_mu, ability_logvar, ability_logabsdetjac, 
                    item_feat_k, item_feat, 
                    item_feat_mu, item_feat_logvar, item_feat_logabsdetjac,
                ) = model(response, mask)
                loss = model.elbo(
                    response, mask, response_mu, 
                    ability, ability_mu, ability_logvar,
                    item_feat, item_feat_mu, item_feat_logvar, 
                    annealing_factor = annealing_factor,
                    use_kl_divergence = False,
                    ability_k = ability_k,
                    item_feat_k = item_feat_k,
                    ability_logabsdetjac = ability_logabsdetjac,
                    item_logabsdetjac = item_feat_logabsdetjac,
                )
            else:
                outputs = model(response, mask)
                loss = model.elbo(*outputs, annealing_factor=annealing_factor,
                                use_kl_divergence=True)
            loss.backward()
            optimizer.step()

            train_loss.update(loss.item(), mb)

            pbar.update()
            pbar.set_postfix({'Loss': train_loss.avg})

        pbar.close()
        print('====> Train Epoch: {} Loss: {:.4f}'.format(epoch, train_loss.avg))

        return train_loss.avg
Example #17
    def val(epoch):
        model.eval()
        loss_meter = AverageMeter()

        with torch.no_grad():
            for data in val_loader:
                batch_size = data.size(0)
                data = data.to(device)

                z_mu, z_logvar = model(data)
                loss = compiled_inference_objective(z, z_mu, z_logvar)

                loss_meter.update(loss.item(), batch_size)

        print('====> Test Epoch: {}\tLoss: {:.4f}'.format(
            epoch, loss_meter.avg))
        return loss_meter.avg
Example #18
def train(model, optimizer, scheduler, criterion, train_loader, run_config):

    device = torch.device(run_config['device'])

    for param_group in optimizer.param_groups:
        current_lr = param_group['lr']

    model.train()

    loss_meter = AverageMeter()
    accuracy_meter = AverageMeter()
    start = time.time()

    for step, (data, targets) in enumerate(train_loader):

        if torch.cuda.device_count() == 1:
            data = data.to(device)
            targets = targets.to(device)

        optimizer.zero_grad()

        outputs = model(data)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        loss_ = loss.item()
        num = data.size(0)
        accuracy = utils.accuracy(outputs, targets)[0].item()

        loss_meter.update(loss_, num)
        accuracy_meter.update(accuracy, num)

        if scheduler is not None:
            scheduler.step()

    elapsed = time.time() - start

    train_log = collections.OrderedDict({
        'loss': loss_meter.avg,
        'accuracy': accuracy_meter.avg,
        'time': elapsed
    })
    return train_log
Example #19
def validate_network(val_loader, model, linear_classifier):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top2 = AverageMeter()
    global best_acc

    # switch to evaluate mode
    model.eval()
    linear_classifier.eval()

    criterion = nn.CrossEntropyLoss().cuda()

    with torch.no_grad():
        end = time.perf_counter()
        for i, (inp, target) in enumerate(val_loader):

            # move to gpu
            inp = inp.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # compute output
            output = linear_classifier(model(inp))
            loss = criterion(output, target)

            acc1, acc2 = accuracy(output, target, topk=(1, 2))
            losses.update(loss.item(), inp.size(0))

            top2.update(acc2[0], inp.size(0))
            top1.update(acc1[0], inp.size(0))

            # measure elapsed time
            batch_time.update(time.perf_counter() - end)
            end = time.perf_counter()

    if top1.avg.item() > best_acc:
        best_acc = top1.avg.item()

    if args.rank == 0:
        logger.info("Test:\t"
                    "Time {batch_time.avg:.3f}\t"
                    "Loss {loss.avg:.4f}\t"
                    "Acc@1 {top1.avg:.3f}\t"
                    "Acc@2 {top2.avg:.3f}"
                    "Best Acc@1 so far {acc:.1f}".format(batch_time=batch_time,
                                                         loss=losses,
                                                         top1=top1,
                                                         top2=top2,
                                                         acc=best_acc))

    return losses.avg, top1.avg.item(), top2.avg.item()
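Examples #8 and #19 call an accuracy(output, target, topk=...) helper that is not shown on this page. A minimal sketch in the style of the PyTorch ImageNet example (an assumption about the helper, not code copied from these repositories), returning one precision-at-k tensor per requested k:

def accuracy(output, target, topk=(1,)):
    """Computes precision@k for the specified values of k."""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # top-k predicted class indices, shape (maxk, batch_size)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res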
Example #20
    def test(epoch):
        model.eval()
        test_loss = AverageMeter()
        pbar = tqdm(total=len(test_loader))

        with torch.no_grad():
            for _, response, _, mask in test_loader:
                mb = response.size(0)
                response = response.to(device)
                mask = mask.long().to(device)

                loss = svi.evaluate_loss(response, mask)
                test_loss.update(loss, mb)

                pbar.update()
                pbar.set_postfix({'Loss': test_loss.avg})

        pbar.close()
        print('====> Test Epoch: {} Loss: {:.4f}'.format(epoch, test_loss.avg))

        return test_loss.avg
Example #21
    def train(epoch):
        model.train()
        train_loss = AverageMeter()
        pbar = tqdm(total=len(train_loader))

        for batch_idx, (_, response, _, mask) in enumerate(train_loader):
            mb = response.size(0)
            response = response.to(device)
            mask = mask.long().to(device)
            annealing_factor = get_annealing_factor(epoch, batch_idx)

            loss = svi.step(response, mask, annealing_factor)
            train_loss.update(loss, mb)

            pbar.update()
            pbar.set_postfix({'Loss': train_loss.avg})

        pbar.close()
        print('====> Train Epoch: {} Loss: {:.4f}'.format(
            epoch, train_loss.avg))

        return train_loss.avg
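Examples #20 and #21 drive evaluation and training through an svi object (svi.evaluate_loss, svi.step) that is constructed elsewhere. A hedged sketch of how such a Pyro SVI object is typically built; the optimizer settings and the model.model/model.guide attributes are assumptions, and extra positional arguments of svi.step(...) are simply forwarded to the model and guide:

from pyro.infer import SVI, Trace_ELBO
from pyro.optim import Adam

# model.model is the generative model, model.guide the variational guide
svi = SVI(model.model, model.guide, Adam({'lr': 1e-3}), loss=Trace_ELBO())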
Example #22
    def test(epoch):
        model.eval()
        test_loss = AverageMeter()
        pbar = tqdm(total=len(test_loader))

        with torch.no_grad():
            for index, response, _, mask in test_loader:
                mb = response.size(0)
                index = index.to(device)
                response = response.to(device)
                mask = mask.long().to(device)

                response_mu = model(index, response, mask)
                loss = F.binary_cross_entropy(response_mu, response.float())
                test_loss.update(loss.item(), mb)

                pbar.update()
                pbar.set_postfix({'Loss': test_loss.avg})

        pbar.close()
        print('====> Test Epoch: {} Loss: {:.4f}'.format(epoch, test_loss.avg))

        return test_loss.avg
Example #23
    def step(self):
        with torch.no_grad():
            stats = AverageMeter()
            weight_decays = []
            for group in self.optim.param_groups:

                # -- takes weight decay control from wrapped optimizer
                weight_decay = group[
                    'weight_decay'] if 'weight_decay' in group else 0
                weight_decays.append(weight_decay)

                # -- user wants to exclude this parameter group from LARS
                #    adaptation
                if ('LARS_exclude' in group) and group['LARS_exclude']:
                    continue
                group['weight_decay'] = 0

                for p in group['params']:
                    if p.grad is None:
                        continue
                    param_norm = torch.norm(p.data)
                    grad_norm = torch.norm(p.grad.data)

                    if param_norm != 0 and grad_norm != 0:
                        adaptive_lr = self.trust_coefficient * (param_norm) / (
                            grad_norm + param_norm * weight_decay + self.eps)

                        stats.update(adaptive_lr)
                        p.grad.data += weight_decay * p.data
                        p.grad.data *= adaptive_lr

        self.optim.step()
        # -- return weight decay control to wrapped optimizer
        for i, group in enumerate(self.optim.param_groups):
            group['weight_decay'] = weight_decays[i]

        return stats
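The step() method above reads self.optim, self.trust_coefficient and self.eps from a wrapper object whose constructor is not shown. A hypothetical constructor consistent with those attributes; the class name and default values are illustrative, not taken from the source:

class LARS(object):
    """Layer-wise Adaptive Rate Scaling wrapper around an existing optimizer."""
    def __init__(self, optimizer, trust_coefficient=0.001, eps=1e-8):
        self.optim = optimizer                      # wrapped optimizer driven by step()
        self.trust_coefficient = trust_coefficient  # scales the adaptive learning rate
        self.eps = eps                              # guards against division by zero
        # expose param_groups so LR schedules can be applied to the wrapper directly
        self.param_groups = optimizer.param_groups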
Example #24
    def train(epoch):
        model.train()
        loss_meter = AverageMeter()

        for batch_idx, data_list in enumerate(train_loader):
            x_list = [data[0] for data in data_list]
            batch_size = len(x_list[0])

            loss = 0
            for i in range(n_planes):
                x_i = x_list[i]
                x_i = x_i.to(device)

                context_x_i, context_z_i = sample_minibatch(
                    train_datasets[i], batch_size, args.n_mlp_samples)
                context_x_i = context_x_i.to(device)
                context_z_i = context_z_i.to(device)
                context_x_z_i = torch.cat([context_x_i, context_z_i], dim=2)

                z_mu_i, z_logvar_i = model(x_i, context_x_z_i)
                loss_i = compiled_inference_objective(z_i, z_mu_i, z_logvar_i)
                loss += loss_i

            loss_meter.update(loss.item(), batch_size)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * batch_size, len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), -loss_meter.avg))

        print('====> Train Epoch: {}\tLoss: {:.4f}'.format(
            epoch, -loss_meter.avg))
        return loss_meter.avg
Example #25
    def train(epoch):
        model.train()
        loss_meter = AverageMeter()

        for batch_idx, data in enumerate(train_loader):
            batch_size = data.size(0)
            data = data.to(device)

            z_mu, z_logvar = model(data)
            loss = compiled_inference_objective(z, z_mu, z_logvar)
            loss_meter.update(loss.item(), batch_size)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * batch_size, len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), -loss_meter.avg))

        print('====> Train Epoch: {}\tLoss: {:.4f}'.format(
            epoch, loss_meter.avg))
        return loss_meter.avg
Example #26
def val_epoch(epoch, data_loader, model, criterion, opt, logger):
    print('\t************** VALIDATION **************')
    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        if not opt.no_cuda:
            targets = targets.cuda(non_blocking=True)
        # Variable(..., volatile=True) is deprecated; torch.no_grad() serves the same purpose
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

        losses.update(loss.item(), inputs.size(0))
        accuracies.update(acc, inputs.size(0))

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        print('\tBatch: [{0}/{1}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(i + 1,
                                                         len(data_loader),
                                                         batch_time=batch_time,
                                                         data_time=data_time,
                                                         loss=losses,
                                                         acc=accuracies))

    logger.log({'epoch': epoch, 'loss': losses.avg, 'acc': accuracies.avg})

    return accuracies.avg
Example #27
def train(train_loader, model, optimizer, epoch, lr_schedule, queue, args):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    softmax = nn.Softmax(dim=1).cuda()
    model.train()

    end = time.time()
    for it, inputs in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # update learning rate
        iteration = epoch * len(train_loader) + it
        for param_group in optimizer.param_groups:
            param_group["lr"] = lr_schedule[iteration]

        # normalize the prototypes
        with torch.no_grad():
            w = model.module.prototypes.weight.data.clone()
            w = nn.functional.normalize(w, dim=1, p=2)
            model.module.prototypes.weight.copy_(w)

        # ============ data split ===========
        inputs, target = inputs
        # ============ multi-res forward passes ... ============
        embedding, output = model(inputs)
        embedding = embedding.detach()
        bs = inputs[0].size(0)

        # ============ EMA class-wise feature vector ==========
        for b in range(bs):
            queue[target[b]] = queue[target[b]] * 0.99 + (
                embedding[b] + embedding[bs + b]) * 0.01 / 2
        queue = nn.functional.normalize(queue, dim=1, p=2)
        dist.all_reduce(queue)
        queue /= args.world_size
        queue = nn.functional.normalize(queue, dim=1, p=2)
        # ============ swav loss ... ============
        loss = 0

        with torch.no_grad():
            q = torch.mm(queue, model.module.prototypes.weight.t())
            q = q / args.epsilon
            if args.improve_numerical_stability:
                M = torch.max(q)
                dist.all_reduce(M, op=dist.ReduceOp.MAX)
                q -= M

            q = torch.exp(q).t()
            q = sinkhorn(q, args.sinkhorn_iterations)
        # q = distributed_sinkhorn(q, args.sinkhorn_iterations)

        # match q /w label (1000, num_p) --> (bsz, num_p)
        for b in range(bs):
            if b == 0:
                matched_q = q[target[b]].unsqueeze(0)
            else:
                matched_q = torch.cat([matched_q, q[target[b]].unsqueeze(0)],
                                      0)

        # cluster assignment prediction
        subloss = 0
        for v in np.arange(np.sum(args.nmb_crops)):
            p = softmax(output[bs * v:bs * (v + 1)] / args.temperature)
            subloss -= torch.mean(torch.sum(matched_q * torch.log(p), dim=1))
        loss += subloss / np.sum(args.nmb_crops)

        # ============ backward and optim step ... ============
        optimizer.zero_grad()
        if args.use_fp16:
            with apex.amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        # cancel some gradients
        if iteration < args.freeze_prototypes_niters:
            for name, p in model.named_parameters():
                if "prototypes" in name:
                    p.grad = None
        optimizer.step()

        # ============ misc ... ============
        losses.update(loss.item(), inputs[0].size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        if args.rank == 0 and it % 50 == 0:
            logger.info("Epoch: [{0}][{1}]\t"
                        "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                        "Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
                        "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                        "Lr: {lr:.4f}".format(
                            epoch,
                            it,
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            lr=optimizer.optim.param_groups[0]["lr"],
                        ))
    return (epoch, losses.avg), queue
Example #28
def train_loc_model(model, data_loaders, optimizer, scheduler, seg_loss, num_epochs, weight_dir, snapshot_name, log_dir, best_score=0):

    writer = SummaryWriter(log_dir + 'localization')
    print('Tensorboard is recording into folder: ' + log_dir + 'localization')

    torch.cuda.empty_cache()

    for epoch in range(num_epochs):
        losses = AverageMeter()

        dices = AverageMeter()
        iterator = data_loaders['train']
        iterator = tqdm(iterator)
        model.train()
        for i, sample in enumerate(iterator):
            imgs = sample["img"].cuda(non_blocking=True)
            msks = sample["msk"].cuda(non_blocking=True)
        
            out = model(imgs)

            loss = seg_loss(out, msks)

            with torch.no_grad():
                _probs = torch.sigmoid(out[:, 0, ...])
                dice_sc = 1 - dice_round(_probs, msks[:, 0, ...])

            losses.update(loss.item(), imgs.size(0))

            dices.update(dice_sc, imgs.size(0))

            iterator.set_description("Epoch {}/{}, lr {:.7f}; Loss {loss.val:.4f} ({loss.avg:.4f}); Dice {dice.val:.4f} ({dice.avg:.4f})".format(
                    epoch, num_epochs, scheduler.get_lr()[-1], loss=losses, dice=dices))
            
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.999)
            optimizer.step()

            writer.add_scalar('Train/Loss', losses.avg, epoch)
            writer.add_scalar('Train/Dice', dices.avg, epoch)
            writer.flush()
        
        if epoch % 2 == 0:
            torch.cuda.empty_cache()

            model = model.eval()
            dices0 = []

            _thr = 0.5
            iterator = data_loaders['val']
            iterator = tqdm(iterator)
            with torch.no_grad():
                for i, sample in enumerate(iterator):
                    msks = sample["msk"].numpy()
                    imgs = sample["img"].cuda(non_blocking=True)
            
                    out = model(imgs)

                    msk_pred = torch.sigmoid(out[:, 0, ...]).cpu().numpy()
            
                    for j in range(msks.shape[0]):
                        dices0.append(dice(msks[j, 0], msk_pred[j] > _thr))

            d = np.mean(dices0)

            writer.add_scalar('Val/Dice', d, epoch)
            writer.flush()

            print("Val Dice: {}".format(d))

            if d > best_score:
                best_score = d
                torch.save({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_score': d,
                }, path.join(weight_dir, snapshot_name + '_best'))

            print("score: {}\tscore_best: {}".format(d, best_score))

        writer.close()
            
    return best_score
Example #29
def train_cls_model(model, data_loaders, optimizer, scheduler, seg_loss, ce_loss, num_epochs, weight_dir, snapshot_name, log_dir, best_score=0):
    torch.cuda.empty_cache()

    writer = SummaryWriter(log_dir + 'classification')
    print('Tensorboard is recording into folder: ' + log_dir + 'classification')

    for epoch in range(num_epochs):
        losses = AverageMeter()
        dices = AverageMeter()
        
        iterator = data_loaders['train']
        iterator = tqdm(iterator)
        model.train()
        for i, sample in enumerate(iterator):
            imgs = sample["img"].cuda(non_blocking=True)
            msks = sample["msk"].cuda(non_blocking=True)
            lbl_msk = sample["lbl_msk"].cuda(non_blocking=True)
        
            out = model(imgs)

            loss_loc = seg_loss(out[:, 0, ...], msks[:, 0, ...])
            loss1 = seg_loss(out[:, 1, ...], msks[:, 1, ...])
            loss2 = seg_loss(out[:, 2, ...], msks[:, 2, ...])
            loss3 = seg_loss(out[:, 3, ...], msks[:, 3, ...])
            loss4 = seg_loss(out[:, 4, ...], msks[:, 4, ...])

            loss5 = ce_loss(out, lbl_msk)

            loss = 0.1 * loss_loc + 0.1 * loss1 + 0.3 * loss2 + 0.3 * loss3 + 0.2 * loss4 + loss5 * 11

            with torch.no_grad():
                _probs = torch.sigmoid(out[:, 0, ...])
                dice_sc = 1 - dice_round(_probs, msks[:, 0, ...])

            losses.update(loss.item(), imgs.size(0))

            dices.update(dice_sc, imgs.size(0))

            iterator.set_description("Epoch {}/{}, lr {:.7f}; Loss {loss.val:.4f} ({loss.avg:.4f}); Dice {dice.val:.4f} ({dice.avg:.4f})".format(
                    epoch, num_epochs, scheduler.get_lr()[-1], loss=losses, dice=dices))
        
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.999)
            optimizer.step()

            writer.add_scalar('Train/Loss', losses.avg, epoch)
            writer.add_scalar('Train/Dice', dices.avg, epoch)
            writer.add_scalar('Train/Loc_loss', loss_loc, epoch)
            writer.add_scalar('Train/NoDamage_loss', loss1, epoch)
            writer.add_scalar('Train/MinorDamage_loss', loss2, epoch)
            writer.add_scalar('Train/MajorDamage_loss', loss3, epoch)
            writer.add_scalar('Train/Destroyed_loss', loss4, epoch)
            writer.add_scalar('Train/Cls_loss', loss5, epoch)

            writer.flush()
        
        if epoch % 2 == 0:
            torch.cuda.empty_cache()

            model = model.eval()
            dices0 = []

            tp = np.zeros((4,))
            fp = np.zeros((4,))
            fn = np.zeros((4,))

            _thr = 0.3
            
            iterator = data_loaders['val']
            iterator = tqdm(iterator)
            with torch.no_grad():
                for i, sample in enumerate(iterator):
                    msks = sample["msk"].numpy()
                    lbl_msk = sample["lbl_msk"].numpy()
                    imgs = sample["img"].cuda(non_blocking=True)
                    out = model(imgs)
                    
                    msk_pred = torch.sigmoid(out[:, 0, ...]).cpu().numpy()
                    msk_damage_pred = torch.sigmoid(out).cpu().numpy()[:, 1:, ...]
            
                    for j in range(msks.shape[0]):
                        dices0.append(dice(msks[j, 0], msk_pred[j] > _thr))

                        targ = lbl_msk[j][msks[j, 0] > 0]
                        pred = msk_damage_pred[j].argmax(axis=0)
                        pred = pred * (msk_pred[j] > _thr)
                        pred = pred[msks[j, 0] > 0]
                        for c in range(4):
                            tp[c] += np.logical_and(pred == c, targ == c).sum()
                            fn[c] += np.logical_and(pred != c, targ == c).sum()
                            fp[c] += np.logical_and(pred == c, targ != c).sum()

            d0 = np.mean(dices0)
            f1_sc = np.zeros((4,))
            
            for c in range(4):
                f1_sc[c] = 2 * tp[c] / (2 * tp[c] + fp[c] + fn[c])

            f1 = 4 / np.sum(1.0 / (f1_sc + 1e-6))

            sc = 0.3 * d0 + 0.7 * f1
            print("Val Score: {}, Dice: {}, F1: {}, F1_no-damage: {}, F1_minor-damage: {}, F1_major-damage: {}, F1_destroyed: {}".format(
                sc, d0, f1, f1_sc[0], f1_sc[1], f1_sc[2], f1_sc[3]))

            writer.add_scalar('Val/Score', sc, epoch)
            writer.add_scalar('Val/Dice', d0, epoch)
            writer.add_scalar('Val/Cls_F1', f1, epoch)
            writer.add_scalar('Val/NoDamage_F1', f1_sc[0], epoch)
            writer.add_scalar('Val/MinorDamage_F1', f1_sc[1], epoch)
            writer.add_scalar('Val/MajorDamage_F1', f1_sc[2], epoch)
            writer.add_scalar('Val/Destroyed_F1', f1_sc[3], epoch)

            writer.flush()
            
            if sc > best_score:
                torch.save({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'best_score': sc,
                }, path.join(weight_dir, snapshot_name + '_best'))
                best_score = sc

            print("score: {}\tscore_best: {}".format(sc, best_score))
        
        writer.close()

    return best_score
Example #30
def train(loader, model, optimizer, epoch, schedule, local_memory_index,
          local_memory_embeddings):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    model.train()
    cross_entropy = nn.CrossEntropyLoss(ignore_index=-100)

    assignments = cluster_memory(model, local_memory_index,
                                 local_memory_embeddings, len(loader.dataset))
    logger.info('Clustering for epoch {} done.'.format(epoch))

    end = time.time()
    start_idx = 0
    for it, (idx, inputs) in enumerate(loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # update learning rate
        iteration = epoch * len(loader) + it
        for param_group in optimizer.param_groups:
            param_group["lr"] = schedule[iteration]

        # ============ multi-res forward passes ... ============
        emb, output = model(inputs)
        emb = emb.detach()
        bs = inputs[0].size(0)

        # ============ deepcluster-v2 loss ... ============
        loss = 0
        for h in range(len(args.nmb_prototypes)):
            scores = output[h] / args.temperature
            targets = assignments[h][idx].repeat(sum(
                args.nmb_crops)).cuda(non_blocking=True)
            loss += cross_entropy(scores, targets)
        loss /= len(args.nmb_prototypes)

        # ============ backward and optim step ... ============
        optimizer.zero_grad()
        loss.backward()

        # cancel some gradients
        if iteration < args.freeze_prototypes_niters:
            for name, p in model.named_parameters():
                if "prototypes" in name:
                    p.grad = None
        optimizer.step()

        # ============ update memory banks ... ============
        local_memory_index[start_idx:start_idx + bs] = idx
        for i, crop_idx in enumerate(args.crops_for_assign):
            local_memory_embeddings[i][start_idx : start_idx + bs] = \
                emb[crop_idx * bs : (crop_idx + 1) * bs]
        start_idx += bs

        # ============ misc ... ============
        losses.update(loss.item(), inputs[0].size(0))
        batch_time.update(time.time() - end)
        end = time.time()
        if args.rank == 0 and it % 50 == 0:
            logger.info("Epoch: [{0}][{1}]\t"
                        "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                        "Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
                        "Loss {loss.val:.4f} ({loss.avg:.4f})\t"
                        "Lr: {lr:.4f}".format(
                            epoch,
                            it,
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            lr=optimizer.optim.param_groups[0]["lr"],
                        ))
    return (epoch, losses.avg), local_memory_index, local_memory_embeddings