Code example #1
def cpu_sk(self):
    """ Sinkhorn Knopp optimization on CPU
        * stores activations to RAM
        * does matrix-vector multiplies on CPU
        * slower than GPU
    """
    # 1. aggregate inputs:
    N = len(self.pseudo_loader.dataset)
    if self.num_heads == 1:
        self.PS = np.zeros((N, self.num_clusters_per_head), dtype=self.dtype)
    else:
        self.PS_pre = np.zeros((N, self.presize), dtype=self.dtype)
    agg_start = time.time()  # start of the full aggregation pass, used in the summary print below
    now = time.time()
    l_dl = len(self.pseudo_loader)
    batch_time = MovingAverage(intertia=0.9)
    self.model.headcount = 1
    for batch_idx, (data, _, _selected) in enumerate(self.pseudo_loader):
        data = data.to(self.device)
        mass = data.size(0)
        if self.num_heads == 1:
            p = nn.functional.softmax(self.model(data), 1)
            self.PS[_selected, :] = p.detach().cpu().numpy().astype(self.dtype)
        else:
            p = self.model(data)
            self.PS_pre[_selected, :] = p.detach().cpu().numpy().astype(self.dtype)
        batch_time.update(time.time() - now)
        now = time.time()
        if batch_idx % 50 == 0:
            print(f"Aggregating batch {batch_idx:03}/{l_dl}, speed: {mass / batch_time.avg:04.1f}Hz",
                  end='\r', flush=True)
    self.model.headcount = self.num_heads
    print("Aggreg of outputs  took {0:.2f} min".format((time.time() - now) / 60.), flush=True)

    # 2. solve label assignment via sinkhorn-knopp:
    if self.num_heads == 1:
        optimize_L_sk(self, nh=0)
    else:
        for nh in range(self.num_heads):
            print(f"computing head {nh} ", end="\r", flush=True)
            tl = getattr(self.model, f"top_layer{nh:d}")
            time_mat = time.time()

            # free the previous head's activation matrix before building the next one
            try:
                del self.PS
            except AttributeError:
                pass

            # apply this head's final FC layer (a matmul plus a bias add)
            self.PS = (self.PS_pre @ tl.weight.cpu().numpy().T.astype(self.dtype)
                       + tl.bias.cpu().numpy().astype(self.dtype))
            print(f"matmul took {(time.time() - time_mat) / 60:.2f}min", flush=True)
            self.PS = py_softmax(self.PS, 1)
            optimize_L_sk(self, nh=nh)
    return
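The helpers py_softmax and optimize_L_sk are called above but not shown. Below is a minimal, self-contained sketch of what they compute, assuming PS is the (N x K) matrix of softmax predictions and lamb is the Sinkhorn regularization weight from the self-labelling paper; the repo's actual implementation may differ in details such as in-place updates and convergence checks.

import numpy as np

def py_softmax(x, axis=None):
    # numerically stable softmax along the given axis
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

def sinkhorn_knopp_labels(PS, lamb=25.0, tol=1e-1):
    # equipartitioned label assignment via Sinkhorn-Knopp: alternately rescale
    # the rows and columns of PS**lamb until the transport plan has uniform marginals
    N, K = PS.shape
    P = np.power(PS, lamb).T            # (K, N)
    c = np.ones((N, 1)) / N             # column marginals: uniform over samples
    err = np.inf
    while err > tol:
        r = (1. / K) / (P @ c)          # row scaling: uniform mass per cluster
        c_new = (1. / N) / (P.T @ r)    # column scaling: uniform mass per sample
        err = np.nansum(np.abs(c / c_new - 1))
        c = c_new
    P *= c.squeeze()                    # plan Q = diag(r) P diag(c): scale columns...
    P = P.T * r.squeeze()               # ...then rows, transposed back to (N, K)
    return np.argmax(P, axis=1)         # hard label per sample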
Code example #2
File: multigpu.py Project: speedcell4/self-label
def aggreg_multi_gpu(model,
                     dataloader,
                     hc,
                     dim,
                     TYPE=torch.float64,
                     model_gpus=1):
    """"Accumulate activations and save them on multiple GPUs
        * this function assumes the model is on the first `model_gpus` GPUs
          so that it can write the activations on the remaining ones
        * it splits the activations evenly between the remaining GPUs
    """
    # number of gpus to store
    ngpu_store = torch.cuda.device_count() - model_gpus

    # number of batches in DL
    l_dl = len(dataloader)

    # number of batches each gpu gets
    batches_per_gpu = l_dl // ngpu_store

    # number of data each gpu gets
    points_per_gpu = batches_per_gpu * dataloader.batch_size

    # empty array of indices that we need to keep track of
    indices = torch.empty(len(dataloader.dataset), dtype=torch.long)

    # set up matrix PS: (N x K) when using one head, otherwise N x D, where D is the dim before the last FC layer.
    PS = [
        torch.empty(points_per_gpu, dim, device='cuda:' + str(i), dtype=TYPE)
        for i in range(model_gpus, model_gpus + ngpu_store - 1)
    ]
    # accommodate the remainder on the last storage GPU
    PS.append(
        torch.empty(len(dataloader.dataset) -
                    (ngpu_store - 1) * points_per_gpu,
                    dim,
                    device='cuda:' + str(model_gpus + ngpu_store - 1),
                    dtype=TYPE))

    # slice sizes, i.e. how many activations will be on the gpus
    slices = [qq.shape[0] for qq in PS]
    print("slice sizes: ", slices, flush=True)
    batch_time = MovingAverage(intertia=0.9)
    now = time.time()
    st = 0
    softmax = torch.nn.Softmax(dim=1).to('cuda:0')

    # with headcount = 1 the model returns a single tensor: the last-FC output
    # when one head is used, or the pre-last-layer activations for multiple heads
    model.headcount = 1
    for batch_idx, (data, _, _selected) in enumerate(dataloader):
        data = data.to(torch.device('cuda:0'))
        mass = data.size(0)
        en = st + mass
        # j keeps track of which part of PS we're writing to
        j = min((batch_idx // batches_per_gpu), ngpu_store - 1)
        subs = j * points_per_gpu
        if hc == 1:
            p = softmax(model(data)).detach().to(TYPE)
            # when using one head: save softmax (N x K) matrix:
            PS[j][st - subs:en - subs, :].copy_(p)
        else:
            # when using multiple heads: save the raw pre-last activations (N x D)
            PS[j][st - subs:en - subs, :].copy_(model(data).detach())
        indices[st:en].copy_(_selected)
        st = en
        batch_time.update(time.time() - now)
        now = time.time()
        if batch_idx % 50 == 0:
            print(
                f"Aggregating batch {batch_idx:03}/{l_dl}, speed: {mass / batch_time.avg:04.1f}Hz. To rGPU {j + 1}",
                end='\r',
                flush=True)
    torch.cuda.synchronize()  # just in case
    return PS, indices
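A hedged usage sketch of the consumer side (the code below is assumed, not from the repo, and num_clusters is a placeholder): because the dataloader may shuffle, the shards returned by aggreg_multi_gpu have to be reordered with indices before row i corresponds to dataset sample i again. Note that keeping the shards on the spare GPUs avoids exactly this kind of large host-side gather, so the concatenation below only makes sense when N x dim fits in host memory.

PS_shards, indices = aggreg_multi_gpu(model, dataloader, hc=1, dim=num_clusters)
PS = torch.cat([shard.cpu() for shard in PS_shards], dim=0)  # (N, dim) on the host
PS = PS[torch.argsort(indices)]  # undo the loader's ordering: row i <-> sample i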
Code example #3
    def optimize_epoch(self,
                       model,
                       criterion,
                       optimizer,
                       loader,
                       epoch,
                       is_validation=False):
        top1 = []
        top5 = []
        loss_value = []
        for i in range(len(model.probes)):
            top1.append(TotalAverage())
            top5.append(TotalAverage())
            loss_value.append(TotalAverage())
        batch_time = MovingAverage(intertia=0.9)
        now = time.time()

        if is_validation is False:
            model.train()
            lr = self.lr_schedule(epoch)
            for pg in optimizer.param_groups:
                pg['lr'] = lr
            print(f"Starting epoch {epoch} with learning rate {lr}")
        else:
            model.eval()
        for iter, (input, label) in enumerate(loader):
            input = input.to('cuda:0')
            label = label.to('cuda:0')
            mass = input.size(0)
            total_loss = None
            if args.data in ['Imagenet', 'Places'] and is_validation and args.tencrops:
                # fold the ten crops into the batch dimension for a single forward pass
                bs, ncrops, c, h, w = input.size()
                input = input.view(-1, c, h, w)

            predictions = model(input)
            if args.data in ['Imagenet', 'Places'] and is_validation and args.tencrops:
                # average the per-crop predictions back to one prediction per image
                predictions = [torch.squeeze(p.view(bs, ncrops, -1).mean(1))
                               for p in predictions]
            for i, prediction in enumerate(predictions):
                loss = criterion(prediction, label)
                if total_loss is None:
                    total_loss = loss
                else:
                    total_loss = total_loss + loss
                top1_, top5_ = accuracy(prediction, label, topk=(1, 5))
                top1[i].update(top1_.item(), mass)
                top5[i].update(top5_.item(), mass)
                loss_value[i].update(loss.item(), mass)

            if is_validation is False:
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

            batch_time.update(time.time() - now)
            now = time.time()

        top1_str = 'top1 val' if is_validation else 'top1 train'
        top5_str = 'top5 val' if is_validation else 'top5 train'
        writer.add_scalars(top1_str,
                           {f"depth_{k+1}": top1[k].avg for k in range(len(model.probes))},
                           epoch)
        writer.add_scalars(top5_str,
                           {f"depth_{k+1}": top5[k].avg for k in range(len(model.probes))},
                           epoch)
        writer.add_scalars('losses',
                           {f"depth_{k+1}": loss_value[k].avg for k in range(len(model.probes))},
                           epoch)
        print('VAL:' if is_validation else 'TRAIN:')
        for i in range(len(model.probes)):
            print(f" [{i}] t1:{top1[i].avg:04.2f} loss:{loss_value[i].avg:.2f}", end='')
        print()

        return {
            "loss": [x.avg for x in loss_value],
            "top1": [x.avg for x in top1],
            "top5": [x.avg for x in top5]
        }
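For reference, a minimal sketch of the ten-crop trick used in the validation branch above (the stand-in model and shapes are assumed for illustration only): the crops are folded into the batch dimension for one forward pass, then the per-crop predictions are averaged back to a single prediction per image.

import torch

# stand-in classifier: global-average-pool, flatten, project to 10 classes
model_fn = torch.nn.Sequential(torch.nn.AdaptiveAvgPool2d(1),
                               torch.nn.Flatten(),
                               torch.nn.Linear(3, 10))

bs, ncrops = 8, 10                           # assumed: 8 images, 10 crops each
x = torch.randn(bs, ncrops, 3, 224, 224)     # what a ten-crop loader yields
logits = model_fn(x.view(-1, 3, 224, 224))   # (bs * ncrops, 10): crops share the batch dim
avg = logits.view(bs, ncrops, -1).mean(1)    # (bs, 10): mean prediction per image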
Code example #4
    def optimize_epoch(self,
                       model,
                       criterion,
                       optimizer,
                       loader,
                       epoch,
                       is_validation=False):
        top1 = []
        top5 = []
        loss_value = []
        top1.append(TotalAverage())
        top5.append(TotalAverage())
        loss_value.append(TotalAverage())
        batch_time = MovingAverage(intertia=0.9)
        now = time.time()

        if is_validation is False:
            model.train()
            lr = self.lr_schedule(epoch)
            for pg in optimizer.param_groups:
                pg['lr'] = lr
            print("Starting epoch %s" % epoch)
        else:
            model.eval()
        l_dl = len(loader)
        for iter, q in enumerate(loader):
            if len(q) == 3:
                input, label, _s = q
            else:
                input, label = q
            input = input.to(self.dev)
            label = label.to(self.dev)
            mass = input.size(0)
            if is_validation and args.tencrops:
                # fold the ten crops into the batch, then average predictions per image
                bs, ncrops, c, h, w = input.size()
                predictions = model(input.view(-1, c, h, w))
                predictions = torch.squeeze(predictions.view(bs, ncrops, -1).mean(1))
            else:
                predictions = model(input)

            loss = criterion(predictions, label)
            top1_, top5_ = accuracy(predictions, label, topk=(1, 5))
            top1[0].update(top1_.item(), mass)
            top5[0].update(top5_.item(), mass)
            loss_value[0].update(loss.item(), mass)

            if is_validation is False:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_time.update(time.time() - now)
            now = time.time()
            if iter % 50 == 0:
                print(
                    f"{'V' if is_validation else 'T'} Loss: {loss_value[0].avg:03.3f} "
                    f"Top1: {top1[0].avg:03.1f} Top5: {top5[0].avg:03.1f} "
                    f"{epoch: 3}/{iter:05}/{l_dl:05} Freq: {mass / batch_time.avg:04.1f}Hz:",
                    end='\r',
                    flush=True)
        if is_validation:
            print("validation")
            print("val-top1: %s" % top1[0].avg)
            print("val-top5: %s" % top5[0].avg)
        if self.writer:
            str_ = 'LP/val' if is_validation else 'LP/train'
            self.writer.add_scalar(f'{str_}/top1', top1[0].avg, epoch)
            self.writer.add_scalar(f'{str_}/top5', top5[0].avg, epoch)
            self.writer.add_scalar(f'{str_}/Freq', mass / batch_time.avg,
                                   epoch)

        return {
            "loss": [x.avg for x in loss_value],
            "top1": [x.avg for x in top1],
            "top5": [x.avg for x in top1]
        }
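The accuracy helper used throughout these examples is not shown in this file; a sketch along the lines of the standard PyTorch ImageNet-example implementation (assumed here, not verified against this project):

import torch

def accuracy(output, target, topk=(1,)):
    # percentage of samples whose true label is among the top-k predictions
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()                                        # (maxk, batch)
    correct = pred.eq(target.view(1, -1).expand_as(pred))  # bool per (rank, sample)
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res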
Code example #5
    def optimize_epoch(self, model, optimizer, loader, epoch, validation=False):
        print(f"Starting epoch {epoch}, validation: {validation} " + "=" * 30)
        loss_value = AverageMeter()
        rotacc_value = AverageMeter()

        # house keeping
        if not validation:
            model.train()
            lr = self.lr_schedule(epoch)
            for pg in optimizer.param_groups:
                pg['lr'] = lr
        else:
            model.eval()

        XE = torch.nn.CrossEntropyLoss().to(self.dev)
        l_dl = len(loader)  # total number of batches, for the progress printout
        now = time.time()
        batch_time = MovingAverage(intertia=0.9)
        for iter, (data, label, selected) in enumerate(loader):
            now = time.time()

            if not validation:
                niter = epoch * len(loader.dataset) + iter * args.batch_size
            data = data.to(self.dev)
            mass = data.size(0)
            # each image appears as 4 rotated copies; `where` selects the 0-degree copy
            where = np.arange(mass, dtype=int) * 4
            data = data.view(mass * 4, 3, data.size(3), data.size(4))
            # rotation targets 0,1,2,3, repeated for every image in the batch
            rotlabel = torch.tensor(range(4)).view(-1, 1).repeat(mass, 1).view(-1).to(self.dev)
            #################### train CNN ###########################################
            if not validation:
                final = model(data)
                if args.onlyrot:
                    loss = torch.Tensor([0]).to(self.dev)
                else:
                    if args.hc == 1:
                        loss = XE(final[0][where], self.L[selected])
                    else:
                        loss = torch.mean(
                            torch.stack([XE(final[k][where], self.L[k, selected]) for k in range(args.hc)]))
                rotloss = XE(final[-1], rotlabel)
                pred = torch.argmax(final[-1], 1)

                total_loss = loss + rotloss
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()
                correct = (pred == rotlabel).to(torch.float)
                rotacc = correct.sum() / float(mass)
            else:
                final = model(data)
                pred = torch.argmax(final[-1], 1)
                correct = (pred == rotlabel).to(torch.float)
                rotacc = correct.sum() / float(mass)
                total_loss = torch.Tensor([0])
                loss = torch.Tensor([0])
                rotloss = torch.Tensor([0])
            rotacc_value.update(rotacc.item(), mass)
            loss_value.update(total_loss.item(), mass)

            batch_time.update(time.time() - now)
            now = time.time()
            print(
                f"Loss: {loss_value.avg:03.3f}, RotAcc: {rotacc_value.avg:03.3f} | {epoch: 3}/{iter:05}/{l_dl:05} Freq: {mass / batch_time.avg:04.1f}Hz:",
                end='\r', flush=True)

            # every few iter logging
            if iter % args.logiter == 0:
                if not validation:
                    print(niter, f" Loss: {loss.item():.3f}", flush=True)
                    with torch.no_grad():
                        if not args.onlyrot:
                            pred = torch.argmax(final[0][where], dim=1)
                            pseudoloss = XE(final[0][where], pred)
                    if not args.onlyrot:
                        self.writer.add_scalar('Pseudoloss', pseudoloss.item(), niter)
                    self.writer.add_scalar('lr', self.lr_schedule(epoch), niter)
                    self.writer.add_scalar('Loss', loss.item(), niter)
                    self.writer.add_scalar('RotLoss', rotloss.item(), niter)
                    self.writer.add_scalar('RotAcc', rotacc.item(), niter)

                    if iter > 0:
                        # `now` was just reset above, so timing from it would only
                        # measure the logging overhead; use the running batch average
                        self.writer.add_scalar('Freq(Hz)', mass / batch_time.avg, niter)

        # end of epoch logging
        if self.writer and (epoch % self.log_interval == 0):
            write_conv(self.writer, model, epoch)
            if validation:
                print('val Rot-Acc: ', rotacc_value.avg)
                self.writer.add_scalar('val Rot-Acc', rotacc_value.avg, epoch)

        files.save_checkpoint_all(self.checkpoint_dir, model, args.arch,
                                  optimizer, self.L, epoch, lowest=False)
        return {'loss': loss_value.avg}
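Finally, the TotalAverage and MovingAverage meters (including the misspelled intertia keyword that every example above passes) are also external to these snippets; a minimal sketch of the semantics they would need to support the calls above:

class TotalAverage:
    # mass-weighted running mean, e.g. for per-epoch loss/accuracy
    def __init__(self):
        self.mass, self.sum, self.avg = 0.0, 0.0, 0.0

    def update(self, val, mass=1):
        self.mass += mass
        self.sum += val * mass
        self.avg = self.sum / self.mass

class MovingAverage:
    # exponential moving average, e.g. for per-batch timing
    def __init__(self, intertia=0.9):
        self.intertia = intertia
        self.avg = None

    def update(self, val):
        self.avg = val if self.avg is None else \
            self.intertia * self.avg + (1. - self.intertia) * val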