def train(self, epoch):
        """Run one training epoch and return (average loss, accuracy)."""
        self.model.train()

        loss_meter = MovingAverageMeter()
        acc_meter = AccuracyMeter()

        for inputs, targets in self.train_loader:
            inputs, targets = Variable(inputs), Variable(targets)
            if self.use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            logits = self.model(inputs)
            batch_loss = F.cross_entropy(logits, targets)

            # Standard SGD step: clear, backprop, apply.
            self.optimizer.zero_grad()
            batch_loss.backward()
            self.optimizer.step()

            loss_meter.update(float(batch_loss.data))

            predictions = logits.data.max(dim=1)[1]
            n_correct = int(predictions.eq(targets.data).cpu().sum())
            acc_meter.update(n_correct, inputs.size(0))

        return loss_meter.average, acc_meter.accuracy
    def train_loop(self):
        """Run one SAM training epoch over `self.dl_train`.

        Side effects on the instance: `self.tys` (targets), `self.tps`
        (softmaxed predictions), `self.tloss` (mean loss) and `self.tacc`
        (accuracy in percent) for the whole epoch.
        """
        self.model.train()

        ys, ps = [], []
        _loss, _acc = AverageMeter(), AccuracyMeter()
        with tqdm(total=len(self.dl_train.dataset), ncols=100, leave=False, desc=f"{self.cepoch} train") as t:
            for x, y in self.dl_train:
                x_, y_ = x.cuda(), y.cuda()
                p_ = self.model(x_)
                loss = self.criterion(p_, y_)

                # SAM (Sharpness-Aware Minimization): the first backward +
                # first_step perturbs the weights toward higher loss; the
                # second forward/backward at the perturbed point produces the
                # gradient that second_step uses for the real update. Both
                # passes clip gradients to norm 1.0.
                loss.backward()
                clip_grad_nor_(self.model.parameters(), 1.0) if False else clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.first_step(zero_grad=True)
                self.criterion(self.model(x_), y_).backward()
                clip_grad_norm_(self.model.parameters(), 1.0)
                self.optimizer.second_step(zero_grad=True)

                _loss.update(loss.item())
                _acc.update(y_, p_)
                ys.append(y)
                ps.append(p_.detach().cpu())  # move logits off-GPU to save memory

                t.set_postfix_str(f"loss:{loss.item():.6f} acc:{_acc():.2f}%", refresh=False)
                t.update(len(y))

        self.tys = torch.cat(ys)
        self.tps = torch.cat(ps).softmax(dim=1)
        self.tloss = _loss()

        # Epoch accuracy (%) recomputed from all collected predictions.
        self.tacc = (self.tys == torch.argmax(self.tps, dim=1)).sum().item() / len(self.tys) * 100
def eval(network, dataloader, device, tencrop):
    """Evaluate per-layer rotation-prediction accuracy over `dataloader`.

    Args:
        network: model returning a list of per-layer outputs (indexable by
            layer; `n_layers` is a module-level global).
        dataloader: yields (images, rotation_labels) batches.
        device: torch device the batches are moved to.
        tencrop: if True, batches are (bs, ncrops, C, H, W) and per-crop
            softmax scores are averaged back to one prediction per image.

    Returns:
        List of `n_layers` AccuracyMeter objects (only indices >= 13 are
        updated — TODO confirm the magic offset 13).
    """
    network.eval()
    # NOTE(review): softmax is placed on CUDA unconditionally despite the
    # `device` argument; harmless (Softmax holds no parameters) but
    # inconsistent — confirm intended device handling.
    softmax = nn.Softmax(dim=1).cuda()
    accs = [AccuracyMeter() for _ in range(n_layers)]

    pbar = tqdm(dataloader)
    for data in pbar:
        img = data[0].to(device)
        rot = data[1].long().to(device)
        if tencrop:
            # Flatten the crop dimension so the network sees a plain batch.
            bs, ncrops, c, h, w = img.size()
            img = img.view(-1, c, h, w)

        outputs = network(img)

        for idx in range(n_layers):
            # BUG FIX: Tensor.to() is not in-place — the original discarded
            # the result, so outputs were never actually moved to `device`.
            outputs[idx] = outputs[idx].to(device)
            if tencrop:
                outputs[idx] = softmax(outputs[idx])
                # Average predictions over the ncrops crops: (bs, classes).
                outputs[idx] = torch.squeeze(outputs[idx].view(bs, ncrops,
                                                               -1).mean(1))

        for idx in range(13, n_layers):
            accuracy(outputs[idx], rot, accs[idx])

        # Progress bar shows running accuracy for the scored layers only.
        str_content = generate_acc(n_layers, start=13)
        flt_content = [accs[idx].get() for idx in range(13, n_layers)]

        pbar.set_postfix(info=str_content.format(*flt_content))
    return accs
# Ejemplo n.º 4  (scrape separator — not Python code; commented out to keep the file parseable)
# 0
    def validate():
        """Evaluate `net` on the validation loader; return (avg loss, accuracy)."""
        net.eval()

        loss_meter = AverageMeter()
        acc_meter = AccuracyMeter()
        with torch.no_grad():
            for batch_x, batch_y in valid_loader:
                batch_x = batch_x.to(device)
                batch_y = batch_y.to(device)

                logits = net(batch_x)
                batch_loss = F.cross_entropy(logits, batch_y)

                predictions = logits.data.max(dim=1)[1]
                n_correct = int(predictions.eq(batch_y.data).cpu().sum())

                loss_meter.update(float(batch_loss.data), number=batch_x.size(0))
                acc_meter.update(n_correct, number=batch_x.size(0))

        return loss_meter.average, acc_meter.accuracy
# Ejemplo n.º 5  (scrape separator — not Python code; commented out to keep the file parseable)
# 0
    def validate(self):
        """Run the model over `self.valid_loader`; return (avg loss, accuracy)."""
        self.model.eval()

        loss_meter = AverageMeter()
        acc_meter = AccuracyMeter()

        with torch.no_grad():
            for batch_x, batch_y in self.valid_loader:
                batch_x = batch_x.to(self.device)
                batch_y = batch_y.to(self.device)

                logits = self.model(batch_x)
                batch_loss = F.cross_entropy(logits, batch_y)

                loss_meter.update(float(batch_loss.data), batch_x.size(0))

                predictions = logits.data.max(dim=1)[1]
                n_correct = int(predictions.eq(batch_y.data).cpu().sum())
                acc_meter.update(n_correct, batch_x.size(0))

        return loss_meter.average, acc_meter.accuracy
# Ejemplo n.º 6  (scrape separator — not Python code; commented out to keep the file parseable)
# 0
    def train():
        """One training epoch over `train_loader`; return (avg loss, accuracy)."""
        net.train()

        loss_meter = AverageMeter()
        acc_meter = AccuracyMeter()
        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = net(batch_x)
            batch_loss = F.cross_entropy(logits, batch_y)

            # Standard SGD step: clear, backprop, apply.
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            predictions = logits.data.max(dim=1)[1]
            n_correct = int(predictions.eq(batch_y.data).cpu().sum())

            loss_meter.update(float(batch_loss.data), number=batch_x.size(0))
            acc_meter.update(n_correct, number=batch_x.size(0))

        return loss_meter.average, acc_meter.accuracy
    def validate(self):
        """Evaluate the model on `self.valid_loader`.

        Returns:
            (average loss, accuracy) over the validation set.
        """
        self.model.eval()

        valid_loss = AverageMeter()
        valid_acc = AccuracyMeter()

        # BUG FIX: `Variable(x, volatile=True)` has been a no-op since
        # PyTorch 0.4, so the original tracked gradients during validation,
        # wasting memory. Use torch.no_grad() instead, matching the other
        # validate() implementations in this file.
        with torch.no_grad():
            for i, (x, y) in enumerate(self.valid_loader):
                if self.use_cuda:
                    x = x.cuda()
                    y = y.cuda()

                output = self.model(x)
                loss = F.cross_entropy(output, y)

                valid_loss.update(float(loss.data), x.size(0))

                y_pred = output.data.max(dim=1)[1]
                correct = int(y_pred.eq(y.data).cpu().sum())
                valid_acc.update(correct, x.size(0))

        return valid_loss.average, valid_acc.accuracy
def train(i_epoch, network, criterion, optimizer, dataloader, device):
    """Train the per-layer heads for one epoch on rotation prediction.

    Args:
        i_epoch: epoch index, shown in the progress bar.
        network: model returning a list of per-layer outputs (length
            `n_layers`, a module-level global).
        criterion: loss applied independently to each layer's output.
        optimizer: optimizer stepped once per batch on the summed loss.
        dataloader: yields (images, rotation_labels) batches.
        device: torch device the batches are moved to.

    Returns:
        (losses, accs): per-layer AvgMeter / AccuracyMeter lists.
    """
    # NOTE(review): the network is kept in eval mode while "training" —
    # presumably to freeze BatchNorm statistics of a frozen backbone while
    # only attached heads learn. Confirm this is intentional.
    network.eval()
    losses = [AvgMeter() for _ in range(n_layers)]
    accs = [AccuracyMeter() for _ in range(n_layers)]
    pbar = tqdm(dataloader)
    for data in pbar:
        img = data[0].to(device)
        rot = data[1].long().to(device)

        outputs = network(img)
        for idx in range(n_layers):
            # BUG FIX: Tensor.to() returns a new tensor; the original call
            # discarded the result, so outputs were never actually moved.
            outputs[idx] = outputs[idx].to(device)

        optimizer.zero_grad()
        all_loss = []
        for idx in range(n_layers):
            all_loss.append(criterion(outputs[idx], rot))
            accuracy(outputs[idx], rot, accs[idx])

        # Sum the per-layer losses and backprop once through the shared graph.
        loss = 0
        for idx in range(n_layers):
            loss += all_loss[idx]
            losses[idx].add(all_loss[idx].item())
        loss.backward()
        optimizer.step()

        lr = optimizer.param_groups[0]['lr']
        # Progress bar shows loss/acc for layers >= 13 only — TODO confirm
        # the magic offset matches generate_lossacc's `start`.
        str_content = generate_lossacc(n_layers, start=13)
        flt_content = []
        for idx in range(13, n_layers):
            flt_content.append(losses[idx].get())
            flt_content.append(accs[idx].get())
        flt_content.append(lr)
        pbar.set_description("Epoch:{}".format(i_epoch))
        pbar.set_postfix(info=str_content.format(*flt_content))

    return losses, accs
# Ejemplo n.º 9  (scrape separator — not Python code; commented out to keep the file parseable)
# 0
def model_train(model, config, criterion, trainloader, testloader, validloader,
                model_name):
    """Train `model` with SGD (optionally SWA) for config['budget'] epochs.

    Per epoch: one training pass, one evaluation pass, a cosine LR step and,
    when enabled, SWA weight accumulation. Saves the final state dict plus
    accuracy/LR plots under ./models/ and returns (summary_dict, model).

    NOTE(review): `validloader` is accepted but never used — both the
    per-epoch and the final evaluations run on `testloader`; confirm intended.
    """
    num_epochs = config['budget']
    success = False      # set once evaluation accuracy first reaches 94%
    time_to_94 = None    # cumulative training time at that point

    lrs = list()  # one LR sample per epoch, for the schedule plot
    logging.info(f"weight decay:\t{config['weight_decay']}")
    logging.info(f"momentum :\t{config['momentum']}")

    base_optimizer = optim.SGD(model.parameters(),
                               lr=config['base_lr'],
                               weight_decay=config['weight_decay'],
                               momentum=config['momentum'])
    if config['swa']:
        # Stochastic Weight Averaging wraps the plain SGD optimizer.
        optimizer = torchcontrib.optim.SWA(base_optimizer)

        # lr_scheduler = SWAResNetLR(optimizer, milestones=config['milestones'], schedule=config['schedule'], swa_start=config['swa_start'], swa_init_lr=config['swa_init_lr'], swa_step=config['swa_step'], base_lr=config['base_lr'])
    else:
        optimizer = base_optimizer
        # lr_scheduler = PiecewiseLinearLR(optimizer, milestones=config['milestones'], schedule=config['schedule'])

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, num_epochs)
    #lr_scheduler = PiecewiseLinearLR(optimizer, milestones=config['milestones'], schedule=config['schedule'])
    save_model_str = './models/'

    if not os.path.exists(save_model_str):
        os.mkdir(save_model_str)

    # Each run gets its own timestamped directory for weights and summaries.
    save_model_str += f'model_({datetime.datetime.now()})'
    if not os.path.exists(save_model_str):
        os.mkdir(save_model_str)

    summary_dir = f'{save_model_str}/summary'
    if not os.path.exists(summary_dir):
        os.mkdir(summary_dir)
    c = datetime.datetime.now()  # wall-clock start of the whole run
    train_meter = AccuracyMeter(model_dir=summary_dir, name='train')
    test_meter = AccuracyMeter(model_dir=summary_dir, name='test')
    valid_meter = AccuracyMeter(model_dir=summary_dir, name='valid')

    for epoch in range(num_epochs):
        # NOTE(review): get_lr() is deprecated in newer PyTorch in favour of
        # get_last_lr(); the value is only logged/plotted here.
        lr = lr_scheduler.get_lr()[0]
        lrs.append(lr)

        logging.info('epoch %d, lr %e', epoch, lr)

        train_acc, train_obj, time = train(trainloader, model, criterion,
                                           optimizer, model_name,
                                           config['grad_clip'],
                                           config['prefetch'])

        train_meter.update({
            'acc': train_acc,
            'loss': train_obj
        }, time.total_seconds())
        lr_scheduler.step()
        # Accumulate SWA weights every `swa_step` epochs once past `swa_start`.
        if config['swa'] and ((epoch + 1) >= config['swa_start']) and (
            (epoch + 1 - config['swa_start']) % config['swa_step'] == 0):
            optimizer.update_swa()
        valid_acc, valid_obj, time = infer(testloader,
                                           model,
                                           criterion,
                                           name=model_name,
                                           prefetch=config['prefetch'])
        valid_meter.update({
            'acc': valid_acc,
            'loss': valid_obj
        }, time.total_seconds())
        if valid_acc >= 94:
            # NOTE(review): this re-triggers on every qualifying epoch, so
            # time_to_94 records the *last* epoch at >= 94%, not the first —
            # confirm whether "first time reached" was intended.
            success = True
            time_to_94 = train_meter.time
            logging.info(f'Time to reach 94% {time_to_94}')
        # wandb.log({"Test Accuracy":valid_acc, "Test Loss": valid_obj, "Train Accuracy":train_acc, "Train Loss": train_obj})

    a = datetime.datetime.now() - c  # total wall-clock duration of the run
    if config['swa']:
        # Swap the averaged weights in and refresh BatchNorm statistics.
        optimizer.swap_swa_sgd()
        optimizer.bn_update(trainloader, model)
    test_acc, test_obj, time = infer(testloader,
                                     model,
                                     criterion,
                                     name=model_name,
                                     prefetch=config['prefetch'])
    test_meter.update({
        'acc': test_acc,
        'loss': test_obj
    }, time.total_seconds())
    torch.save(model.state_dict(), f'{save_model_str}/state')
    # wandb.save('model.h5')
    train_meter.plot(save_model_str)
    valid_meter.plot(save_model_str)

    plt.plot(lrs)
    plt.title('LR vs epochs')
    plt.xlabel('Epochs')
    plt.ylabel('LR')
    plt.xticks(np.arange(0, num_epochs, 5))
    plt.savefig(f'{save_model_str}/lr_schedule.png')
    plt.close()

    device = get('device')
    device_name = cpuinfo.get_cpu_info(
    )['brand'] if device.type == 'cpu' else torch.cuda.get_device_name(0)
    total_time = round(a.total_seconds(), 2)
    logging.info(
        f'test_acc: {test_acc}, save_model_str:{save_model_str}, total time :{total_time} and device used {device_name}'
    )
    _, cnt, time = train_meter.get()
    time_per_step = round(time / cnt, 2)  # average training time per epoch
    return_dict = {
        'test_acc': test_acc,
        'save_model_str': save_model_str,
        'training_time_per_step': time_per_step,
        'total_train_time': time,
        'total_time': total_time,
        'device_used': device_name,
        'train_acc': train_acc
    }
    if success:
        return_dict['time_to_94'] = time_to_94
    return return_dict, model