Example #1
    def process_function(engine, batch):
        x, y = _prepare_batch_fp16(batch, device=device, non_blocking=True)

        if config['enable_mixup']:
            x, y = mixup_data(x, y, config['mixup_alpha'],
                              config['mixup_proba'])

        optimizer.zero_grad()
        y_pred = model(x)

        loss = criterion(y_pred, y)
        loss.backward()

        if config["clip_gradients"] is not None:
            clip_grad_norm_(model.parameters(), config["clip_gradients"])

        if config['use_adamw']:
            # decoupled (AdamW-style) weight decay: shrink the weights directly
            # instead of adding an L2 penalty to the loss
            for group in optimizer.param_groups:
                for param in group['params']:
                    param.data.add_(param.data, alpha=-weight_decay / batch_size * group['lr'])

        optimizer.step()
        return loss.item()
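The snippet above relies on a _prepare_batch_fp16 helper that is not shown. A minimal sketch of what such a helper typically does, assuming the batch is an (inputs, targets) pair; this is an illustration, not the helper the example actually imports:

import torch

def _prepare_batch_fp16(batch, device=None, non_blocking=False):
    # Hypothetical helper: move the batch to the target device and cast the
    # inputs to half precision for FP16 training; integer targets are left as-is.
    x, y = batch
    x = x.to(device=device, non_blocking=non_blocking).half()
    y = y.to(device=device, non_blocking=non_blocking)
    return x, y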
Example #2
    def run_train_iter(self, x, y, manual_verified, epoch_number=-1):
        """
        Receives the inputs and targets for the model and runs a training iteration. Returns loss and accuracy metrics.
        :param x: The inputs to the model. A numpy array of shape (batch_size, channels, height, width)
        :param y: The targets for the model. A numpy array of shape (batch_size, num_classes)
        :param manual_verified: per-sample flags marking manually verified labels, forwarded to the loss function
        :param epoch_number: index of the current epoch (default -1, unused here)
        :return: the loss and accuracy for this batch
        """
        self.train()  # set the model to training mode (batch norm, dropout, etc. behave differently in train and eval)


        if len(y.shape) > 1:
            y = np.argmax(y, axis=1)  # convert one-hot encoded labels to integer class labels
        y_no_cuda = y  # keep a CPU copy of the labels for the loss function

        if type(x) is np.ndarray:
            # convert numpy arrays to torch tensors
            x, y = torch.Tensor(x).float(), torch.Tensor(y).long()
            y_no_cuda = torch.Tensor(y_no_cuda).long()
        x = x.to(self.device)  # send data to the device
        y = y.to(self.device)

        if self.mixup:
            inputs, targets_a, targets_b, y_, lam = MixUp.mixup_data(x, y, y_no_cuda, self.num_classes,
                                                                     self.alpha, use_cuda=self.use_gpu)
            if self.stack:
                # stack the original and the mixed batch and run a single forward pass
                x_stack = torch.stack((x, inputs), 0)
                x_stack = x_stack.view((self.batch_size, 1, self.heigth, self.width))
                out = self.model.forward_train(x_stack)
                # combine the mixup loss and the label-smoothing loss on the two halves of the output
                loss_mix = MixUp.mixup_criterion(out[:int(self.batch_size / 2)], targets_a, targets_b, lam, self.device)
                loss_smooth = CustomLosses.loss_function(out[int(self.batch_size / 2):], y, y_no_cuda, self.num_classes,
                                                         self.device, self.eps_smooth, self.loss_function,
                                                         array_manual_label=manual_verified, consider_manual=self.consider_manual)
                loss = (loss_mix + loss_smooth) / 2
            else:
                out = self.model.forward_train(x)  # forward the data through the model
                loss = MixUp.mixup_criterion(out, targets_a, targets_b, lam, self.device)
        else:
            out = self.model.forward_train(x)
            loss = CustomLosses.loss_function(out, y, y_no_cuda, self.num_classes, self.device, self.eps_smooth,
                                              self.loss_function, array_manual_label=manual_verified,
                                              consider_manual=self.consider_manual)
        

        self.optimizer.zero_grad()  # set all weight grads from previous training iters to 0
        loss.backward()  # backpropagate to compute gradients for current iter loss

        self.optimizer.step()  # update network parameters
        _, predicted = torch.max(out.data, 1)  # get argmax of predictions
        if self.stack:
            accuracy = predicted[int(self.batch_size / 2):].eq(y).float().mean().item()
        else:
            accuracy = predicted.eq(y).float().mean().item()  # compute accuracy
        return loss.detach().cpu().numpy(), accuracy
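CustomLosses.loss_function and MixUp.mixup_data above are project-specific helpers that are not shown. As a rough, hypothetical illustration of the label-smoothing part only (the eps_smooth argument), a soft-target cross-entropy could look like the sketch below; the manual-verification handling and the alternative losses (CCE, lq_loss) are not covered:

import torch.nn.functional as F

def label_smoothing_cross_entropy(logits, targets, num_classes, eps=0.1):
    # Hypothetical sketch: blend the one-hot target with a uniform distribution
    # and take the cross-entropy against the model's log-probabilities.
    log_probs = F.log_softmax(logits, dim=1)
    one_hot = F.one_hot(targets, num_classes).float()
    smoothed = one_hot * (1.0 - eps) + eps / num_classes
    return -(smoothed * log_probs).sum(dim=1).mean()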
Example #3
    def forward(self, X):
        if self.beta > 0:
            lam = np.random.beta(self.beta, self.beta)
        else:
            lam = 1
        X, permutation, lam = mixup_data(X, lam)
        X = ReverseLayerF.apply(X, self.alpha)  # gradient reversal layer
        for layer in self.layers:
            X = layer(X)
        return X, permutation, lam
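This forward pass combines mixup with a gradient reversal layer (the usual reading of ReverseLayerF in domain-adversarial training) and a mixup_data(X, lam) variant that returns the permutation so the caller can mix labels or losses the same way. A minimal sketch of both, under those assumptions:

import torch
from torch.autograd import Function

class ReverseLayerF(Function):
    # Gradient reversal: identity in the forward pass, gradient multiplied
    # by -alpha in the backward pass.
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.alpha, None

def mixup_data(x, lam):
    # Hypothetical variant matching the call above: mix the batch with a random
    # permutation of itself and return the permutation alongside lam.
    permutation = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[permutation]
    return mixed_x, permutation, lam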
Example #4
def train(epoch):
    logf.write('\nEpoch: %d' % epoch)
    print('Epoch: %d' % epoch)
    net.train()
    train_loss, correct, total = 0, 0, 0
    batch_accs = []
    batch_losses = []
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        if args.use_mix_up:
            inputs, targets_a, targets_b, lam = mixup_data(
                args, inputs, targets, args.mix_up_alpha,
                args.use_uniform_mixup, use_cuda)
            inputs, targets_a, targets_b = map(Variable,
                                               (inputs, targets_a, targets_b))

            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b,
                                   lam)
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += (
                lam * predicted.eq(targets_a.data).cpu().sum().float() +
                (1 - lam) * predicted.eq(targets_b.data).cpu().sum().float())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

        cur_loss = train_loss / (batch_idx + 1)
        acc = 100. * correct / total
        logf.write('[%d]Loss: %.3f | Acc: %.3f%% (%d/%d)\n' %
                   (batch_idx, cur_loss, acc, correct, total))
        if batch_idx % 100 == 0:
            print('[%d]Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                  (batch_idx, cur_loss, acc, correct, total))
        batch_accs.append(acc)
        batch_losses.append(cur_loss)
    acc = float(correct) / total
    print('Train Acc:{}'.format(acc))
    return np.mean(batch_losses), acc
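Several of these examples call mixup_data / mixup_criterion with the four-value signature used above. A minimal sketch in the spirit of the reference mixup implementation; the extra args / use_uniform_mixup parameters of this particular example are not reproduced, and use_cuda is kept only for signature compatibility:

import numpy as np
import torch

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    # Draw a mixing coefficient from Beta(alpha, alpha) and blend each sample
    # with a randomly chosen partner from the same batch.
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    # The loss is the lam-weighted combination of the losses against both label sets.
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)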
Example #5
def warmup_cudnn(model, criterion, batch_size, config):
    # run forward and backward pass of the model on a batch of random inputs
    # to allow benchmarking of cudnn kernels
    x = torch.Tensor(np.random.rand(batch_size, 3, 32, 32)).cuda()
    x = x.half()
    y = torch.LongTensor(np.random.randint(0, 10, batch_size)).cuda()

    if config['enable_mixup']:
        x, y = mixup_data(x, y, config['mixup_alpha'], config['mixup_proba'])

    model.train(True)
    y_pred = model(x)
    loss = criterion(y_pred, y)
    loss.backward()
    model.zero_grad()
    torch.cuda.synchronize()
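A typical way to use warmup_cudnn (a hedged sketch; train_batch_size and eval_batch_size are assumed names, not taken from the example): enable cuDNN benchmark mode and run one warm-up pass per distinct batch size before the timed training starts, since benchmark mode selects kernels per input shape.

torch.backends.cudnn.benchmark = True
for bs in {train_batch_size, eval_batch_size}:  # assumed batch sizes used later in training
    warmup_cudnn(model, criterion, bs, config)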
Example #6
    def process_function(engine, batch):
        x, y = _prepare_batch_fp16(batch, device=device, non_blocking=True)

        if config['enable_mixup']:
            x, y = mixup_data(x, y, config['mixup_alpha'],
                              config['mixup_proba'])

        optimizer.zero_grad()
        y_pred = model(x)

        loss = criterion(y_pred, y)
        loss.backward()

        if config["clip_gradients"] is not None:
            clip_grad_norm_(model.parameters(), config["clip_gradients"])

        optimizer.step()
        return loss.item()
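Examples #1 and #6 call a two-value mixup_data(x, y, alpha, proba) that also takes a mixup probability. A hypothetical sketch, under the assumption that it returns soft targets (so the criterion must accept soft labels, e.g. a soft-target cross-entropy); num_classes is an added illustration parameter:

import numpy as np
import torch
import torch.nn.functional as F

def mixup_data(x, y, alpha, proba, num_classes=10):
    # Hypothetical: with probability proba, mix the batch with a shuffled copy of
    # itself and mix the one-hot targets accordingly; otherwise return the batch
    # unchanged with plain one-hot targets.
    y_soft = F.one_hot(y, num_classes).to(x.dtype)
    if alpha <= 0 or np.random.rand() > proba:
        return x, y_soft
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(x.size(0), device=x.device)
    return lam * x + (1 - lam) * x[index], lam * y_soft + (1 - lam) * y_soft[index]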
Example #7
def iterate(epoch, phase):
    if phase == 'train':
        is_train = True
    elif phase == 'valid':
        is_train = False
    else:
        raise ValueError('Unrecognized phase: ' + str(phase))

    if is_train is True:
        net.train()
        '''learning rate scheduling'''
        if config['optimizer']['use_adam'] is False:
            lr = optim.get_epoch_lr(epoch)
            optim.set_lr(optimizer, lr)
    else:
        net.eval()

    phase_dataloader = dataloaders[phase]

    acc_loss = 0.
    is_saved = False

    global best_valid_loss
    global global_iter_valid
    global global_iter_train

    with torch.set_grad_enabled(is_train):
        # with autograd.detect_anomaly():
        for batch_idx, (inputs, targets) in enumerate(phase_dataloader):
            inputs = inputs.to(device)
            targets = targets.to(device)

            # view_inputs(inputs)
            if is_train is True:
                '''mix up'''
                inputs, targets_a, targets_b, lam = mixup.mixup_data(inputs, targets,
                                                                     device, float(config['params']['mixup_alpha']))
                # inputs, targets_a, targets_b = map(Variable, (inputs,
                #                                               targets_a, targets_b))

                '''label smoothing'''
                targets_a = label_smoothing.smooth_one_hot(true_labels=targets_a, classes=num_classes,
                                                           smoothing=float(config['params']['label_smoothing']))
                targets_b = label_smoothing.smooth_one_hot(true_labels=targets_b, classes=num_classes,
                                                           smoothing=float(config['params']['label_smoothing']))
            else:
                targets = label_smoothing.smooth_one_hot(true_labels=targets, classes=num_classes,
                                                         smoothing=0.0)

            # view_inputs(inputs)
            if config['model']['type'] == 'arcface':
                if is_train is True:
                    logits = net(inputs, targets_a)
                else:
                    logits = net(inputs, targets)
            else:
                logits = net(inputs)
            outputs = logits.log_softmax(dim=1)

            if is_train is True:
                loss = mixup.mixup_criterion(criterion, outputs, targets_a, targets_b, lam)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            else:
                loss = criterion(outputs, targets)

            preds = outputs.argmax(dim=1, keepdim=True)

            if is_train is True:
                targets_a = targets_a.argmax(dim=1, keepdim=True)
                targets_b = targets_b.argmax(dim=1, keepdim=True)
                accuracy = (lam * preds.eq(targets_a).float().sum()
                            + (1 - lam) * preds.eq(targets_b).float().sum())
                accuracy = accuracy / (targets_a.shape[0] + targets_b.shape[0])
            else:
                targets = targets.argmax(dim=1, keepdim=True)
                accuracy = preds.eq(targets).float().mean()

            acc_loss += loss.item()
            avg_loss = acc_loss / (batch_idx + 1)
            print('[%s] epoch: %3d | iter: %4d | loss: %.3f | avg_loss: %.3f | accuracy: %.3f'
                  % (phase, epoch, batch_idx, loss.item(), avg_loss, accuracy))

            if is_train is True:
                summary_writer.add_scalar('train/loss', loss.item(), global_iter_train)
                summary_writer.add_scalar('train/acc', accuracy, global_iter_train)
                global_iter_train += 1
            else:
                summary_writer.add_scalar('valid/loss', loss.item(), global_iter_valid)
                summary_writer.add_scalar('valid/acc', accuracy, global_iter_valid)
                global_iter_valid += 1

        state = {
            'net': net.state_dict(),
            'loss': best_valid_loss,
            'epoch': epoch,
            'lr': config['optimizer']['lr'],
            'batch': config['params']['batch_size'],
            'global_train_iter': global_iter_train,
            'global_valid_iter': global_iter_valid,
            'optimizer': optimizer.state_dict()
        }

        if is_train is True:
            print('[Train] Saving..')
            # torch.save(state, config['model']['exp_path'] + '/ckpt-' + str(epoch) + '.pth')
            torch.save(state, os.path.join(config['exp']['path'], 'latest.pth'))
        else:
            # check whether better model or not
            if avg_loss < best_valid_loss:
                best_valid_loss = avg_loss
                is_saved = True

            if is_saved is True:
                print('[Valid] Saving..')
                # torch.save(state, config['model']['exp_path'] + '/ckpt-' + str(epoch) + '.pth')
                torch.save(state, os.path.join(config['exp']['path'], 'best.pth'))
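label_smoothing.smooth_one_hot is not shown. A common implementation, consistent with how the example later takes argmax of the targets and feeds log_softmax outputs to the criterion (assumed to be something like nn.KLDivLoss):

import torch

def smooth_one_hot(true_labels, classes, smoothing=0.0):
    # Build a (batch, classes) distribution: 1 - smoothing on the true class,
    # the remaining probability mass spread uniformly over the other classes.
    confidence = 1.0 - smoothing
    with torch.no_grad():
        dist = torch.full((true_labels.size(0), classes),
                          smoothing / (classes - 1),
                          device=true_labels.device)
        dist.scatter_(1, true_labels.unsqueeze(1), confidence)
    return dist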
Example #8
if mixed_precision:
    [backbone, margin], optimizer = amp.initialize([backbone, margin], optimizer, opt_level='O1', verbosity=0)

iter_idx = 0
for epoch in range(epochs):
    print('\nEpoch: {}'.format(epoch + 1))
    train_loss = 0
    correct = 0
    total = 0
    batch_idx = 0
    since = time.time()
    for inputs, targets in train_loader:
        backbone.train()
        margin.train()
        inputs, targets = inputs.to(device), targets.to(device)
        if mixup:
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, alpha)
            feature = backbone(inputs)
            outputs = margin(feature, targets_a, targets_b, lam, device=device, mixed_precision=mixed_precision)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            feature = backbone(inputs)
            outputs = margin(feature, targets, device=device, mixed_precision=mixed_precision)
            loss = criterion(outputs, targets)
        if mixed_precision:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        iter_idx += 1  # count batches for gradient accumulation
        if iter_idx % accumulate == 0:
            optimizer.step()
            optimizer.zero_grad()
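One detail with this accumulation pattern: the gradients of the accumulate micro-batches are summed. If the intent is to approximate one larger batch at the same learning rate, the loss is usually divided by the accumulation factor before the backward pass. A minimal sketch of that variant (model, criterion, optimizer and train_loader are placeholder names, not the objects from the example):

accumulate = 4
optimizer.zero_grad()
for step, (inputs, targets) in enumerate(train_loader):
    loss = criterion(model(inputs), targets) / accumulate  # average instead of sum the gradients
    loss.backward()
    if (step + 1) % accumulate == 0:
        optimizer.step()
        optimizer.zero_grad()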
Example #9
def train(args, batch_size):
    current_epoch = 0

    width = height = 320
    data_dir = 'data/data/'
    dataset = ImageDataset(data_dir, 'data/training.csv', width, height)
    category_map = get_categories('data/species.csv')

    file_count = len(dataset) // batch_size
    model = seresnext50_32x4d(True, num_classes=len(category_map), drop_rate=0)
    #model = resnest50_fast_1s4x24d('pretrained/resnest50_fast_1s4x24d-d4a4f76f.pth', num_classes=len(category_map))
    #model = resnest50_fast_4s2x40d('pretrained/resnest50_fast_4s2x40d-41d14ed0.pth', num_classes=len(category_map))
    model = torch.nn.DataParallel(model).cuda()
    if args.resume_from is not None:
        current_epoch = get_epoch(args.resume_from)
        model_dict = torch.load(args.resume_from).module.state_dict()
        model.module.load_state_dict(model_dict)
        print("resume from ", args.resume_from)

    lr = 0.005
    optimizer = SGD(model.parameters(),
                    lr=lr,
                    momentum=0.9,
                    weight_decay=0.0001)

    print('starting from epoch:', current_epoch)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   3,
                                                   gamma=0.5,
                                                   last_epoch=-1)
    #lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=0.00005)
    #lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.8)

    epochs = 15
    loss_fn = nn.CrossEntropyLoss()
    show_loss_loop = 10
    alpha = 0.1
    for epoch in range(current_epoch, epochs):
        show_cate_loss = 0
        dataloader = DataLoader(dataset=dataset,
                                batch_size=batch_size,
                                shuffle=True,
                                num_workers=4)

        start = time.time()
        for i, data in enumerate(dataloader):
            optimizer.zero_grad()
            if random.random() < 0.5:  # apply mixup on roughly half of the iterations
                inputs = to_tensor(data[0]).cuda()
                targets = data[1].cuda()
                inputs, targets_a, targets_b, lam = mixup_data(
                    inputs, targets, alpha, True)
                inputs, targets_a, targets_b = map(
                    Variable, (inputs, targets_a, targets_b))
                outputs = model(inputs)
                cate_loss = mixup_criterion(loss_fn, outputs, targets_a,
                                            targets_b, lam)
            else:
                im = to_tensor(data[0]).cuda()
                category_id = data[1].cuda()
                cate_fc = model(im)
                cate_loss = loss_fn(cate_fc, category_id)

            cate_loss.backward()
            optimizer.step()

            show_cate_loss += cate_loss.item()
            if (i + 1) % show_loss_loop == 0:
                end = time.time()
                use_time = (end - start) / show_loss_loop
                start = end
                need_time = ((file_count *
                              (epochs - epoch) - i) * use_time) / 60 / 60

                show_cate_loss /= show_loss_loop
                print("epoch: {}/{} iter:{}/{} lr:{:.5f}, cate_loss:{:.5f}, use_time:{:.2f}/iter, need_time:{:.2f} h".\
                 format(epoch+1, epochs, (i+1), file_count, lr_scheduler.get_lr()[0], show_cate_loss, use_time, need_time))
                show_cate_loss = 0

        lr_scheduler.step()
        torch.save(model, 'models/epoch_{}.pth'.format(epoch + 1))
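A hypothetical entry point for the training function above; only resume_from is read from args in the snippet, so the rest of the command-line interface is an assumption:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--resume_from', type=str, default=None,
                        help='checkpoint path to resume training from')
    parser.add_argument('--batch_size', type=int, default=64)
    args = parser.parse_args()
    train(args, args.batch_size)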