def training_step(self, batch, batch_idx):  # mixup + label smoothing + circle loss

        data, labels = batch

        # mixed_x, labels_a, labels_b, lam = mixup_data(data, labels, 0.2)
        # output = self(mixed_x)
        # loss = mixup_criterion(LabelSmoothingLoss(4, smoothing=0.1), output, labels_a, labels_b, lam)

        output = self(data)
        loss = LabelSmoothingLoss(dc.num_classes, smoothing=0.1)(output, labels)
        return {'loss': loss}
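

# The commented-out mixup path in training_step assumes helpers named
# mixup_data / mixup_criterion. A minimal sketch of such helpers (standard
# input mixup; the exact signatures are an assumption, not this project's
# actual utilities, and numpy/torch are assumed imported as elsewhere in the file):
def mixup_data(x, y, alpha=0.2):
    """Mix a batch with a shuffled copy of itself; return both label sets and lam."""
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1.0
    index = torch.randperm(x.size(0), device=x.device)
    mixed_x = lam * x + (1 - lam) * x[index]
    return mixed_x, y, y[index], lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Interpolate the loss between the two label sets with the same lam."""
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)
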
def main():
    fold = 0
    #4.1 mkdirs
    for path in (config.submit, config.weights, config.best_models, config.logs,
                 config.weights + config.model_name + os.sep + str(fold) + os.sep,
                 config.best_models + config.model_name + os.sep + str(fold) + os.sep):
        os.makedirs(path, exist_ok=True)

    #define model
    model_name = 'mynet'

    model = base_net()

    print(model)

    if cuda_avail:
        model.cuda()

    optimizer = optim.SGD(model.parameters(),
                          lr=config.lr,
                          momentum=0.9,
                          weight_decay=config.weight_decay)

    #optimizer = Lookahead(optimizer)

    Loss = LabelSmoothingLoss(config.num_classes, smoothing=0.1).cuda()

    #lr_scheduler = CosineWarmupLr(optimizer, 320, 40,
    #                          base_lr=config.lr, warmup_epochs=1)
    #optimizer = optim.Adam(model.parameters(),lr = config.lr,amsgrad=True,weight_decay=config.weight_decay)
    weights = torch.tensor([1., 5.])  # per-class loss weights
    criterion = nn.CrossEntropyLoss(weight=weights).cuda()
    #criterion = CircleLoss(m=0.25, gamma=30)
    #criterion = FocalLoss().cuda()
    log = Logger()
    log.open(config.logs + "log_train.txt", mode="a")
    log.write(
        "\n----------------------------------------------- [START %s] %s\n\n" %
        (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), '-' * 51))
    #4.3 some parameters for  K-fold and restart model
    start_epoch = 0
    best_precision1 = 0
    best_precision_save = 0
    resume = False
    test_only = False
    eval_only = True  #True  False

    #4.4 restart the training process
    if resume:
        checkpoint = torch.load(config.weights + config.model_name + '/' +
                                str(fold) + "/checkpoint.pth.tar")
        start_epoch = checkpoint["epoch"]
        fold = checkpoint["fold"]
        best_precision1 = checkpoint["best_precision1"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])

    #4.5 get files and split for K-fold dataset
    #4.5.1 read files
    train_list = get_files(config.train_data, "train")

    test_files = get_files(config.test_data, "test")

    train_data_list, val_data_list = train_test_split(train_list,
                                                      test_size=0.6,
                                                      stratify=train_list["label"])

    print(val_data_list)

    #4.5.2 split 5 folds
    split_fold = StratifiedKFold(n_splits=5)
    folds_indexes = split_fold.split(X=train_list["filename"],
                                     y=train_list["label"])
    folds_indexes = list(folds_indexes)
    fold_index = folds_indexes[fold]

    train_im = []
    train_label = []
    val_im = []
    val_label = []

    print(train_list['filename'])
    print(len(fold_index[0]))

    for i in fold_index[0]:
        i = int(i)
        train_im.append(train_list["filename"][i])
        train_label.append(train_list["label"][i])
    train_data_list = pd.DataFrame({
        "filename": train_im,
        'label': train_label
    })

    for i in fold_index[1]:
        val_im.append(train_list["filename"][i])
        val_label.append(train_list["label"][i])

    val_data_list = pd.DataFrame({"filename": val_im, 'label': val_label})

    #print(fold_index[0])
    #4.5.3 using fold index to split for train data and val data
    #train_data_list = pd.concat([train_data_list["filename"][fold_index[0]],train_data_list["label"][fold_index[0]]],axis=1)
    #val_data_list = pd.concat([train_data_list["filename"][fold_index[1]],train_data_list["label"][fold_index[1]]],axis=1)

    #4.5.4 load dataset
    train_dataloader = DataLoader(ChaojieDataset(train_data_list),
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  collate_fn=collate_fn,
                                  pin_memory=True,
                                  num_workers=4)
    #val_list for x-ray
    val_dataloader = DataLoader(ChaojieDataset(val_data_list, train=False),
                                batch_size=config.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn,
                                pin_memory=False,
                                num_workers=4)
    test_dataloader = DataLoader(ChaojieDataset(test_files, test=True),
                                 batch_size=config.batch_size * 2,
                                 shuffle=False,
                                 pin_memory=False)
    #scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,"max",verbose=1,patience=3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    #4.5.5.1 define metrics
    train_losses = AverageMeter()
    train_top1 = AverageMeter()
    train_top2 = AverageMeter()
    valid_loss = [np.inf, 0, 0]
    model.train()
    #logs
    log.write('** start training here! **\n')
    log.write(
        '                           |------------ VALID -------------|----------- TRAIN -------------|------Accuracy------|------------|\n'
    )
    log.write(
        'lr       iter     epoch    | loss   top-1  top-2            | loss   top-1  top-2           |    Current Best    | time       |\n'
    )
    log.write(
        '-------------------------------------------------------------------------------------------------------------------------------\n'
    )
    #4.5.5 train
    start = timer()
    if eval_only:
        #best_model = torch.load(config.best_models +config.model_name+os.sep+ str(fold) +os.sep+ 'model_best.pth.tar')
        best_model = torch.load(config.weights + 'x-ray/' +
                                'model_best.pth.tar')
        model.load_state_dict(best_model["state_dict"])
        valid_loss = evaluate(val_dataloader, model, criterion, 0.5)
        # trues / prob are assumed to be module-level lists populated by evaluate()
        df = pd.DataFrame({'true': trues, 'prob': prob})
        df.to_csv('gt.csv', index=False)

        # sweep the decision threshold over [0, 1] in steps of 0.005 so the ROC
        # and precision-recall curves can be traced from the evaluation results
        for i in tqdm(range(201)):
            valid_loss = evaluate(val_dataloader, model, criterion, i * 0.005)
        # Sensitivity / Specificity / Precisions / Recalls are assumed to be
        # globals accumulated by evaluate() across the threshold sweep
        df = pd.DataFrame({
            'Sensitivity': Sensitivity,
            'Specificity': Specificity
        })
        df.to_csv('roc.csv', index=False)
        df = pd.DataFrame({'Precisions': Precisions, 'Recalls': Recalls})
        df.to_csv('prc.csv', index=False)

        return

    if test_only:
        best_model = torch.load(config.best_models + config.model_name +
                                os.sep + str(fold) + os.sep +
                                'model_best.pth.tar')
        model.load_state_dict(best_model["state_dict"])
        test(test_dataloader, model, fold)
        return
    total_loss = np.inf  # lowest validation loss seen so far
    for epoch in range(start_epoch, config.epochs):
        scheduler.step(epoch)
        # train
        #global step
        for step, (images, target) in enumerate(train_dataloader):
            #4.5.5 switch to continue train process
            model.train()
            images = images.cuda()
            target = torch.from_numpy(np.array(target)).long().cuda()
            #target = target.cuda()
            output = model(images)
            loss = criterion(output, target)
            #loss = Loss(output, target)

            precision1_train, precision2_train = accuracy(output,
                                                          target,
                                                          topk=(1, 2))
            train_losses.update(loss.item(), images.size(0))
            train_top1.update(precision1_train[0], images.size(0))
            train_top2.update(precision2_train[0], images.size(0))
            #backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #lr_scheduler.step()
            lr = get_learning_rate(optimizer)
            print('\r', end='', flush=True)
            print('%0.4f %5.1f %6.1f        | %0.3f  %0.3f  %0.3f         | %0.3f  %0.3f  %0.3f         |         %s         | %s' % (
                      lr, step / len(train_dataloader) + epoch, epoch,
                      valid_loss[0], valid_loss[1], valid_loss[2],
                      train_losses.avg, train_top1.avg, train_top2.avg, str(best_precision_save),
                      time_to_str((timer() - start), 'min')),
                  end='', flush=True)
        #evaluate
        lr = get_learning_rate(optimizer)
        #evaluate at the end of each epoch (decision threshold 0.5)
        valid_loss = evaluate(val_dataloader, model, criterion,
                              0.5)  #criterion or Loss
        loss_min = False
        if valid_loss[0] < total_loss:
            total_loss = valid_loss[0]
            loss_min = True
        #valid_loss = [0.5,0.5,0.5]
        is_best = False
        is_best = valid_loss[1] >= best_precision1
        best_precision1 = max(valid_loss[1], best_precision1)
        try:
            best_precision_save = best_precision1.cpu().data.numpy()
        except AttributeError:
            best_precision_save = best_precision1

        if is_best or loss_min:
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_precision1": best_precision1,
                    "optimizer": optimizer.state_dict(),
                    "fold": fold,
                    "valid_loss": valid_loss,
                }, is_best, loss_min, fold)
        #adjust learning rate
        #scheduler.step(valid_loss[1])
        print("\r", end="", flush=True)
        log.write('%0.4f %5.1f %6.1f        | %0.3f  %0.3f  %0.3f          | %0.3f  %0.3f  %0.3f         |         %s         | %s' % (
                      lr, epoch, epoch,
                      valid_loss[0], valid_loss[1], valid_loss[2],
                      train_losses.avg, train_top1.avg, train_top2.avg, str(best_precision_save),
                      time_to_str((timer() - start), 'min')))
        log.write('\n')
        time.sleep(0.01)
parser.add_argument('--epochs', default=3, type=int,
                    help='Number of epochs for training')

parser.add_argument('--train_batch_size', default=256, type=int,
                    help='Batch size for training')

parser.add_argument('--test_batch_size', default=128, type=int,
                    help='Batch size for testing')

parser.add_argument('--save_dir', default='../weights', type=str,
                    help='directory to save model')

args = parser.parse_args()


LSLoss = LabelSmoothingLoss(3, smoothing=0.1)

def loss_fn(outputs,target):
    loss = nn.CrossEntropyLoss()(outputs, target)
    return loss
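

# LabelSmoothingLoss is imported from the project's utilities in these
# snippets; a minimal implementation consistent with how it is called here
# (LabelSmoothingLoss(num_classes, smoothing)(logits, targets) on logits of
# shape [batch, num_classes]) might look like the sketch below. This is an
# illustrative stand-in, not the project's actual class.
class LabelSmoothingLossSketch(nn.Module):
    def __init__(self, classes, smoothing=0.1):
        super().__init__()
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.classes = classes

    def forward(self, pred, target):
        log_probs = pred.log_softmax(dim=-1)
        with torch.no_grad():
            # uniform smoothing mass on the non-target classes,
            # (1 - smoothing) on the target class
            true_dist = torch.full_like(log_probs, self.smoothing / (self.classes - 1))
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        return torch.mean(torch.sum(-true_dist * log_probs, dim=-1))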


def train(dataset, dataloader, model, optimizer, device, loss_fn):
    model.train()
    final_loss = 0
    counter = 0
    for batch_ind, d in tqdm(enumerate(dataloader),
                             total=int(len(dataset) / dataloader.batch_size)):
        counter += 1
        image = d['image']
        label = d['label']
        image = image.to(device, dtype=torch.float)
        # the following forward/backward step is an assumed completion of this loop
        label = label.to(device, dtype=torch.long)
        optimizer.zero_grad()
        loss = loss_fn(model(image), label)
        loss.backward()
        optimizer.step()
        final_loss += loss.item()
    else:
        model = architecture.HighDimensionalModel(model_name, num_classes)

    model = nn.DataParallel(model).to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.1,
                                momentum=0.9,
                                weight_decay=1e-4)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[299, 449],
                                               gamma=0.1)
    epoch_stop = 600

    if "category" in label:
        if smoothing > 0:
            criterion = LabelSmoothingLoss(num_classes, smoothing=smoothing)
        else:
            criterion = nn.CrossEntropyLoss()

    else:
        criterion = nn.SmoothL1Loss()

    # Initializes training
    load_from_checkpoint = False
    if load_from_checkpoint:
        checkpoint = torch.load(checkpoint_path)
        epoch_start = checkpoint["epoch"]
        train_loss = checkpoint["train_loss"]
        valid_loss = checkpoint["valid_loss"]
        valid_acc = checkpoint["valid_acc"]
        model.load_state_dict(checkpoint["model_state_dict"])
lr_scheduler = CosineWarmupLr(optimizer,
                              batches_pre_epoch,
                              epochs,
                              base_lr=args.lr,
                              warmup_epochs=args.warmup_epochs)
if resume_epoch > 0:
    checkpoint = torch.load(args.resume_param)
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    amp.load_state_dict(checkpoint['amp'])
    lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
    print("Finish loading resume param.")

top1_acc = metric.Accuracy(name='Top1 Accuracy')
top5_acc = metric.TopKAccuracy(top=5, name='Top5 Accuracy')
loss_record = metric.NumericalCost(name='Loss')

Loss = nn.CrossEntropyLoss() if not args.label_smoothing else \
    LabelSmoothingLoss(classes, smoothing=0.1)


@torch.no_grad()
def test(epoch=0, save_status=False):
    top1_acc.reset()
    top5_acc.reset()
    loss_record.reset()
    model.eval()
    for data, labels in val_data:
        data = data.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        outputs = model(data)
        losses = Loss(outputs, labels)
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    logger = get_logger(args.logging_file)
    logger.info("Use GPU: {} for training".format(args.gpu))

    args.rank = args.rank * ngpus_per_node + gpu
    torch.distributed.init_process_group(backend="nccl",
                                         init_method=args.dist_url,
                                         world_size=args.world_size,
                                         rank=args.rank)

    epochs = args.epochs
    input_size = args.input_size
    resume_epoch = args.resume_epoch
    initializer = KaimingInitializer()
    zero_gamma = ZeroLastGamma()
    mix_precision_training = args.mix_precision_training
    is_first_rank = True if args.rank % ngpus_per_node == 0 else False

    batches_pre_epoch = args.num_training_samples // (args.batch_size *
                                                      ngpus_per_node)
    lr = 0.1 * (args.batch_size * ngpus_per_node //
                32) if args.lr == 0 else args.lr

    model = get_model(models, args.model)

    model.apply(initializer)
    if args.last_gamma:
        model.apply(zero_gamma)
        logger.info('Apply zero last gamma init.')

    if is_first_rank and args.model_info:
        summary(model, torch.rand((1, 3, input_size, input_size)))

    parameters = model.parameters() if not args.no_wd else no_decay_bias(model)
    if args.sgd_gc:
        logger.info('Use SGD_GC optimizer.')
        optimizer = SGD_GC(parameters,
                           lr=lr,
                           momentum=args.momentum,
                           weight_decay=args.wd,
                           nesterov=True)
    else:
        optimizer = optim.SGD(parameters,
                              lr=lr,
                              momentum=args.momentum,
                              weight_decay=args.wd,
                              nesterov=True)
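
    # For context: SGD_GC above is SGD with Gradient Centralization. As a
    # hedged, self-contained illustration of that idea (not the project's
    # implementation), centralizing gradients means removing the per-filter
    # mean from every multi-dimensional weight gradient before the step:
    def centralize_gradients(network):
        for p in network.parameters():
            if p.grad is not None and p.grad.dim() > 1:
                dims = tuple(range(1, p.grad.dim()))
                p.grad.data.add_(-p.grad.data.mean(dim=dims, keepdim=True))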

    lr_scheduler = CosineWarmupLr(optimizer,
                                  batches_pre_epoch,
                                  epochs,
                                  base_lr=args.lr,
                                  warmup_epochs=args.warmup_epochs)
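
    # Purely for illustration: the warmup-then-cosine schedule that
    # CosineWarmupLr implements could be approximated with torch's built-in
    # LambdaLR as sketched below (kept commented out so the scheduler above
    # stays in use; the helper name is an assumption and `import math` is assumed):
    # def cosine_warmup_lambda(steps_per_epoch, total_epochs, warmup_epochs):
    #     total_steps = steps_per_epoch * total_epochs
    #     warmup_steps = steps_per_epoch * warmup_epochs
    #     def fn(step):
    #         if step < warmup_steps:
    #             return (step + 1) / max(1, warmup_steps)
    #         progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    #         return 0.5 * (1.0 + math.cos(math.pi * progress))
    #     return fn
    # lr_scheduler = optim.lr_scheduler.LambdaLR(
    #     optimizer,
    #     cosine_warmup_lambda(batches_pre_epoch, epochs, args.warmup_epochs))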

    # dropblock_scheduler = DropBlockScheduler(model, batches_pre_epoch, epochs)

    if args.lookahead:
        optimizer = Lookahead(optimizer)
        logger.info('Use lookahead optimizer.')

    torch.cuda.set_device(args.gpu)
    model.cuda(args.gpu)
    args.num_workers = int(
        (args.num_workers + ngpus_per_node - 1) / ngpus_per_node)

    if args.mix_precision_training and is_first_rank:
        logger.info('Train with FP16.')

    scaler = GradScaler(enabled=args.mix_precision_training)
    model = nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    Loss = nn.CrossEntropyLoss().cuda(args.gpu) if not args.label_smoothing else \
        LabelSmoothingLoss(args.classes, smoothing=0.1).cuda(args.gpu)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    if args.autoaugment:
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            ImageNetPolicy(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            # Cutout(),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(0.4, 0.4, 0.4),
            transforms.ToTensor(),
            normalize,
        ])

    val_transform = transforms.Compose([
        transforms.Resize(int(input_size / 0.875)),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        normalize,
    ])

    train_set = ImageNet(args.data_path,
                         split='train',
                         transform=train_transform)
    val_set = ImageNet(args.data_path, split='val', transform=val_transform)

    train_sampler = DistributedSampler(train_set)
    train_loader = DataLoader(train_set,
                              args.batch_size,
                              shuffle=False,  # shuffling is delegated to the DistributedSampler
                              pin_memory=True,
                              num_workers=args.num_workers,
                              drop_last=True,
                              sampler=train_sampler)
    val_loader = DataLoader(val_set,
                            args.batch_size,
                            False,
                            pin_memory=True,
                            num_workers=args.num_workers,
                            drop_last=False)

    if resume_epoch > 0:
        loc = 'cuda:{}'.format(args.gpu)
        checkpoint = torch.load(args.resume_param, map_location=loc)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scaler.load_state_dict(checkpoint['scaler'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        print("Finish loading resume param.")

    torch.backends.cudnn.benchmark = True

    top1_acc = metric.Accuracy(name='Top1 Accuracy')
    top5_acc = metric.TopKAccuracy(top=5, name='Top5 Accuracy')
    loss_record = metric.NumericalCost(name='Loss')

    for epoch in range(resume_epoch, epochs):
        tic = time.time()
        train_sampler.set_epoch(epoch)
        if not args.mixup:
            train_one_epoch(model, train_loader, Loss, optimizer, epoch,
                            lr_scheduler, logger, top1_acc, loss_record,
                            scaler, args)
        else:
            train_one_epoch_mixup(model, train_loader, Loss, optimizer, epoch,
                                  lr_scheduler, logger, loss_record, scaler,
                                  args)
        train_speed = int(args.num_training_samples // (time.time() - tic))
        if is_first_rank:
            logger.info(
                'Finish one epoch speed: {} samples/s'.format(train_speed))
        test(model, val_loader, Loss, epoch, logger, top1_acc, top5_acc,
             loss_record, args)

        if args.rank % ngpus_per_node == 0:
            checkpoint = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scaler': scaler.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
            }
            torch.save(
                checkpoint, '{}/{}_{}_{:.5}.pt'.format(args.save_dir,
                                                       args.model, epoch,
                                                       top1_acc.get()))
    def forward(self, predictions, targets):
        """Multibox Loss
        Args:
            predictions (tuple): A tuple containing loc preds, conf preds,
                and prior boxes from SSD net.
                conf shape: torch.size(batch_size, num_priors, num_classes)
                loc shape: torch.size(batch_size, num_priors, 4)
                priors shape: torch.size(num_priors, 4)

            targets (tensor): Ground truth boxes and labels for a batch,
                shape: [batch_size,num_objs,5] (last idx is the label).
        """
        arm_loc_data, arm_conf_data, odm_loc_data, odm_conf_data, priors = predictions

        if self.use_ARM:
            loc_data, conf_data = odm_loc_data, odm_conf_data
        else:
            loc_data, conf_data = arm_loc_data, arm_conf_data
        num = loc_data.size(0)
        priors = priors[:loc_data.size(1), :]
        num_priors = (priors.size(0))
        num_classes = self.num_classes

        # match priors (default boxes) and ground truth boxes
        loc_t = torch.Tensor(num, num_priors, 4)
        conf_t = torch.LongTensor(num, num_priors)
        for idx in range(num):
            truths = targets[idx][:, :-1].data
            labels = targets[idx][:, -1].data
            if num_classes == 2:
                labels = labels >= 0
            defaults = priors.data
            if self.use_ARM:
                refine_match(self.threshold, truths, defaults, self.variance,
                             labels, loc_t, conf_t, idx,
                             arm_loc_data[idx].data)
            else:
                refine_match(self.threshold, truths, defaults, self.variance,
                             labels, loc_t, conf_t, idx)
        if self.use_gpu:
            loc_t = loc_t.cuda()
            conf_t = conf_t.cuda()
        # wrap targets
        loc_t.requires_grad = False
        conf_t.requires_grad = False

        if self.use_ARM:
            P = F.softmax(arm_conf_data, 2)
            arm_conf_tmp = P[:, :, 1]
            object_score_index = arm_conf_tmp <= self.theta
            pos = conf_t > 0
            pos[object_score_index.data] = 0
        else:
            pos = conf_t > 0

        # Localization Loss (Smooth L1)
        # Shape: [batch,num_priors,4]
        pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data)
        loc_p = loc_data[pos_idx].view(-1, 4)
        loc_t = loc_t[pos_idx].view(-1, 4)
        loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum')

        # Compute max conf across batch for hard negative mining
        batch_conf = conf_data.view(-1, self.num_classes)
        loss_c = log_sum_exp(batch_conf) - batch_conf.gather(
            1, conf_t.view(-1, 1))
        #print(loss_c.size())

        # Hard Negative Mining
        loss_c[pos.view(-1, 1)] = 0  # filter out pos boxes for now
        loss_c = loss_c.view(num, -1)
        _, loss_idx = loss_c.sort(1, descending=True)
        _, idx_rank = loss_idx.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.negpos_ratio * num_pos, max=pos.size(1) - 1)
        neg = idx_rank < num_neg.expand_as(idx_rank)

        # Confidence Loss Including Positive and Negative Examples
        pos_idx = pos.unsqueeze(2).expand_as(conf_data)
        neg_idx = neg.unsqueeze(2).expand_as(conf_data)
        conf_p = conf_data[(pos_idx | neg_idx)].view(-1, self.num_classes)
        targets_weighted = conf_t[(pos | neg)]
        #print(pos_idx.size(), neg_idx.size(), conf_p.size(), targets_weighted.size())
        Loss_label = LabelSmoothingLoss(self.num_classes, 0.1)

        loss_c = Loss_label(conf_p, targets_weighted)
        # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N

        N = num_pos.data.sum().float()
        #N = max(num_pos.data.sum().float(), 1)
        loss_l /= N
        loss_c /= N
        #print(N, loss_l, loss_c)
        return loss_l, loss_c
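
# Typical usage of the two returned terms, following the commented formula
# above with alpha assumed to be 1 (a sketch, not taken from this repo):
#     loss_l, loss_c = criterion(predictions, targets)
#     (loss_l + loss_c).backward()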