Example #1
def train_model(config, dataroot, augment, cv_ratio_test, cv_fold, save_path=None, skip_exist=False):
    C.get()
    C.get().conf = config
    C.get().aug = augment

    result = train_and_eval(config, None, dataroot, cv_ratio_test, cv_fold, save_path=save_path, only_eval=skip_exist)
    return C.get()['model'], cv_fold, result
Example #2
def eval_tta(config, augment):
    augment['num_policy'] = 1  # TODO remove
    C.get()
    C.get().conf = config
    cv_ratio_test, cv_fold, save_path = augment['cv_ratio_test'], augment['cv_fold'], augment['save_path']
    print(augment)
    # setup - provided augmentation rules
    C.get().aug = policy_decoder(augment, augment['num_policy'], augment['num_op'])

    # eval
    ckpt = torch.load(save_path)
    model = get_model(ckpt['model_specs']['name'], len(ckpt['labels']), ckpt['model_specs']['training_configs'], local_rank=ckpt['devices']['gpu_index']) #TODO: get model configuration from Retinanet

    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    model.eval()
    dataroot = ckpt['model_specs']['data']['home_path']
    mAPs = []
    start_t = time.time()
    for _ in range(augment['num_policy']):  # TODO
        train_dataset, test_dataset = get_data(ckpt['model_specs']['data']['annotation_type'], dataroot,
                                                  split=cv_ratio_test, split_idx=cv_fold)
        # mAP = evaluate(dataset_val, model)
        mAP = evaluate(train_dataset, model)  # TODO: adjust from train to testing on a randomly selected percentage each time
        mAPs.append(mAP)
        del train_dataset, test_dataset

    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    # reporter(minus_loss=metrics['minus_loss'], top1_valid=metrics['correct'], elapsed_time=gpu_secs, done=True)
    # track.log(minus_loss=metrics['minus_loss'], top1_valid=metrics['correct'], elapsed_time=gpu_secs, done=True)
    tune.report(top1_valid=np.mean(mAPs))
    return np.mean(mAPs)
Example #3
def run_epoch(model,
              loader,
              loss_fn,
              optimizer,
              desc_default='',
              epoch=0,
              writer=None,
              verbose=1,
              scheduler=None):
    if verbose:
        loader = tqdm(loader, disable=False)
        loader.set_description('[{} {}/{}]'.format(desc_default, epoch,
                                                   C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    total_steps = len(loader)
    steps = 0
    for data, label in loader:
        steps += 1
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        preds = model(data)
        loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            if C.get()['optimizer'].get('clip', 5) > 0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         C.get()['optimizer'].get('clip', 5))
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))
        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if verbose:
            postfix = metrics / cnt
            if optimizer:
                postfix['lr'] = optimizer.param_groups[0]['lr']
            loader.set_postfix(postfix)

        if scheduler is not None:
            scheduler.step(epoch - 1 + float(steps) / total_steps)

        del preds, loss, top1, top5, data, label

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
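Every run_epoch variant in this collection leans on an Accumulator helper that none of the examples define. A minimal sketch of what it would have to support, given the usage above (add_dict, key access, division by a count or by a stored key, items()) — an assumption, not the original implementation:

from collections import defaultdict

class Accumulator:
    """Hypothetical stand-in for the Accumulator these examples assume."""
    def __init__(self):
        self.metrics = defaultdict(float)

    def add_dict(self, d):
        for k, v in d.items():
            self.metrics[k] += v

    def __getitem__(self, k):
        return self.metrics[k]

    def __setitem__(self, k, v):
        self.metrics[k] = v

    def keys(self):
        return self.metrics.keys()

    def items(self):
        return self.metrics.items()

    def __truediv__(self, other):
        # `metrics / cnt` scales everything; `metrics / 'cnt'` (see Example #22)
        # normalizes every metric by the stored count instead
        out = Accumulator()
        if isinstance(other, str):
            for k, v in self.metrics.items():
                out[k] = v if k == other else v / self.metrics[other]
        else:
            for k, v in self.metrics.items():
                out[k] = v / other
        return out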
Example #4
def train_model(config,
                dataloaders,
                dataroot,
                augment,
                cv_ratio_test,
                cv_id,
                save_path=None,
                skip_exist=False,
                evaluation_interval=5,
                gr_assign=None,
                gr_dist=None):
    C.get()
    C.get().conf = config
    C.get()['aug'] = augment
    result = train_and_eval(None,
                            dataloaders,
                            dataroot,
                            cv_ratio_test,
                            cv_id,
                            save_path=save_path,
                            only_eval=skip_exist,
                            evaluation_interval=evaluation_interval,
                            gr_assign=gr_assign,
                            gr_dist=gr_dist)
    return C.get()['model']['type'], cv_id, result
Example #5
    def __next__(self):
        inputs, labels = next(self.loader_iter)
        if self.controller:
            # ! original image to controller (only normalized)
            # ! augmented image to model
            _, _, sampled_policies = self.controller(inputs.cuda())
            batch_policies = batch_policy_decoder(
                sampled_policies
            )  # (list:list:list:tuple) [batch, num_policy, n_op, 3]
            aug_inputs, applied_policy = augment_data(inputs, batch_policies)
            self.applied_policy = applied_policy
        else:
            aug_inputs = []
            for img in inputs:
                pil_img = transforms.ToPILImage()(UnNormalize()(img.cpu()))
                transform_img = transforms.Compose([
                    transforms.RandomCrop(32, padding=4),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
                ])
                if C.get()['cutout'] > 0:
                    transform_img.transforms.append(
                        CutoutDefault(C.get()['cutout']))
                if C.get()['aug'] == 'fa_reduced_cifar10':
                    transform_img.transforms.insert(
                        0, Augmentation(fa_reduced_cifar10()))
                aug_img = transform_img(pil_img)
                aug_inputs.append(aug_img)
            aug_inputs = torch.stack(aug_inputs)
        return (aug_inputs, labels)
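The loader above undoes the dataset normalization before re-augmenting, via an UnNormalize helper that is likewise not shown. A plausible minimal version, assuming it simply inverts transforms.Normalize (the CIFAR mean/std defaults are an assumption):

class UnNormalize:
    def __init__(self, mean=_CIFAR_MEAN, std=_CIFAR_STD):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # invert (x - mean) / std channel-wise, in place
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor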
Example #6
def gr_augment(imgs, gr_ids, gr_policies):
    """
    imgs: unnormalized np.array
    """
    aug_imgs = []
    applied_policy = []
    for img, gr_id in zip(imgs, gr_ids):
        # policy: (list:list:tuple) [num_policy, n_op, 3]
        augment = Augmentation(gr_policies[gr_id])
        pil_img = transforms.ToPILImage()(img.cpu())
        # pil_img = img
        aug_img = augment(pil_img)
        # apply original training/valid transforms
        transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
        if C.get()['cutout'] > 0:
            transform.transforms.append(CutoutDefault(C.get()['cutout']))
        aug_img = transform(aug_img)
        aug_imgs.append(aug_img)
        applied_policy.append(augment.policy)
    aug_imgs = torch.stack(aug_imgs)
    assert type(aug_imgs) == torch.Tensor and aug_imgs.shape == imgs.shape, \
           "Augmented Image Type Error, type: {}, shape: {}".format(type(aug_imgs), aug_imgs.shape)
    return aug_imgs, applied_policy
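A hypothetical call, to make the expected shapes concrete (the policies and batch are illustrative; the op names follow the usual AutoAugment vocabulary):

# each group's policy is a [num_policy][n_op] list of (op_name, prob, level) tuples
gr_policies = [
    [[('Invert', 0.5, 6)]],    # group 0
    [[('Contrast', 0.4, 7)]],  # group 1
]
gr_ids = torch.randint(0, len(gr_policies), (imgs.size(0),)).tolist()
aug_imgs, applied = gr_augment(imgs, gr_ids, gr_policies)
assert aug_imgs.shape == imgs.shape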
Example #7
def get_data(dataset,
             dataroot,
             resize=608,
             split=0.15,
             split_idx=0,
             multinode=False,
             target_lb=-1):

    transform_train = transforms.Compose(
        [Normalizer(), Augmenter(),
         Resizer(min_side=resize)])
    transform_test = transforms.Compose(
        [Normalizer(), Resizer(min_side=resize)])

    if isinstance(C.get().aug, list):
        logger.debug('augmentation provided.')
        transform_train.transforms.insert(
            0, Augmentation(C.get().aug, detection=True))

    if dataset == 'coco':
        total_trainset = CocoDataset(dataroot,
                                     set_name='train2017',
                                     transform=transform_train)
        testset = CocoDataset(dataroot,
                              set_name='val2017',
                              transform=transform_test)
    else:
        raise ValueError(dataset)

    return total_trainset, testset
Example #8
def get_dataset(dataset):
    if dataset == 'imagenet':
        transform_train = Compose([
            RandomResizedCrop(C.get()['target_size'] + 32,
                              scale=(0.9, 1.0),
                              interpolation=PIL.Image.BICUBIC),
        ])
        transform_test = Compose([
            Resize(C.get()['target_size'] + 32,
                   interpolation=PIL.Image.BICUBIC)
        ])
        trainset = ImageNet(root=imagenet_path,
                            split='train',
                            transform=transform_train)
        testset1 = ImageNet(root=imagenet_path,
                            split='val',
                            transform=transform_train)
        testset2 = ImageNet(root=imagenet_path,
                            split='val',
                            transform=transform_test)

        trainset.num_class = testset1.num_class = testset2.num_class = 1000
        trainset.targets = [lb for _, lb in trainset.samples]
    else:
        raise ValueError(dataset)
    return trainset, testset1, testset2
Example #9
def run_epoch(model, loader, loss_fn, optimizer, desc_default='', epoch=0, writer=None, verbose=1, scheduler=None):
    tqdm_disable = bool(os.environ.get('TASK_NAME', ''))    # KakaoBrain Environment
    if verbose:
        loader = tqdm(loader, disable=tqdm_disable)
        loader.set_description('[%s %04d/%04d]' % (desc_default, epoch, C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    total_steps = len(loader)
    steps = 0
    for data, label in loader:
        steps += 1
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        preds = model(data)
        loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            if getattr(optimizer, "synchronize", None):
                optimizer.synchronize()     # for horovod
            if C.get()['optimizer'].get('clip', 5) > 0:
                nn.utils.clip_grad_norm_(model.parameters(), C.get()['optimizer'].get('clip', 5))
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))
        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if verbose:
            postfix = metrics / cnt
            if optimizer:
                postfix['lr'] = optimizer.param_groups[0]['lr']
            loader.set_postfix(postfix)

        if scheduler is not None:
            scheduler.step(epoch - 1 + float(steps) / total_steps)

        del preds, loss, top1, top5, data, label

    if tqdm_disable:
        if optimizer:
            logger.info('[%s %03d/%03d] %s lr=%.6f', desc_default, epoch, C.get()['epoch'], metrics / cnt, optimizer.param_groups[0]['lr'])
        else:
            logger.info('[%s %03d/%03d] %s', desc_default, epoch, C.get()['epoch'], metrics / cnt)

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
Example #10
    def __getitem__(self, item):
        if not self.aug:
            uuid = self.list[item]
        else:
            uuid = self.list[item // test_aug_sz]

        colors = ['red', 'green', 'blue', 'yellow']
        flags = cv2.IMREAD_GRAYSCALE
        img = [cv2.imread(os.path.join(self.default_path, uuid + '_' + color + self.ext), flags) for color in colors]
        if self.resize:
            img = [cv2.resize(x, (1024, 1024)) for x in img]

        img = np.stack(img, axis=-1)

        # TODO : data augmentation zoom/shear/brightness
        if 'train' in self.setname:
            augment_img = iaa.Sequential([
                iaa.OneOf([
                    iaa.Affine(rotate=0),
                    iaa.Affine(rotate=90),
                    iaa.Affine(rotate=180),
                    iaa.Affine(rotate=270),
                    iaa.Fliplr(0.5),
                    iaa.Flipud(0.5),
                ])
            ], random_order=True)
            img = augment_img.augment_image(img)

            # cutout
            if C.get()['cutout_p'] > 0.0:
                img = cutout(C.get()['cutout_size'], C.get()['cutout_p'], False)(img)

            # TODO : channel drop(except green)?
            # d_ch = random.choice([0, 2, 3])
            # img[:, :, d_ch] = 0

        if self.aug:
            # test-time augmentation (TTA)
            tta_list = list(itertools.product(
                [iaa.Affine(rotate=0), iaa.Affine(rotate=90), iaa.Affine(rotate=180), iaa.Affine(rotate=270)],
                [iaa.Fliplr(0.0), iaa.Fliplr(1.0), iaa.Flipud(1.0), iaa.Sequential([iaa.Fliplr(1.0), iaa.Flipud(1.0)])]
            ))
            tta_idx = item % len(tta_list)
            img = tta_list[tta_idx][0].augment_image(img)
            img = tta_list[tta_idx][1].augment_image(img)

        img = img.astype(np.float32)
        img /= 255.  # TODO : different normalization?
        img = np.transpose(img, (2, 0, 1))
        img = np.ascontiguousarray(img)

        if self.setname == 'tests':
            lb = np.zeros(len(name_label_dict), dtype=int)   # np.int was removed in NumPy 1.24
        else:
            lb = [int(x) for x in self.labels.loc[uuid]['Target'].split()]
            lb = np.eye(len(name_label_dict), dtype=float)[lb].sum(axis=0)
        return img, lb
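The TTA grid above pairs four rotations with four flip states, so every test image yields 16 deterministic variants; judging from the item % len(tta_list) indexing, test_aug_sz is presumably 16 (an inference, not stated in the snippet):

import itertools

rotations = [0, 90, 180, 270]
flips = ['none', 'lr', 'ud', 'lr+ud']
print(len(list(itertools.product(rotations, flips))))  # 16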
Example #11
def run_epoch(model,
              loader,
              loss_fn,
              optimizer,
              desc_default='',
              epoch=0,
              writer=None,
              verbose=1):
    if verbose:
        loader = tqdm(loader)
        if optimizer:
            curr_lr = optimizer.param_groups[0]['lr']
            loader.set_description(
                '[%s %04d/%04d] lr=%.4f' %
                (desc_default, epoch, C.get()['epoch'], curr_lr))
        else:
            loader.set_description('[%s %04d/%04d]' %
                                   (desc_default, epoch, C.get()['epoch']))

    metrics = Accumulator()
    cnt = 0
    for data, label in loader:
        data, label = data.cuda(), label.cuda()

        if optimizer:
            optimizer.zero_grad()

        preds = model(data)
        loss = loss_fn(preds, label)

        if optimizer:
            loss.backward()
            # clip only after backward() has populated the gradients
            nn.utils.clip_grad_norm_(model.parameters(), 5)
            optimizer.step()

        top1, top5 = accuracy(preds, label, (1, 5))

        metrics.add_dict({
            'loss': loss.item() * len(data),
            'top1': top1.item() * len(data),
            'top5': top5.item() * len(data),
        })
        cnt += len(data)
        if verbose:
            loader.set_postfix(metrics / cnt)

        del preds, loss, top1, top5, data, label

    metrics /= cnt
    if optimizer:
        metrics.metrics['lr'] = optimizer.param_groups[0]['lr']
    if verbose:
        for key, value in metrics.items():
            writer.add_scalar(key, value, epoch)
    return metrics
Example #12
    def __adjust_learning_rate_pyramid(epoch):
        """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
        base_lr = C.get()['lr']
        if C.get()['gradual']['lr'] > 1:
            base_lr *= C.get()['gradual']['lr'] * min(
                1.0, epoch / C.get()['gradual']['epoch'])
        lr = base_lr * (0.1**(epoch //
                              (max_epoch * 0.5))) * (0.1**(epoch //
                                                           (max_epoch * 0.75)))

        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
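A worked example of the schedule above with illustrative values (base_lr and max_epoch are assumptions): the rate drops 10x once epoch reaches 50% of max_epoch and 10x again at 75%.

base_lr, max_epoch = 0.1, 300
for epoch in (0, 149, 150, 225):
    lr = base_lr * (0.1**(epoch // (max_epoch * 0.5))) * (0.1**(epoch // (max_epoch * 0.75)))
    print(epoch, lr)  # 0.1 until epoch 150, then ~0.01, then ~0.001 from epoch 225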
Example #13
def pretrain_k_folds(copied_conf, cv_ratio, num_fold) -> None:
    global MODEL_PATHS, DATASET_ROOT
    global logger, watcher
    logger.info(
        '----- [Phase 1.] Train without Augmentations cv=%d ratio(test)=%.1f -----'
        % (num_fold, cv_ratio))
    watcher.start(tag='train_no_aug')

    reqs = [
        train_model.remote(config=copy.deepcopy(copied_conf),
                           dataroot=DATASET_ROOT,
                           augment=Config.get()['aug'],
                           cv_ratio_test=cv_ratio,
                           cv_fold=fold_idx,
                           save_path=MODEL_PATHS[fold_idx],
                           skip_exist=True) for fold_idx in range(num_fold)
    ]

    tqdm_epoch = tqdm(range(Config.get()['epoch']))
    is_done = False
    for epoch in tqdm_epoch:
        while True:
            epochs_per_fold = OrderedDict()
            for fold_idx in range(num_fold):
                try:
                    latest_ckpt = torch.load(MODEL_PATHS[fold_idx])
                    if 'epoch' not in latest_ckpt:
                        epochs_per_fold['fold%d' %
                                        (fold_idx + 1)] = Config.get()['epoch']
                        continue
                    epochs_per_fold['fold%d' %
                                    (fold_idx + 1)] = latest_ckpt['epoch']
                except Exception as e:
                    continue
            tqdm_epoch.set_postfix(epochs_per_fold)
            if len(epochs_per_fold) == num_fold and min(
                    epochs_per_fold.values()) >= Config.get()['epoch']:
                is_done = True
            if len(epochs_per_fold) == num_fold and min(
                    epochs_per_fold.values()) >= epoch:
                break
            time.sleep(10)
        if is_done:
            break

    logger.info('getting results...')
    pretrain_results = ray.get(reqs)
    for r_model, r_cv, r_dict in pretrain_results:
        logger.info(
            'model=%s cv=%d top1_train=%.4f top1_valid=%.4f' %
            (r_model, r_cv + 1, r_dict['top1_train'], r_dict['top1_valid']))
    logger.info('processed in %.4f secs' % watcher.pause('train_no_aug'))
Example #14
def adjust_learning_rate_resnet(optimizer):
    """
    Sets the learning rate to the initial LR decayed by 10 at fixed milestones:
    [30, 60, 80] for 90-epoch runs, [90, 180, 240] for 270-epoch (AutoAugment) runs.
    """

    if C.get()['epoch'] == 90:
        return torch.optim.lr_scheduler.MultiStepLR(optimizer, [30, 60, 80])
    elif C.get()['epoch'] == 270:  # autoaugment
        return torch.optim.lr_scheduler.MultiStepLR(optimizer, [90, 180, 240])
    else:
        raise ValueError('invalid epoch=%d for resnet scheduler' %
                         C.get()['epoch'])
Example #15
def efficientnet_params(model_name):
    """ Map EfficientNet model name to parameter coefficients. """
    params_dict = {
        # Coefficients:   width,depth,res,dropout
        'efficientnet-l2t': (1.0, 1.0, C.get()['size'], C.get()['dropout']),  # b0 with higher dropout ratio
        'efficientnet-b0': (1.0, 1.0, 224, 0.2),
        'efficientnet-b1': (1.0, 1.1, 240, 0.2),
        'efficientnet-b2': (1.1, 1.2, 260, 0.3),
        'efficientnet-b3': (1.2, 1.4, 300, 0.3),
        'efficientnet-b4': (1.4, 1.8, 380, 0.4),
        'efficientnet-b5': (1.6, 2.2, 456, 0.4),
        'efficientnet-b6': (1.8, 2.6, 528, 0.5),
        'efficientnet-b7': (2.0, 3.1, 600, 0.5),
    }
    return params_dict[model_name]
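Usage is a straight table lookup; for example, with values taken from the dict above:

width, depth, res, dropout = efficientnet_params('efficientnet-b3')
assert (width, depth, res, dropout) == (1.2, 1.4, 300, 0.3)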
Example #16
def get_affinity(aug, aff_bases, config, augment):
    C.get()
    C.get().conf = config
    # setup - provided augmentation rules
    C.get()['aug'] = aug
    load_paths = augment['load_paths']
    cv_num = augment["cv_num"]

    aug_loaders = []
    for cv_id in range(cv_num):
        _, tl, validloader, tl2 = get_dataloaders(C.get()['dataset'],
                                                  C.get()['batch'],
                                                  augment['dataroot'],
                                                  augment['cv_ratio_test'],
                                                  split_idx=cv_id)
        aug_loaders.append(validloader)
        del tl, tl2

    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    aug_accs = []
    for cv_id, loader in enumerate(aug_loaders):
        # eval
        model = get_model(C.get()['model'], num_class(C.get()['dataset']))
        ckpt = torch.load(load_paths[cv_id])
        if 'model' in ckpt:
            model.load_state_dict(ckpt['model'])
        else:
            model.load_state_dict(ckpt)
        model.eval()

        metrics = Accumulator()
        for data, label in loader:
            data = data.cuda()
            label = label.cuda()

            pred = model(data)
            loss = loss_fn(pred, label)  # (N)

            _, pred = pred.topk(1, 1, True, True)
            pred = pred.t()
            correct = pred.eq(label.view(
                1, -1).expand_as(pred)).detach().cpu().numpy()  # (1,N)

            metrics.add_dict({
                'minus_loss':
                -1 * np.sum(loss.detach().cpu().numpy()),
                'correct':
                np.sum(correct),
                'cnt':
                len(data)
            })
            del loss, correct, pred, data, label
        aug_accs.append(metrics['correct'] / metrics['cnt'])
    del model
    affs = []
    for aug_valid, clean_valid in zip(aug_accs, aff_bases):
        affs.append(aug_valid - clean_valid)
    return affs
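An illustration with made-up numbers: the returned affinities are per-fold accuracy deltas between validation under the candidate augmentation and the clean baselines passed in aff_bases.

aug_accs = [0.91, 0.89]
aff_bases = [0.93, 0.92]
print([aug - base for aug, base in zip(aug_accs, aff_bases)])  # ≈ [-0.02, -0.03]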
Example #17
    def retrain(self, save_path=None):
        if save_path is None:
            save_path = _get_path(
                C.get()['dataset'],
                C.get()['model']['type'],
                'ratio%.1f_augment%d' % (args.cv_ratio, 0))

        logger.info('getting results...')
        final_results = train_model(copy.deepcopy(self.copied_c),
                                    args.dataroot,
                                    self.final_policy_set,
                                    0.0,
                                    0,
                                    save_path=save_path)
        logger.info(final_results)
        return final_results
Example #18
def get_dataset(oversample=True):
    cv_fold = C.get()['cv_fold']

    if cv_fold < 0:
        with open(os.path.join('./split/tr_names.txt'), 'r') as text_file:
            tr_n = text_file.read().split(',')
        with open(os.path.join('./split/val_names.txt'), 'r') as text_file:
            val_n = text_file.read().split(',')
        cval_n = val_n
    else:
        with open(os.path.join('./split/tr_names_fold%d' % cv_fold), 'r') as text_file:
            tr_n = text_file.read().split(',')
        with open(os.path.join('./split/val_names_fold%d' % cv_fold), 'r') as text_file:
            val_n = text_file.read().split(',')
        with open(os.path.join('./split/val_names.txt'), 'r') as text_file:
            cval_n = text_file.read().split(',')

    # test_names = sorted({f[:36] for f in os.listdir(TEST)})
    with open(SAMPLE, 'r') as text_file:
        test_names = [x.split(',')[0] for x in text_file.readlines()[1:]]

    # print(len(tr_n), len(val_n), len(test_names))
    if oversample:
        s = Oversampling(os.path.join(PATH, LABELS))
        tr_n = [idx for idx in tr_n for _ in range(s.get(idx))]

    return tr_n, val_n, cval_n, test_names
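The Oversampling helper is not shown in any of these examples; since tr_n repeats each id s.get(idx) times, get presumably returns a per-sample duplication count. A minimal sketch under that assumption (the rare-class rule is invented for illustration):

import pandas as pd

class Oversampling:
    def __init__(self, labels_csv):
        self.labels = pd.read_csv(labels_csv).set_index('Id')

    def get(self, idx):
        # hypothetical rule: repeat images containing any rare class four times
        rare = {8, 9, 10, 15, 20, 24, 26, 27}
        targets = {int(x) for x in self.labels.loc[idx]['Target'].split()}
        return 4 if targets & rare else 1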
Example #19
def build_save_str(args):
    optional_tokens = []
    if "gmaxup_cifar" in args.dataroot:
        optional_tokens.append("gmaxup")
    if args.name:
        optional_tokens.append(args.name)

    optional_str = ""
    if len(optional_tokens):
        for token in optional_tokens:
            optional_str += "{}-".format(token)

    return '{}e-{}-{}{}'.format(
        C.get()['epoch'],
        C.get()['aug'],  # augmentation string
        optional_str,  # optional string
        datetime.datetime.now().strftime("%-m.%d.%y-%H:%M:%S:%f"))  # %S (not %s) for seconds
Example #20
    def __adjust_learning_rate_pyramid(epoch):
        """Sets the learning rate to the initial LR, decayed by 10x at 50% and again at 75% of max_epoch"""
        base_lr = C.get()['lr']
        lr = base_lr * (0.1**(epoch //
                              (max_epoch * 0.5))) * (0.1**(epoch //
                                                           (max_epoch * 0.75)))

        return lr
Example #21
    def augmentation(self, data, gr_ids, policy):
        aug_imgs = []
        if "cifar" in C.get()["dataset"]:
            mean, std = _CIFAR_MEAN, _CIFAR_STD
        elif "svhn" in C.get()["dataset"]:
            mean, std = _SVHN_MEAN, _SVHN_STD
        # applied_policies = []
        for gr_id, img in zip(gr_ids, data):
            pil_img = transforms.ToPILImage()(UnNormalize()(img.cpu()))
            _aug = Augmentation(policy[int(gr_id)])
            aug_img = _aug(pil_img)
            aug_img = self.transform(aug_img)
            aug_imgs.append(aug_img)
            # applied_policy = _aug.policy  # TODO
            # applied_policies.append(applied_policy)
        aug_imgs = torch.stack(aug_imgs)
        return aug_imgs.cuda()  # , applied_policies
Example #22
def eval_tta3(config, augment, reporter):
    C.get()
    C.get().conf = config
    save_path = augment['save_path']
    cv_id, gr_id = augment["cv_id"], augment["gr_id"]
    gr_ids = augment["gr_ids"]

    # setup - provided augmentation rules
    C.get()['aug'] = policy_decoder(augment, augment['num_policy'],
                                    augment['num_op'])

    # eval
    model = get_model(C.get()['model'], num_class(C.get()['dataset']))
    ckpt = torch.load(save_path)
    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    del ckpt
    model.eval()

    loader = get_post_dataloader(C.get()["dataset"],
                                 C.get()['batch'], augment["dataroot"],
                                 augment['cv_ratio_test'], cv_id, gr_id,
                                 gr_ids)

    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    for data, label in loader:
        data = data.cuda()
        label = label.cuda()

        pred = model(data)
        loss = loss_fn(pred, label)  # (N)

        _, pred = pred.topk(1, 1, True, True)
        pred = pred.t()
        correct = pred.eq(label.view(
            1, -1).expand_as(pred)).detach().cpu().numpy()  # (1,N)

        metrics.add_dict({
            'loss': np.sum(loss.detach().cpu().numpy()),
            'correct': np.sum(correct),
            'cnt': len(data)
        })
        del loss, correct, pred, data, label
    del model, loader
    metrics = metrics / 'cnt'
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    reporter(loss=metrics['loss'],
             top1_valid=metrics['correct'],
             elapsed_time=gpu_secs,
             done=True)
    return metrics['correct']
Example #23
def get_dataloaders(tests_aug=False):
    tr, vl, cvl, ts = get_dataset()
    if C.get()['extdata']:
        if C.get()['cv_fold'] >= 0:
            with open(os.path.join('./split/tr_ext_names_fold%d' % C.get()['cv_fold']), 'r') as text_file:
                tr_n = text_file.read().split(',')
            with open(os.path.join('./split/val_ext_names_fold%d' % C.get()['cv_fold']), 'r') as text_file:
                val_n = text_file.read().split(',')
            ds_train = CombinedDataset(KaggleDataset('train', tr), HPADataset('train_hpa_v18', tr_n))
            ds_valid = CombinedDataset(KaggleDataset('valid', tr), HPADataset('valid_hpa_v18', val_n))
        else:
            with open(os.path.join('./split/tr_ext_names_fold0'), 'r') as text_file:
                tr_n = text_file.read().split(',')
            with open(os.path.join('./split/val_ext_names_fold0'), 'r') as text_file:
                val_n = text_file.read().split(',')
            tr_n += val_n
            ds_train = CombinedDataset(KaggleDataset('train', tr), HPADataset('train_hpa_v18', tr_n))
            ds_valid = KaggleDataset('valid', tr)
    else:
        ds_train = KaggleDataset('train', tr)
        ds_valid = KaggleDataset('valid', vl, aug=False)
    ds_cvalid = KaggleDataset('cvalid', cvl, aug=False)
    ds_tests = KaggleDataset('tests', ts, aug=tests_aug)
    print('data size=', len(ds_train), len(ds_valid), len(ds_cvalid), len(ds_tests))

    d_train = torch.utils.data.DataLoader(ds_train, C.get()['batch'], pin_memory=True, num_workers=16 if C.get()['highres'] else 128, shuffle=True, drop_last=True)
    d_valid = torch.utils.data.DataLoader(ds_valid, C.get()['batch'], pin_memory=True, num_workers=4, shuffle=False, drop_last=True)
    d_cvalid = torch.utils.data.DataLoader(ds_cvalid, C.get()['batch'], pin_memory=True, num_workers=4, shuffle=False, drop_last=True)
    d_tests = torch.utils.data.DataLoader(ds_tests, test_aug_sz if tests_aug else 1, pin_memory=True, num_workers=16, shuffle=False)

    return d_train, d_valid, d_cvalid, d_tests
Example #24
    def __init__(self, setname, data_list):
        csv = pd.read_csv(LABELS_HPA).set_index('Id')
        super().__init__(setname, data_list, aug=False)
        if C.get()['highres']:
            self.default_path = '/data/public/rw/kaggle-human-protein-atlas/hpa_v18/images_2048'
        else:
            self.default_path = '/data/public/rw/kaggle-human-protein-atlas/hpa_v18/images'
        self.labels = csv
        self.ext = '.png'
Example #25
def train_model(config,
                dataroot,
                augment,
                cv_ratio_test,
                cv_fold,
                save_path=None,
                skip_exist=False):  # TODO: decouple the config-related operations here
    Config.get()
    Config.get().conf = config
    Config.get()['aug'] = augment

    result = train_and_eval(None,
                            dataroot,
                            cv_ratio_test,
                            cv_fold,
                            save_path=save_path,
                            only_eval=skip_exist)
    return Config.get()['model']['type'], cv_fold, result
Example #26
def adjust_learning_rate_resnet(optimizer):
    epoch = C.get()['epoch']

    if epoch == 90:
        return torch.optim.lr_scheduler.MultiStepLR(optimizer, [30, 60, 80])
    elif epoch == 270:
        return torch.optim.lr_scheduler.MultiStepLR(optimizer, [90, 180, 240])
    else:
        raise ValueError(
            'Invalid epoch={} for resnet scheduler.'.format(epoch))
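A minimal sketch of wiring this scheduler into a training loop (the model choice and optimizer settings are assumptions, not taken from these examples):

import torch
import torchvision

model = torchvision.models.resnet50()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = adjust_learning_rate_resnet(optimizer)  # requires C.get()['epoch'] in {90, 270}
for epoch in range(C.get()['epoch']):
    # ... run one training epoch, e.g. with run_epoch(...) ...
    scheduler.step()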
Example #27
def train_model(config,
                dataroot,
                augment,
                cv_ratio_test,
                cv_fold,
                save_path=None,
                skip_exist=False):
    print('into training')
    C.get()
    C.get().conf = config
    C.get()['aug'] = augment

    result = train_and_eval(None,
                            dataroot,
                            cv_ratio_test,
                            cv_fold,
                            save_path=save_path,
                            only_eval=skip_exist)
    return C.get()['model']['type'], cv_fold, result
Example #28
def eval_tta2(config, augment, reporter):
    C.get()
    C.get().conf = config
    cv_ratio_test, cv_id, save_path = augment['cv_ratio_test'], augment['cv_id'], augment['save_path']
    gr_id = augment["gr_id"]
    num_repeat = 1

    # setup - provided augmentation rules
    C.get()['aug'] = policy_decoder(augment, augment['num_policy'], augment['num_op'])

    # eval
    model = get_model(C.get()['model'], num_class(C.get()['dataset']))
    ckpt = torch.load(save_path)
    if 'model' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)
    model.eval()

    loaders = []
    for i in range(num_repeat):
        _, tl, validloader, tl2 = get_dataloaders(C.get()['dataset'], C.get()['batch'], augment['dataroot'], cv_ratio_test, split_idx=cv_id, gr_assign=augment["gr_assign"], gr_id=gr_id)
        loaders.append(validloader)
        del tl, tl2


    start_t = time.time()
    metrics = Accumulator()
    loss_fn = torch.nn.CrossEntropyLoss(reduction='none')
    for loader in loaders:
        for data, label in loader:
            data = data.cuda()
            label = label.cuda()

            pred = model(data)
            loss = loss_fn(pred, label) # (N)

            _, pred = pred.topk(1, 1, True, True)
            pred = pred.t()
            correct = pred.eq(label.view(1, -1).expand_as(pred)).detach().cpu().numpy() # (1,N)

            metrics.add_dict({
                'minus_loss': -1 * np.sum(loss.detach().cpu().numpy()),
                'correct': np.sum(correct),
                'cnt': len(data)
            })
            del loss, correct, pred, data, label
    del model
    metrics = metrics / 'cnt'
    gpu_secs = (time.time() - start_t) * torch.cuda.device_count()
    reporter(minus_loss=metrics['minus_loss'], top1_valid=metrics['correct'], elapsed_time=gpu_secs, done=True)
    return metrics['correct']
Example #29
def eval_controller(config,
                    controller,
                    dataroot,
                    cv_ratio=0.,
                    cv_fold=0,
                    save_path=None,
                    skip_exist=False):
    """
    training with augmented data and test with pure data
    """
    C.get()
    C.get().conf = config
    result = train_and_eval_ctl(None,
                                controller,
                                dataroot,
                                test_ratio=cv_ratio,
                                cv_fold=cv_fold,
                                save_path=save_path,
                                only_eval=skip_exist)
    return C.get()['model']['type'], cv_fold, result
Example #30
def train_model(config,
                dataroot,
                augment,
                cv_ratio_test,
                cv_num,
                cv_fold,
                save_path=None,
                skip_exist=False,
                is_dc=False):
    C.get()
    C.get().conf = config
    C.get()['aug'] = augment

    result = train_and_eval(None,
                            os.path.abspath(dataroot),
                            cv_ratio_test,
                            cv_num,
                            cv_fold,
                            save_path=save_path,
                            only_eval=skip_exist)
    return C.get()['model']['type'], cv_fold, result