Example #1
    def get_databunch(self, valid_pct=0.1):
        df = self._get_df_from_file()

        data_normaliser = preprocessing.MinMaxScaler()
        data = data_normaliser.fit_transform(df["adj_close"].values.reshape(
            -1, 1))

        # Sliding windows of `day_count` consecutive normalised closes
        X = np.array([
            data[i:i + self.day_count].copy()
            for i in range(len(data) - self.day_count)
        ])
        # Target: the normalised close on the day following each window
        y = np.array([
            data[:, 0][i + self.day_count].copy()
            for i in range(len(data) - self.day_count)
        ])
        # Tabular features taken from the last day of each window
        tabular_data = np.array([
            df.drop(["adj_close", "Elapsed"],
                    axis=1).iloc[i + self.day_count - 1]
            for i in range(len(data) - self.day_count)
        ])
        y = np.expand_dims(y, -1)

        n = int(len(X) * (1 - valid_pct))  # split point between training and validation samples

        train_ds = StockDataset(X[:n], tabular_data[:n], y[:n])
        valid_ds = StockDataset(X[n:], tabular_data[n:], y[n:])
        return DataBunch.create(train_ds, valid_ds, bs=self.batch_size)
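StockDataset is not defined in this snippet; DataBunch.create only needs plain torch Datasets that yield (inputs, target) pairs, so a minimal sketch could look like the following (the field names and float32 casts are assumptions, not part of the original):

import torch
from torch.utils.data import Dataset

class StockDataset(Dataset):
    def __init__(self, windows, tabular, targets):
        self.windows = torch.as_tensor(windows, dtype=torch.float32)  # (N, day_count, 1) price windows
        self.tabular = torch.as_tensor(tabular, dtype=torch.float32)  # (N, n_features) tabular rows
        self.targets = torch.as_tensor(targets, dtype=torch.float32)  # (N, 1) next-day targets

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, i):
        return (self.windows[i], self.tabular[i]), self.targets[i]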
Example #2
    def after_prepare_data_hook(self):
        """Wrap the prepared datasets in a DataBunch."""
        logger.debug("kernel use device %s", self.device)
        self.data = DataBunch.create(self.train_dataset,
                                     self.validation_dataset,
                                     bs=self.config.batch_size,
                                     device=self.device,
                                     num_workers=self.config.num_workers)
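In fastai v1, passing `device` here makes the resulting DataLoaders move every batch onto that device, and `num_workers` is forwarded to the workers of the underlying PyTorch DataLoader.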
Example #3
def test_fastai_pruning_callback(tmpdir):
    # type: (typing.Any) -> None

    train_x = np.zeros((16, 20), np.float32)
    train_y = np.zeros((16, ), np.int64)
    valid_x = np.zeros((4, 20), np.float32)
    valid_y = np.zeros((4, ), np.int64)
    train_ds = ArrayDataset(train_x, train_y)
    valid_ds = ArrayDataset(valid_x, valid_y)

    data_bunch = DataBunch.create(
        train_ds=train_ds,
        valid_ds=valid_ds,
        test_ds=None,
        path=tmpdir,
        bs=1  # batch size
    )

    def objective(trial):
        # type: (optuna.trial.Trial) -> float

        model = nn.Sequential(nn.Linear(20, 1), nn.Sigmoid())
        learn = Learner(
            data_bunch,
            model,
            metrics=[accuracy],
            callback_fns=[
                partial(FastAIPruningCallback,
                        trial=trial,
                        monitor="valid_loss")
            ],
        )

        learn.fit(1)

        return 1.0

    study = optuna.create_study(pruner=DeterministicPruner(True))
    study.optimize(objective, n_trials=1)
    assert study.trials[0].state == optuna.structs.TrialState.PRUNED

    study = optuna.create_study(pruner=DeterministicPruner(False))
    study.optimize(objective, n_trials=1)
    assert study.trials[0].state == optuna.structs.TrialState.COMPLETE
    assert study.trials[0].value == 1.0
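FastAIPruningCallback reports the monitored value (`valid_loss` above) to the trial at the end of each epoch and raises a TrialPruned exception when the pruner asks to stop, which is why the `DeterministicPruner(True)` study ends in the PRUNED state while the `DeterministicPruner(False)` study runs to completion.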
Example #4
def classifier_data_bunch(config):
    split_df = pd.read_csv(config.split_csv)
    if config.debug_run: split_df = split_df.loc[:200]
    train_df = split_df[split_df['is_valid']==False].reset_index(drop=True)
    valid_df = split_df[split_df['is_valid']==True].reset_index(drop=True)

    if config.load_valid_crops:
        valid_df_crops = []
        for i in range(len(valid_df)):
            for j in range(1, 8):
                crop_id = valid_df.loc[i, 'ImageId_ClassId'].replace('.jpg', '_c{}.jpg'.format(j))
                valid_df_crops.append({'ImageId_ClassId': crop_id, '1': valid_df.loc[i, '1'],
                                     '2': valid_df.loc[i, '2'], '3': valid_df.loc[i, '3'], 
                                     '4': valid_df.loc[i, '4'], 'is_valid': valid_df.loc[i, 'is_valid']})
        valid_df = pd.DataFrame(valid_df_crops)

    train_tf = alb_transform_train(config.imsize)
    valid_tf = alb_transform_test(config.imsize)

    train_ds = SteelClassifierDataset(train_df, transforms=train_tf)
    valid_ds = SteelClassifierDataset(valid_df, transforms=valid_tf)
    data = DataBunch.create(train_ds, valid_ds, bs=config.batch_size,
                            num_workers=config.num_workers)
    return data
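A hypothetical way to consume the function above; the resnet head, the loss choice, and the `config` fields used here are assumptions, not part of the original:

import torch.nn as nn
from torchvision.models import resnet34
from fastai.basic_train import Learner

model = resnet34(num_classes=4)                      # one logit per defect class
data = classifier_data_bunch(config)
learn = Learner(data, model, loss_func=nn.BCEWithLogitsLoss())
learn.fit_one_cycle(config.epochs, config.lr)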
Example #5
                   nn.Linear(n_channels[3], n_classes)]
        
        self.features = nn.Sequential(*layers)
        
    def forward(self, x): return self.features(x)
    
def wrn_22(): 
    return WideResNet(n_groups=3, N=3, n_classes=10, k=6)

model = wrn_22()

from fastai.basic_data import DataBunch
from fastai.train import Learner
from fastai.metrics import accuracy

data = DataBunch.create(train_ds, valid_ds, bs=batch_size, path='./data/cifar10')
learner = Learner(data, model, loss_func=F.cross_entropy, metrics=[accuracy])
learner.clip = 0.1 # gradients are clipped to the range [-0.1, 0.1]

# Find best learning rate
learner.lr_find()
learner.recorder.plot() # select lr with largest negative gradient (about 5e-3)

# Training
epochs = 1
lr = 5e-3
wd = 1e-4

import time

t0 = time.time()
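The snippet ends just after the timer starts; a plausible continuation using fastai v1's one-cycle schedule (this call is not part of the original) would be:

learner.fit_one_cycle(epochs, lr, wd=wd)
print('Training took {:.1f}s'.format(time.time() - t0))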
Example #6
def main(config, args):
    if torch.cuda.is_available():
        cudnn.benchmark = True
        print('Using CUDA')
    else:
        print('**** CUDA is not available ****')

    pprint.pprint(config)

    if args.exp is None:
        if not os.path.exists('./config/old_configs/' + config.exp_name):
            os.makedirs('./config/old_configs/' + config.exp_name)
        shutil.copy2(
            './config/config.py',
            './config/old_configs/{}/config.py'.format(config.exp_name))

    if not os.path.exists('./model_weights/' + config.exp_name):
        os.makedirs('./model_weights/' + config.exp_name)
    if not os.path.exists('./logs/' + config.exp_name):
        os.makedirs('./logs/' + config.exp_name)

    data_df = pd.read_csv(config.DATA_CSV_PATH)
    if os.path.exists('/content/data'):
        print('On Colab')
        data_df['Id'] = data_df['Id'].apply(lambda x: '/content' + x[1:])

    if config.dataclass is not None:
        data_df = data_df[data_df['Type(Full/Head/Unclean/Bad)'] ==
                          config.dataclass].reset_index(drop=True)
    split_train_mask = (data_df['Fold'] != 'Fold{}'.format(args.foldidx))
    train_df = data_df[split_train_mask
                       & (data_df['Split'] == 'Train')].reset_index(drop=True)
    valid_df = data_df[(~split_train_mask)
                       & (data_df['Split'] == 'Train')].reset_index(drop=True)
    test_df = data_df[data_df['Split'] == 'Test'].reset_index(drop=True)
    maintest_df = data_df[data_df['Split'] == 'MainTest'].reset_index(
        drop=True)

    print("Training with valid fold: ", args.foldidx)
    print(valid_df.head())

    if config.pseudo_path is not None:
        assert not (config.add_val_pseudo and config.add_val_orig)
        if config.add_val_pseudo:
            pseudo_df = pd.concat((valid_df, test_df, maintest_df))
        else:
            pseudo_df = pd.concat((test_df, maintest_df))
        pseudo_df['Id'] = pseudo_df['Id'] + '_pseudo'
        if config.add_val_orig:
            pseudo_df = pd.concat((pseudo_df, valid_df))
        train_df = pd.concat((train_df, pseudo_df)).reset_index(drop=True)

    train_tfms = get_train_tfms(config)
    print(train_tfms)
    if config.debug and config.reduce_dataset:
        if config.pseudo_path is not None:
            train_df = pd.concat(
                (train_df[:10], pseudo_df[:10])).reset_index(drop=True)
        else:
            train_df = train_df[:10]
        valid_df = valid_df[:10]


#     DatasetClass = KBPDataset2D if psutil.virtual_memory().total < 20e9 else KBPDataset2DStack
    DatasetClass = KBPDataset2D
    train_ds = DatasetClass(config, train_df, transform=train_tfms)
    valid_ds = DatasetClass(config, valid_df, valid=True)

    # valid_dl = DataLoader(valid_ds, batch_size=128, shuffle=False, num_workers=config.num_workers)

    criterion = KBPLoss(config)

    Net = getattr(model_list, config.model_name)

    net = Net(config=config).to(config.device)
    print(net)

    def count_parameters(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("Number of parameters: ", count_parameters(net))

    if config.load_model_ckpt is not None:
        print('Loading model from {}'.format(
            config.load_model_ckpt.format(args.foldidx)))
        net.load_state_dict(
            torch.load(config.load_model_ckpt.format(args.foldidx))['model'])

    gpu = setup_distrib(config.gpu)
    opt = config.optimizer
    mom = config.mom
    alpha = config.alpha
    eps = config.eps

    if opt == 'adam':
        opt_func = partial(optim.Adam,
                           betas=(mom, alpha),
                           eps=eps,
                           amsgrad=config.amsgrad)
    elif opt == 'adamw':
        opt_func = partial(optim.AdamW, betas=(mom, alpha), eps=eps)
    elif opt == 'radam':
        opt_func = partial(RAdam,
                           betas=(mom, alpha),
                           eps=eps,
                           degenerated_to_sgd=config.radam_degenerated_to_sgd)
    elif opt == 'sgd':
        opt_func = partial(optim.SGD, momentum=mom, nesterov=config.nesterov)
    elif opt == 'ranger':
        opt_func = partial(Ranger, betas=(mom, alpha), eps=eps)
    else:
        raise ValueError("Optimizer not recognized")
    print(opt_func)

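    # DataBunch.create wraps the datasets in train/valid DataLoaders; with no
    # explicit device it falls back to fastai's default (the first CUDA device
    # when one is available).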
    data = DataBunch.create(train_ds,
                            valid_ds,
                            bs=config.batch_size,
                            num_workers=config.num_workers)

    # metrics = [dose_score, dvh_score, pred_mean, target_mean]
    metrics = [dose_score2D, dvh_score2D, pred_mean2D, target_mean2D]
    evalbatchaccum = EvalBatchAccumulator(config,
                                          target_bs=128,
                                          num_metrics=len(metrics))
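    # Note: this project's Learner takes the metric accumulator before the usual
    # (data, model) arguments of stock fastai's Learner.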
    learn = (Learner(evalbatchaccum,
                     data,
                     net,
                     wd=config.weight_decay,
                     opt_func=opt_func,
                     bn_wd=False,
                     true_wd=True,
                     loss_func=criterion,
                     metrics=metrics,
                     path='./model_weights/{}/'.format(config.exp_name)))
    if config.fp16:
        print('Training with mixed precision...')
        learn = learn.to_fp16(dynamic=True)
    else:
        print('Full precision training...')
    if gpu is None: learn.to_parallel()
    elif num_distrib() > 1: learn.to_distributed(gpu)
    if config.mixup: learn = learn.mixup(alpha=config.mixup, stack_y=False)
    print("Learn path: ", learn.path)
    best_save_cb = SaveBestModel(learn,
                                 config,
                                 outfile='_fold{}'.format(args.foldidx))
    logger_cb = CSVLogger(learn)
    logger_cb.path = Path(
        str(logger_cb.path).replace('model_weights/', 'logs/').replace(
            '.csv', '_fold{}.csv'.format(args.foldidx)))
    callbacks = [best_save_cb, logger_cb]

    if config.teachers is not None:
        package = 'config.old_configs.{}.config'.format(config.teachers)
        teacherconfig = importlib.import_module(package).config
        teachers = []
        for fold in range(5):
            teacher = getattr(model_list, teacherconfig.model_name)
            teacher = teacher(teacherconfig)
            model_ckpt = './model_weights/{}/models/best_dose_fold{}.pth'.format(
                teacherconfig.exp_name, fold)
            print("Loading teacher {} encoder from {}".format(
                fold, model_ckpt))
            teacher.load_state_dict(torch.load(model_ckpt)['model'])
            teacher.to(config.device)
            teacher.eval()
            for param in teacher.parameters():
                param.requires_grad = False
            teachers.append(teacher)
    else:
        teachers = None

    if config.wandb:
        wandb.init(project=config.wandb_project, name=config.exp_name)
        wandb_cb = WandbCallback(learn)
        callbacks.append(wandb_cb)

    print(learn.loss_func.config.loss_dict)
    print(learn.opt_func)
    print("Weight decay: ", learn.wd)

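    # `teachers` is not an argument of stock fastai's fit_one_cycle; the project
    # presumably customises the training loop to use the frozen teacher models
    # loaded above (e.g. for distillation).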
    learn.fit_one_cycle(config.epochs,
                        config.lr,
                        callbacks=callbacks,
                        div_factor=config.div_factor,
                        pct_start=config.pct_start,
                        final_div=config.final_div,
                        teachers=teachers)

    best_str = "Best valid loss: {}, dose score: {}, dvh score: {}".format(
        best_save_cb.best_loss, best_save_cb.best_dose.item(),
        best_save_cb.best_dvh.item())
    print(best_str)
    with open(
            "./logs/{}/bestmetrics_fold{}.txt".format(config.exp_name,
                                                      args.foldidx), "a") as f:
        f.write(best_str)