Example No. 1
 def lr_find(self, files=None, bs=None, n_jobs=-1, verbose=1, **kwargs):
     bs = bs or self.bs
     files = files or self.files
     train_ds = RandomTileDataset(files,
                                  label_fn=self.label_fn,
                                  n_jobs=n_jobs,
                                  verbose=verbose,
                                  **self.mw_kwargs,
                                  **self.ds_kwargs)
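     # Reuse train_ds in the validation slot; lr_find does not need a separate validation set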
     dls = DataLoaders.from_dsets(train_ds, train_ds, bs=bs)
     pre = None if self.pretrained == 'new' else self.pretrained
     model = torch.hub.load(self.repo,
                            self.arch,
                            pretrained=pre,
                            n_classes=dls.c,
                            in_channels=self.in_channels)
     if torch.cuda.is_available(): dls.cuda(), model.cuda()
     learn = Learner(dls,
                     model,
                     metrics=self.metrics,
                     wd=self.wd,
                     loss_func=self.loss_fn,
                     opt_func=_optim_dict[self.optim])
     if self.mpt: learn.to_fp16()
     sug_lrs = learn.lr_find(**kwargs)
     return sug_lrs, learn.recorder
Example No. 2
def test_fastai_pruning_callback(tmpdir: Any) -> None:

    train_loader = _generate_dummy_dataset()
    test_loader = _generate_dummy_dataset()

    data = DataLoaders(train_loader, test_loader, path=tmpdir)

    def objective(trial: optuna.trial.Trial) -> float:
        model = nn.Sequential(nn.Linear(20, 1), nn.Sigmoid())
        learn = Learner(
            data,
            model,
            loss_func=F.nll_loss,
            metrics=[accuracy],
            cbs=[CudaCallback],
        )
        learn.fit(1, cbs=FastAIV2PruningCallback(trial))

        return 1.0

    study = optuna.create_study(pruner=DeterministicPruner(True))
    study.optimize(objective, n_trials=1)
    assert study.trials[0].state == optuna.trial.TrialState.PRUNED

    study = optuna.create_study(pruner=DeterministicPruner(False))
    study.optimize(objective, n_trials=1)
    assert study.trials[0].state == optuna.trial.TrialState.COMPLETE
    assert study.trials[0].value == 1.0
Example No. 3
    def make_datasets(
        self,
        series=None,
        valid_p=None,
        train_bs=None,
        valid_bs=None,
        normalize=None,
    ):
        if series is None:
            if self.df is None:
                raise ValueError("must pass a series.")
        else:
            self.tabularize(series)
        valid_p = self.valid_p if valid_p is None else valid_p
        train_bs = self.train_bs if train_bs is None else train_bs
        valid_bs = self.valid_bs if valid_bs is None else valid_bs
        normalize = self.normalize if normalize is None else normalize

        procs = []
        if normalize:
            procs.append(Normalize)

        df_all = self.df
        splits = utils_data.split_by_p_valid(valid_p, len(df_all))
        cont_names = [col for col in list(df_all.columns) if "x_" == col[:2]]
        target_names = [col for col in list(df_all.columns) if "y_" == col[:2]]
        tp = TabularPandas(
            df_all,
            procs=procs,
            cat_names=None,
            cont_names=cont_names,
            y_names=target_names,
            splits=splits,
        )
        log.debug("cont var num: {}, names: {}".format(len(tp.cont_names),
                                                       tp.cont_names))

        trn_dl = TabDataLoader(tp.train,
                               bs=train_bs,
                               shuffle=True,
                               drop_last=True,
                               device=self.device)
        val_dl = TabDataLoader(tp.valid, bs=valid_bs, device=self.device)
        self.dls = DataLoaders(trn_dl, val_dl, device=self.device)
        log.debug("showing batch")
        log.debug("{}".format(self.dls.show_batch(show=False)))
        return self
Example No. 4
def make_torch_dataloaders(train_dataset,
                           test_dataset,
                           rank,
                           world_size,
                           bs,
                           num_workers=4,
                           distrib=True,
                           sync_valid=False):
    "make torch-based distributed dataloaders from torch compatible datasets"
    if distrib:
        train_sampler = th_distrib.DistributedSampler(train_dataset,
                                                      num_replicas=world_size,
                                                      rank=rank,
                                                      shuffle=True)
        train_loader = th_data.DataLoader(
            train_dataset,
            batch_size=bs,
            sampler=train_sampler,
            # shuffle=True,
            num_workers=num_workers,
            drop_last=True)

        if sync_valid:
            test_sampler = th_distrib.DistributedSampler(
                test_dataset,
                num_replicas=world_size,
                rank=rank,
                shuffle=False)

            test_loader = th_data.DataLoader(
                test_dataset,
                batch_size=bs,
                sampler=test_sampler,
                # shuffle=False,
                num_workers=num_workers,
                drop_last=True)
        else:
            test_loader = th_data.DataLoader(test_dataset,
                                             batch_size=bs,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             drop_last=True)

    else:
        train_loader = th_data.DataLoader(
            train_dataset,
            batch_size=bs,
            # sampler=train_sampler,
            shuffle=True,
            num_workers=num_workers,
            drop_last=True)

        test_loader = th_data.DataLoader(test_dataset,
                                         batch_size=bs,
                                         shuffle=False,
                                         num_workers=num_workers,
                                         drop_last=True)
    dataloaders = DataLoaders(train_loader, test_loader, device=None)
    return dataloaders
Example No. 5
def get_learner(
    data, arch, lr, loss_func=nn.MSELoss(), cb_funcs=None, opt_func=Adam, **kwargs
):
    init_cnn(arch)
    dls = DataLoaders.from_dsets(
        data.train_ds,
        data.valid_ds,
        bs=data.train_dl.batch_size,
    )
    return Learner(dls, arch, loss_func, lr=lr, cbs=cb_funcs, opt_func=opt_func)
Example No. 6
def read_data_loaders():
    threes_stacked, sevens_stacked, threes_stacked_v, sevens_stacked_v = read_mnist()

    train_x, train_y, train_dset = create_mnist_dset(threes_stacked,
                                                     sevens_stacked)
    valid_x, valid_y, valid_dset = create_mnist_dset(threes_stacked_v,
                                                     sevens_stacked_v)

    dl = DataLoader(train_dset, batch_size=256, shuffle=True)
    valid_dl = DataLoader(valid_dset, batch_size=256, shuffle=True)
    return DataLoaders(dl, valid_dl)
Example No. 7
def make_fastai_cifar_dls(path, bs=64, check=True, device=None, **kwargs):
    train_tfm, test_tfm = make_torch_tfms()
    train_dl = make_cifar_dl(cifar_dsets.CIFAR10.train_list,
                             path,
                             train_tfm,
                             check=check,
                             bs=bs,
                             shuffle=True)
    test_dl = make_cifar_dl(cifar_dsets.CIFAR10.test_list,
                            path,
                            test_tfm,
                            check=check,
                            bs=bs,
                            shuffle=False)
    dls = DataLoaders(train_dl, test_dl, device=device)
    return dls
Example No. 8
 def predict(self, files, model_no, path=None, **kwargs):
     model_path = self.models[model_no]
     model = self.load_model(model_path)
     ds_kwargs = self.ds_kwargs
     # Adding extra padding (overlap) for models that have the same input and output shape
     if ds_kwargs['padding'][0] == 0:
         ds_kwargs['padding'] = (self.extra_padding, ) * 2
     ds = TileDataset(files, **ds_kwargs)
     dls = DataLoaders.from_dsets(ds,
                                  batch_size=self.bs,
                                  after_batch=self.get_batch_tfms(),
                                  shuffle=False,
                                  drop_last=False,
                                  **self.dl_kwargs)
     if torch.cuda.is_available(): dls.cuda()
     learn = Learner(dls, model, loss_func=self.loss_fn)
     if self.mpt: learn.to_fp16()
     if path: path = path / f'model_{model_no}'
     return learn.predict_tiles(dl=dls.train, path=path, **kwargs)
Example No. 9
 def _get_dls(self, files, files_val=None):
     ds = []
     ds.append(
         RandomTileDataset(files,
                           label_fn=self.label_fn,
                           **self.train_ds_kwargs))
     if files_val:
         ds.append(
             TileDataset(files_val,
                         label_fn=self.label_fn,
                         **self.train_ds_kwargs))
     else:
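          # No validation files given, so fall back to validating on the training dataset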
         ds.append(ds[0])
     dls = DataLoaders.from_dsets(*ds,
                                  bs=self.batch_size,
                                  pin_memory=True,
                                  **self.dl_kwargs)
     if torch.cuda.is_available(): dls.cuda()
     return dls
Example No. 10
def build_distributed_dataloaders(dls, rank, world_size, sync_valid=False):
    """Wrap dataloaders with distributed TPU aware dataloader """
    new_loaders = []
    for i,dl in enumerate(dls.loaders):
        if i == 0 or sync_valid:
            use_rank = rank
            use_size = world_size
        else:
            use_rank = 0
            use_size = 1
        if isinstance(dl, th_data.DataLoader):
            if i == 0: # set train dl to use distrib sampler
                dl.set_distributed_sampler(use_rank, use_size)
        else: # fastai dataloader
            dl = TPUDistributedDL(dl,
                                rank=use_rank,
                                world_size=use_size)
        new_loaders += [dl]
    return DataLoaders(*new_loaders, path=dls.path, device=dls.device)
Example No. 11
 def predict(self, files, model_no, bs=None, **kwargs):
     bs = bs or self.bs
     model_path = self.models[model_no]
     model = self.load_model(model_path)
     batch_tfms = Normalize.from_stats(*self.stats)
     ds = TileDataset(files, **self.ds_kwargs)
     dls = DataLoaders.from_dsets(ds,
                                  batch_size=bs,
                                  after_batch=batch_tfms,
                                  shuffle=False,
                                  drop_last=False,
                                  num_workers=0)
     if torch.cuda.is_available(): dls.cuda(), model.cuda()
     learn = Learner(dls, model, loss_func=self.loss_fn)
     if self.mpt: learn.to_fp16()
     results = learn.predict_tiles(dl=dls.train, **kwargs)
     pth_tmp = self.path / '.tmp' / model_path.name
     save_tmp(pth_tmp, files, results)
     return results
Example No. 12
 def get_dls(self, files, files_val=None):
     ds = []
     ds.append(
         RandomTileDataset(files,
                           label_fn=self.label_fn,
                           **self.mw_kwargs,
                           **self.ds_kwargs))
     if files_val:
         ds.append(
             TileDataset(files_val,
                         label_fn=self.label_fn,
                         **self.mw_kwargs,
                         **self.ds_kwargs))
     else:
         ds.append(ds[0])
     dls = DataLoaders.from_dsets(*ds,
                                  bs=self.bs,
                                  after_item=self.item_tfms,
                                  after_batch=self.get_batch_tfms(),
                                  **self.dl_kwargs)
     if torch.cuda.is_available(): dls.cuda()
     return dls
Example No. 13
# We just need to split our filenames between a training and validation set to use it.

idxs = np.random.permutation(range(len(files)))
cut = int(0.8 * len(files))
train_files = files[idxs[:cut]]
valid_files = files[idxs[cut:]]

# We can then use it to create datasets.

train_ds = SiameseDataset(train_files)
valid_ds = SiameseDataset(valid_files, is_valid=True)

# All of the above would be different for your custom problem, the main point is that as soon as you have some `Dataset`s, you can create a fastai's `DataLoaders` with the following factory method:


dls = DataLoaders.from_dsets(train_ds, valid_ds)

# You can then use this `DataLoaders` object in a `Learner` and start training. Most methods should work, except the ones that rely on showing something (e.g. `DataLoaders.show_batch` and `Learner.show_results`). For instance, you can get and inspect a batch with:

b = dls.one_batch()

# If you want to use the GPU, you can just write:

dls = dls.cuda()

# Now, what is a bit annoying is that we have to rewrite everything that is already in fastai if we want to normalize our images or apply data augmentation. With minimal changes to the code we wrote, we can still access all of that and get all the show methods to work as a cherry on top. Let's see how.

# ### Using the mid-level API

# When you have a custom dataset like before, you can easily convert it into a fastai `Transform` by just changing the `__getitem__` function to `encodes`. In general, a `Transform` in fastai calls the `encodes` method when you apply it to an item (a bit like PyTorch modules call `forward` when applied to something), so this turns your Python dataset into a function that transforms an integer into your data.
#
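
# As a minimal, illustrative sketch of that idea (the names `PairDataset` and
# `PairTransform` below are made up for this example, they are not part of the
# tutorial), moving the body of `__getitem__` unchanged into `encodes` is all
# it takes:

from fastcore.transform import Transform

class PairDataset:
    "A plain PyTorch-style dataset: indexing returns an (item, label) pair."
    def __init__(self, items): self.items = items
    def __getitem__(self, i): return self.items[i], i % 2
    def __len__(self): return len(self.items)

class PairTransform(Transform):
    "The same logic as a fastai `Transform`: `__getitem__` became `encodes`."
    def __init__(self, items): self.items = items
    def encodes(self, i): return self.items[i], i % 2

tfm = PairTransform(['a', 'b', 'c'])
tfm(1)  # applying the Transform to an integer returns the same pair as dataset[1]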
Example No. 14
    def fit(self,
            i,
            n_iter=None,
            lr_max=None,
            bs=None,
            n_jobs=-1,
            verbose=1,
            **kwargs):
        n_iter = n_iter or self.n_iter
        lr_max = lr_max or self.lr
        bs = bs or self.bs
        self.stats = self.stats or self.ds.compute_stats()
        name = self.ensemble_dir / f'{self.arch}_model-{i}.pth'
        files_train, files_val = self.splits[i]
        train_ds = RandomTileDataset(files_train,
                                     label_fn=self.label_fn,
                                     n_jobs=n_jobs,
                                     verbose=verbose,
                                     **self.mw_kwargs,
                                     **self.ds_kwargs)
        valid_ds = TileDataset(files_val,
                               label_fn=self.label_fn,
                               n_jobs=n_jobs,
                               verbose=verbose,
                               **self.mw_kwargs,
                               **self.ds_kwargs)
        batch_tfms = Normalize.from_stats(*self.stats)
        dls = DataLoaders.from_dsets(train_ds,
                                     valid_ds,
                                     bs=bs,
                                     after_item=self.item_tfms,
                                     after_batch=batch_tfms)
        pre = None if self.pretrained == 'new' else self.pretrained
        model = torch.hub.load(self.repo,
                               self.arch,
                               pretrained=pre,
                               n_classes=dls.c,
                               in_channels=self.in_channels,
                               **kwargs)
        if torch.cuda.is_available(): dls.cuda(), model.cuda()
        learn = Learner(dls,
                        model,
                        metrics=self.metrics,
                        wd=self.wd,
                        loss_func=self.loss_fn,
                        opt_func=_optim_dict[self.optim],
                        cbs=self.cbs)
        learn.model_dir = self.ensemble_dir.parent / '.tmp'
        if self.mpt: learn.to_fp16()
        print(f'Starting training for {name.name}')
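        # Convert the requested number of training iterations (n_iter) into an epoch count for fit_one_cycle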
        epochs = calc_iterations(n_iter=n_iter, ds_length=len(train_ds), bs=bs)
        learn.fit_one_cycle(epochs, lr_max)

        print(f'Saving model at {name}')
        name.parent.mkdir(exist_ok=True, parents=True)
        self.save_model(name, learn.model)
        self.models[i] = name
        self.recorder[i] = learn.recorder
        del model
        gc.collect()
        torch.cuda.empty_cache()
Example No. 15
def dls_from_pytorch(
        train_data_path: Union[str, PosixPath],
        val_data_path: Union[str, PosixPath],
        train_tfms: List,
        val_tfms: List,
        batch_size: int,
        num_workers: int,
        dataset_func: Callable = ImageFolderDataset,
        loader: Callable = default_loader,
        image_backend: str = 'pil',  # 'accimage'
        limit_dataset: Union[bool, int] = False,
        pin_memory: bool = True,
        shuffle: bool = True,
        shuffle_val: bool = False,
        drop_last: bool = True,
        drop_last_val: bool = False,
        persistent_workers: bool = False):
    """Return fastai dataloaders created from pytorch dataloaders.

    Args:
        train_data_path (Union[str, PosixPath]): path for train data.
        val_data_path (Union[str, PosixPath]): path for validation data.
        train_tfms (List): List of transforms for train data.
        val_tfms (List): List of transforms for validation data.
        batch_size (int): Batch size.
        num_workers (int): Number of workers.
        dataset_func (Callable, optional): Function or class used to create the dataset. Defaults to ImageFolderDataset.
        loader (Callable, optional): Function that loads an image. Defaults to default_loader.
        image_backend (str, optional): Image backend to use. Defaults to 'pil'.
        pin_memory (bool, optional): Use pinned memory. Defaults to True.
        shuffle (bool, optional): Shuffle the train data. Defaults to True.
        shuffle_val (bool, optional): Shuffle the validation data. Defaults to False.
        drop_last (bool, optional): Drop the last batch if it is not full. Defaults to True.
        drop_last_val (bool, optional): Drop the last validation batch if it is not full. Defaults to False.
        persistent_workers (bool, optional): Use persistent workers. Defaults to False.

    Returns:
        fastai dataloaders
    """
    set_image_backend(image_backend)
    train_tfms = T.Compose(train_tfms)
    val_tfms = T.Compose(val_tfms)
    train_ds = dataset_func(root=train_data_path,
                            transform=train_tfms,
                            loader=loader,
                            limit_dataset=limit_dataset)
    val_ds = dataset_func(root=val_data_path,
                          transform=val_tfms,
                          loader=loader,
                          limit_dataset=limit_dataset)

    train_loader = DataLoader(dataset=train_ds,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              pin_memory=pin_memory,
                              shuffle=shuffle,
                              drop_last=drop_last,
                              persistent_workers=persistent_workers)
    val_loader = DataLoader(dataset=val_ds,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            pin_memory=pin_memory,
                            shuffle=shuffle_val,
                            drop_last=drop_last_val,
                            persistent_workers=persistent_workers)
    return DataLoaders(train_loader, val_loader)
Example No. 16
@dataclass
class ARNet:
    ar_order: int
    sparsity: float = None
    n_forecasts: int = 1
    n_epoch: int = 20
    lr: float = None
    est_noise: float = None
    start_reg_pct: float = 0.0
    full_reg_pct: float = 0.5
    use_reg_noise: bool = False
    reg_c1: float = 2.0
    reg_c2: float = 2.0
    loss_func: str = "huber"
    train_bs: int = 32
    valid_bs: int = 1024
    valid_p: float = 0.1
    normalize: bool = False
    ar_params: list = None
    log_level: str = None
    callbacks: list = None
    metrics: list = None
    use_gpu: bool = False
    dls: DataLoaders = field(init=False, default=None)
    learn: TabularLearner = field(init=False, default=None)
    coeff: list = field(init=False, default=None)
    df: pd.DataFrame = field(init=False, default=None)
    regularizer: SparsifyAR = field(init=False, default=None)

    def __post_init__(self):
        if self.log_level is not None:
            utils.set_logger_level(log, self.log_level)
        self.loss_func = get_loss_func(self.loss_func)
        if self.use_gpu:
            if torch.cuda.is_available():
                self.device = torch.device("cuda")
                # torch.cuda.set_device(0)
            else:
                log.error("CUDA is not available. defaulting to CPU")
                self.device = torch.device("cpu")
        else:
            self.device = torch.device("cpu")

    def tabularize(self, series):
        if self.est_noise is None:
            self.est_noise = utils_data.estimate_noise(series)
            log.info("estimated noise of series: {}".format(self.est_noise))
        df_all = utils_data.tabularize_univariate(series, self.ar_order,
                                                  self.n_forecasts)
        log.debug("tabularized df")
        log.debug("df columns: {}".format(list(df_all.columns)))
        log.debug("df shape: {}".format(df_all.shape))
        # log.debug("df head(3): {}".format(df_all.head(3)))
        self.df = df_all
        return self

    def make_datasets(
        self,
        series=None,
        valid_p=None,
        train_bs=None,
        valid_bs=None,
        normalize=None,
    ):
        if series is None:
            if self.df is None:
                raise ValueError("must pass a series.")
        else:
            self.tabularize(series)
        valid_p = self.valid_p if valid_p is None else valid_p
        train_bs = self.train_bs if train_bs is None else train_bs
        valid_bs = self.valid_bs if valid_bs is None else valid_bs
        normalize = self.normalize if normalize is None else normalize

        procs = []
        if normalize:
            procs.append(Normalize)

        df_all = self.df
        splits = utils_data.split_by_p_valid(valid_p, len(df_all))
        cont_names = [col for col in list(df_all.columns) if "x_" == col[:2]]
        target_names = [col for col in list(df_all.columns) if "y_" == col[:2]]
        tp = TabularPandas(
            df_all,
            procs=procs,
            cat_names=None,
            cont_names=cont_names,
            y_names=target_names,
            splits=splits,
        )
        log.debug("cont var num: {}, names: {}".format(len(tp.cont_names),
                                                       tp.cont_names))

        trn_dl = TabDataLoader(tp.train,
                               bs=train_bs,
                               shuffle=True,
                               drop_last=True,
                               device=self.device)
        val_dl = TabDataLoader(tp.valid, bs=valid_bs, device=self.device)
        self.dls = DataLoaders(trn_dl, val_dl, device=self.device)
        log.debug("showing batch")
        log.debug("{}".format(self.dls.show_batch(show=False)))
        return self

    def create_regularizer(
        self,
        sparsity=None,
        start_reg_pct=None,
        full_reg_pct=None,
        est_noise=None,
        use_reg_noise=None,
        reg_c1=None,
        reg_c2=None,
    ):
        sparsity = self.sparsity if sparsity is None else sparsity
        start_reg_pct = self.start_reg_pct if start_reg_pct is None else start_reg_pct
        full_reg_pct = self.full_reg_pct if full_reg_pct is None else full_reg_pct
        est_noise = self.est_noise if est_noise is None else est_noise
        use_reg_noise = self.use_reg_noise if use_reg_noise is None else use_reg_noise
        reg_c1 = self.reg_c1 if reg_c1 is None else reg_c1
        reg_c2 = self.reg_c2 if reg_c2 is None else reg_c2

        self.regularizer = SparsifyAR(
            sparsity,
            est_noise=est_noise if use_reg_noise else None,
            start_pct=start_reg_pct,
            full_pct=full_reg_pct,
            c1=reg_c1,
            c2=reg_c2,
        )
        log.info("reg lam (max): {}".format(self.regularizer.lam_max))
        return self

    def create_learner(
        self,
        loss_func=None,
        metrics=None,
        ar_params=None,
        callbacks=None,
    ):
        loss_func = self.loss_func if loss_func is None else get_loss_func(
            loss_func)
        metrics = self.metrics if metrics is None else metrics
        ar_params = self.ar_params if ar_params is None else ar_params
        callbacks = self.callbacks if callbacks is None else callbacks

        if metrics is None:
            metrics = ["MSE", "MAE"]
            metrics = [get_loss_func(m) for m in metrics]
        if ar_params is not None:
            metrics.append(sTPE(ar_params, at_epoch_end=False))

        if callbacks is None:
            callbacks = []
        if self.sparsity is not None and self.regularizer is None:
            self.create_regularizer()
        if self.regularizer is not None:
            callbacks.append(self.regularizer)

        self.learn = tabular_learner(
            self.dls,
            layers=[],  # Note: None defaults to [200, 100]
            config={
                "use_bn": False,
                "bn_final": False,
                "bn_cont": False
            },
            n_out=self.n_forecasts,  # None calls get_c(dls)
            train_bn=False,  # passed to Learner
            metrics=metrics,  # passed to Learner
            loss_func=loss_func,
            cbs=callbacks,
        )
        log.debug("{}".format(self.learn.model))
        return self

    def find_lr(self, plot=True):
        if self.learn is None:
            raise ValueError("create learner first.")
        lr_at_min, lr_steep = self.learn.lr_find(start_lr=1e-6,
                                                 end_lr=10,
                                                 num_it=300,
                                                 show_plot=plot)
        if plot:
            plt.show()
        log.debug("lr at minimum: {}; (steepest lr: {})".format(
            lr_at_min, lr_steep))
        lr = lr_at_min
        log.info("Optimal learning rate: {}".format(lr))
        self.lr = lr
        return self

    def fit_one_cycle(self, n_epoch=None, lr=None, cycles=1, plot=True):
        n_epoch = self.n_epoch if n_epoch is None else n_epoch
        lr = self.lr if lr is None else lr

        if lr is None:
            self.find_lr(plot=plot)
            lr = self.lr
        for i in range(0, cycles):
            self.learn.fit_one_cycle(n_epoch=n_epoch,
                                     lr_max=lr,
                                     div=25.0,
                                     div_final=10000.0,
                                     pct_start=0.25)
            lr = lr / 10
            if plot:
                self.learn.recorder.plot_loss(skip_start=20)
        if plot:
            plt.show()
        # record Coeff
        self.coeff = utils.coeff_from_model(self.learn.model)
        return self

    def fit(self, series, plot=False):
        self.make_datasets(series)
        self.create_learner()
        self.fit_one_cycle(plot=plot)
        return self

    def plot_weights(self, **kwargs):
        plotting.plot_weights(
            ar_val=self.ar_order,
            weights=self.coeff[0],
            ar=self.ar_params,
            **kwargs,
        )

    def plot_fitted_obs(self, num_obs=100, **kwargs):
        preds, y = self.learn.get_preds()
        if num_obs is not None:
            y = y[0:num_obs]
            preds = preds[0:num_obs]
        plotting.plot_prediction_sample(preds, y, **kwargs)

    def plot_errors(self, **kwargs):
        preds, y = self.learn.get_preds()
        plotting.plot_error_scatter(preds, y, **kwargs)

    def save_model(self, results_path="results", model_name=None):
        # self.learn.freeze()
        sparsity = 1.0 if self.sparsity is None else self.sparsity
        if model_name is None:
            model_name = "ar{}_sparse_{:.3f}_ahead_{}_epoch_{}.pkl".format(
                self.ar_order, sparsity, self.n_forecasts, self.n_epoch)
        self.learn.export(fname=os.path.join(results_path, model_name))
        return self

    def load_model(self, results_path="results", model_name=None, cpu=True):
        self.learn = load_learner(fname=os.path.join(results_path, model_name),
                                  cpu=cpu)
        # can unfreeze the model and fine_tune
        self.learn.unfreeze()
        return self
Example No. 17
# fastai's `DataLoader` class is compatible with the PyTorch `DataLoader`, but
# provides several extensions, such as a `device` argument and a `one_batch()`
# method.
#
# Adjust the batch sizes to the memory of your GPU.

# %%
mnist_train_fastai_data_loader = dl.DataLoader(
    mnist_train_dataset, batch_size=50, shuffle=True, device=device
)
mnist_test_fastai_data_loader = dl.DataLoader(
    mnist_test_dataset, batch_size=1000, drop_last=False, device=device
)

# %%
mnist_dls = DataLoaders(
    mnist_train_fastai_data_loader, mnist_test_fastai_data_loader, device=device
)

# %% [markdown]
#
# The `DataLoaders` class provides a few convenience methods, e.g.,
# `one_batch()`.
#
# To access the training and validation `DataLoader`s, use the `train` and
# `valid` properties:

# %%
type(mnist_dls.train), type(mnist_dls.valid)

# %%
x, y = mnist_dls.train.one_batch()