def load_cifar_data(batch_size, image_size, size):
    # `size` selects the dataset: 10 -> CIFAR-10, anything else -> CIFAR-100.
    if size == 10:
        path = datasets.untar_data(URLs.CIFAR)
    else:
        path = datasets.untar_data(URLs.CIFAR_100)
    # Per-channel mean/std of the CIFAR training images.
    stats = (np.array([0.4914, 0.48216, 0.44653]),
             np.array([0.24703, 0.24349, 0.26159]))
    tfms = get_transforms(do_flip=True, flip_vert=False, max_rotate=25)
    data = ImageDataBunch.from_folder(path, valid='test', size=image_size,
                                      ds_tfms=tfms, bs=batch_size)
    data.normalize(stats)  # normalize with the CIFAR stats computed above
    print("Loaded data")
    return data
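# A minimal usage sketch, not from the original source; the argument values
# are hypothetical and assume the same fastai v1 imports as the function above.
cifar_data = load_cifar_data(batch_size=128, image_size=32, size=10)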
def __init__(self, training: bool):
    basepath = datasets.untar_data(datasets.URLs.IMAGENETTE_160)
    if training:
        path = basepath / 'train'
    else:
        path = basepath / 'val'
    # `get_fnames` is a project-local helper returning the image paths and
    # the label mapping for the chosen split.
    self.fnames, self.mapping = get_fnames(path)
    self.tfms = Compose([
        CenterCrop(128),
        Lambda(lambda img: img.convert('RGB')),
        ToTensor(),
    ])
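# A hedged sketch, not from the original source, of the `__getitem__` such a
# dataset might pair with the `__init__` above. The label lookup is an
# assumption (it presumes `self.mapping` maps a class folder name to an
# integer label), and it assumes `from PIL import Image`.
def __getitem__(self, idx):
    fname = self.fnames[idx]            # path collected by __init__
    img = self.tfms(Image.open(fname))  # crop, convert to RGB, to tensor
    return img, self.mapping[fname.parent.name]

def __len__(self):
    return len(self.fnames)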
def load_data(batch_size, image_size, dataset=1):
    # dataset: 0 -> full ImageNet (local scratch copy),
    #          1 -> Imagenette 160, 2 -> Imagewoof 160.
    if dataset == 0:
        path = Path('/scratch/work/public/imagenet/')
    elif dataset == 1:
        path = datasets.untar_data(datasets.URLs.IMAGENETTE_160)
    elif dataset == 2:
        path = datasets.untar_data(datasets.URLs.IMAGEWOOF_160)
    else:
        raise ValueError(f"Unknown dataset id: {dataset}")
    train_transforms = [
        make_rgb,
        RandomResizedCrop(image_size, scale=(0.35, 1)),
        PilRandomFlip(),
        np_to_float,
    ]
    valid_transforms = [make_rgb, CenterCrop(image_size), np_to_float]
    data = Data(path, batch_size=batch_size,
                image_transforms=train_transforms,
                valid_image_transforms=valid_transforms,
                num_workers=8)
    print("Loaded data")
    return data
def bilm_learner(data: DataBunch, bptt: int = 70, emb_sz: int = 400,
                 nh: int = 1150, nl: int = 3, pad_token: int = 1,
                 drop_mult: float = 1., tie_weights: bool = True,
                 bias: bool = True, qrnn: bool = False, pretrained_model=None,
                 pretrained_fnames: OptStrTuple = None,
                 **kwargs) -> 'LanguageLearner':
    "Create a `Learner` with a bidirectional language model."
    dps = default_dropout['language'] * drop_mult
    vocab_size = len(data.vocab.itos)
    model = get_bilm(vocab_size, emb_sz, nh, nl, pad_token,
                     input_p=dps[0], output_p=dps[1], weight_p=dps[2],
                     embed_p=dps[3], hidden_p=dps[4],
                     tie_weights=tie_weights, bias=bias, qrnn=qrnn)
    learn = LanguageLearner(data, model, bptt, split_func=bilm_split, **kwargs)
    if pretrained_model is not None:
        model_path = untar_data(pretrained_model, data=False)
        fnames = [list(model_path.glob(f'*.{ext}'))[0] for ext in ['pth', 'pkl']]
        learn.load_pretrained(*fnames)
        learn.freeze()
    if pretrained_fnames is not None:
        fnames = [learn.path / learn.model_dir / f'{fn}.{ext}'
                  for fn, ext in zip(pretrained_fnames, ['pth', 'pkl'])]
        learn.load_pretrained(*fnames)
        learn.freeze()
    # fastai uses CrossEntropyFlat for its language models, but that breaks
    # the bidirectional LM, so fall back to plain CrossEntropyLoss.
    learn.loss_func = CrossEntropyLoss()
    return learn
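# A hedged usage sketch, not from the original source: `data_lm` is assumed
# to be a TextLMDataBunch built elsewhere; the call mirrors fastai v1's
# `language_model_learner`.
learn = bilm_learner(data_lm, bptt=70, drop_mult=0.5)
learn.fit_one_cycle(1, 1e-2)  # standard fastai one-cycle training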
def main() -> Path:
    """
    Download and untar the Imagenette 160 dataset.

    Read more about the dataset: https://github.com/fastai/imagenette

    :return: path to the data.
    """
    logger = logging.getLogger(__name__)
    logger.info('getting Imagenette from the internet')
    project_dir = Path(__file__).resolve().parents[2]
    raw_data_dir = project_dir / 'data' / 'raw'
    path_to_data = untar_data(URLs.IMAGENETTE_160, dest=raw_data_dir)
    return path_to_data
def test_freeze_unfreeze_effnet():
    this_tests(cnn_learner)

    def get_number_of_trainable_params(model: nn.Module):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    base_arch = EfficientNetB1
    path = untar_data(URLs.MNIST_TINY)
    data = ImageDataBunch.from_folder(path, size=64)
    data.c = 1000  # Set the number of classes to 1000 to match the pretrained model.
    cnn_learn = cnn_learner(data, base_arch, pretrained=True)
    ref_learn = Learner(data, EfficientNet.from_pretrained("efficientnet-b1"))
    # By default the network body in cnn_learner is frozen, so it exposes
    # fewer trainable parameters than the fully trainable reference model.
    assert (get_number_of_trainable_params(cnn_learn.model)
            < get_number_of_trainable_params(ref_learn.model))
    cnn_learn.unfreeze()
    assert (get_number_of_trainable_params(cnn_learn.model)
            == get_number_of_trainable_params(ref_learn.model))
def download_files():
    """Download the dataset and annotations and return the respective paths."""
    path = untar_data(URLs.CARS)
    path_devkit = untar_data(devkit_url)  # `devkit_url` is defined elsewhere in the module
    path_devkit = path_devkit.parent / 'devkit'
    return path, path_devkit
def path():
    path = untar_data(URLs.MNIST_TINY)
    return path
"interval": "step" }) return [optimizer], schedulers def train_dataloader(self): return DataLoader(ImageListDataset(self.path, self.transforms, self.labels), batch_size=self.params['batch_size'], shuffle=True, num_workers=4) if __name__ == "__main__": dataset_path = untar_data(URLs.PETS) tfms = A.Compose([ A.Resize(224, 224), A.HorizontalFlip(), A.OneOf([ A.RandomContrast(), A.RandomGamma(), A.RandomBrightness(), ], p=0.3), A.ShiftScaleRotate(), A.Normalize(max_pixel_value=1.0, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), ToTensor(), ])