def main():
    # Parse command-line arguments
    args = parse_args()

    # Split data between training and testing
    splitter = GrandparentSplitter(train_name="training", valid_name="testing")

    # Prepare DataBlock which is a generic container to quickly build Datasets and DataLoaders
    mnist = DataBlock(
        blocks=(ImageBlock(PILImage), CategoryBlock),
        get_items=get_image_files,
        splitter=splitter,
        get_y=parent_label,
    )

    # Download, untar the MNIST data set and create DataLoaders from the DataBlock
    data = mnist.dataloaders(untar_data(URLs.MNIST), bs=256, num_workers=0)

    # Enable auto logging
    mlflow.fastai.autolog()

    # Create Learner model
    learn = cnn_learner(data, resnet18)

    # Start MLflow session
    with mlflow.start_run():
        # Train and fit with default or supplied command line arguments
        learn.fit_one_cycle(args.epochs, args.lr)
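# The script above relies on a parse_args() helper that is not shown here. A
# minimal argparse-based sketch follows; the flag names match the two
# hyperparameters consumed in main(), but the defaults are illustrative
# assumptions, not values taken from the original script.
import argparse


def parse_args():
    # Expose the epochs and learning rate used by fit_one_cycle above.
    parser = argparse.ArgumentParser(
        description="Train a fastai CNN on MNIST with MLflow autologging")
    parser.add_argument("--epochs", type=int, default=5,
                        help="number of training epochs")
    parser.add_argument("--lr", type=float, default=0.01,
                        help="learning rate passed to fit_one_cycle")
    return parser.parse_args()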
def main(epochs):
    # Initialize a ClearML task to track this run
    Task.init(project_name="examples", task_name="fastai v2")

    path = untar_data(URLs.PETS)
    files = get_image_files(path / "images")
    dls = ImageDataLoaders.from_name_func(
        path, files, label_func, item_tfms=Resize(224), num_workers=0)
    dls.show_batch()

    learn = cnn_learner(dls, resnet34, metrics=error_rate)
    learn.fine_tune(epochs)
    learn.show_results()
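# label_func is not defined in the snippet above. For the Oxford-IIIT Pets
# images downloaded by untar_data(URLs.PETS), the usual choice (as in the
# fastai vision tutorial) is to label a file as a cat when its name starts
# with an uppercase letter; this sketch assumes that convention.
def label_func(fname):
    # from_name_func passes the file name, and cat breeds are capitalised in
    # the Pets dataset's filenames.
    return fname[0].isupper()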
# ## TensorBoardCallback

# +
path = untar_data(URLs.PETS)

db = DataBlock(blocks=(ImageBlock, CategoryBlock),
               get_items=get_image_files,
               item_tfms=Resize(128),
               splitter=RandomSubsetSplitter(train_sz=0.1, valid_sz=0.01),
               batch_tfms=aug_transforms(size=64),
               get_y=using_attr(RegexLabeller(r'(.+)_\d+.*$'), 'name'))

dls = db.dataloaders(path / 'images')
# -

learn = cnn_learner(dls, resnet18, metrics=accuracy)

learn.unfreeze()
learn.fit_one_cycle(3, cbs=TensorBoardCallback(Path.home() / 'tmp' / 'runs' / 'tb', trace_model=True))

# ## Projector

# ### Projector in TensorBoardCallback

path = untar_data(URLs.PETS)

# +
db = DataBlock(blocks=(ImageBlock, CategoryBlock),
               get_items=get_image_files,
               item_tfms=Resize(128),
               splitter=RandomSubsetSplitter(train_sz=0.05, valid_sz=0.01),
def __init__(self, verbose=True, base_dir='.', test_only=False, **kwargs):
    """Initialize substructure-predicting model

    Args:
        verbose: if True, print messages during initialization
        base_dir: Path to directory containing datasets (each in their own
            folder). Defaults to the current directory.
        test_only: if True, initialize with a dummy dataset (2 blank images
            with meaningless metadata).
        **kwargs: Configuration options
    """
    self.print = print = builtins.print if verbose else lambda x: x

    self.train_config = tc = dict(
        val_galaxies=dds.metadata.val_galaxies,
        bad_galaxies=dds.load_bad_galaxies())
    tc.update(**kwargs)

    self.fit_parameters = tc['fit_parameters']
    self.n_params = len(self.fit_parameters)
    self.short_names = [
        dds.short_names.get(pname, pname)
        for pname in self.fit_parameters]

    if test_only:
        print("Setting up model with meaningless toy data. You won't be "
              "able to train or use predict_all, but you can run predict "
              "on new images.")
        self.data_dir = dds.make_dummy_dataset()
    else:
        self.data_dir = Path(base_dir) / tc['dataset_name']
        if self.data_dir.exists():
            print(f"Setting up model for dataset {tc['dataset_name']}")
        else:
            raise FileNotFoundError(
                f"{self.data_dir} not found! Check base_dir, or "
                "pass test_only=True to set up the model for evaluation only.")

    self.metadata, self.galaxy_indices = dds.load_metadata(
        self.data_dir,
        val_galaxies=tc['val_galaxies'],
        bad_galaxies=tc['bad_galaxies'],
        remove_bad=True,
        verbose=False if test_only else verbose)

    if 'normalizer_means' in tc:
        self.normalizer = dds.Normalizer(
            fit_parameters=self.fit_parameters,
            means=tc['normalizer_means'],
            scales=tc['normalizer_scales'])
    else:
        self.normalizer = dds.Normalizer(self.metadata, self.fit_parameters)

    self.has_cuda = torch.cuda.is_available()
    print(f"Cuda available: {self.has_cuda}")
    if self.has_cuda:
        print("CUDA device: "
              + torch.cuda.get_device_name(torch.cuda.current_device()))

    # Setting these up will take a while; looks like it's loading the entire
    # dataset in RAM? I'm probably butchering the fastai dataloader API...
    print("Setting up data block and data loaders, could take a while")
    self.data_block = dds.data_block(
        self.metadata,
        fit_parameters=tc['fit_parameters'],
        data_dir=self.data_dir,
        uncertainty=tc['uncertainty'],
        augment_rotation=tc['augment_rotation'])
    self.data_loaders = self.data_block.dataloaders(None, bs=tc['batch_size'])
    print("Dataloaders initialized")

    self.metrics = dds.all_metrics(
        self.fit_parameters,
        self.normalizer,
        self.short_names,
        self.train_config['uncertainty'])

    arch = getattr(fv, tc['architecture'])
    if 'architecture_options' in tc:
        arch = partial(arch, **tc['architecture_options'])

    self.dropout_switch = dds.TestTimeDropout()

    if not tc.get('truncate_final'):
        truncate_final_to = None
    else:
        # Truncate the final parameter to physical value 0;
        # find the encoded value of zero.
        final_p = list(self.fit_parameters)[-1]
        truncate_final_to = self.normalizer.norm(0, param_name=final_p)
        print(f"Truncating {final_p} to 0, encoded as {truncate_final_to}")

    self.learner = fv.cnn_learner(
        dls=self.data_loaders,
        arch=arch,
        n_in=1,
        n_out=dds.n_out(self.n_params, tc['uncertainty']),
        loss_func=dds.loss_for(
            self.fit_parameters,
            tc['uncertainty'],
            truncate_final_to=truncate_final_to,
            parameter_weights=tc.get('parameter_weights')),
        metrics=self.metrics,
        pretrained=False,
        ps=tc.get('dropout_p', 0.5),
        cbs=[self.dropout_switch],
        bn_final=tc['bn_final'])
def _create_learner(self):
    dls = self._instantiate_empty_dataloader()
    self.learn = cnn_learner(dls, resnet34, metrics=error_rate)
from fastai.vision.all import untar_data, URLs, ImageDataLoaders, get_image_files, Resize, error_rate, resnet34, \
    cnn_learner

from labml import lab, experiment
from labml.utils.fastai import LabMLFastAICallback

path = untar_data(
    URLs.PETS,
    dest=lab.get_data_path(),
    fname=lab.get_data_path() / URLs.path(URLs.PETS).name) / 'images'


def is_cat(x):
    return x[0].isupper()


dls = ImageDataLoaders.from_name_func(
    path, get_image_files(path), valid_pct=0.2, seed=42,
    label_func=is_cat, item_tfms=Resize(224))

# Train the model ⚡
learn = cnn_learner(dls, resnet34, metrics=error_rate, cbs=LabMLFastAICallback())

with experiment.record(name='pets', exp_conf=learn.labml_configs()):
    learn.fine_tune(5)
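# The next snippet reuses a `products` DataBlock that is assumed to be defined
# earlier in its script. A hypothetical definition consistent with the
# .new(...) call below (one folder per class, labelled by parent directory)
# could look like this; the splitter and image size are illustrative
# assumptions, not taken from the original code.
products = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    get_y=parent_label,
    item_tfms=Resize(224))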
products = products.new(
    item_tfms=RandomResizedCrop(168, min_scale=0.8),
    batch_tfms=aug_transforms())

project_path = Path("/home/yaro/Workspace/fastai/")
dataset_path = project_path.joinpath("for_test")

dls = products.dataloaders(dataset_path)

gpu = None
if torch.cuda.is_available():
    if gpu is not None:
        torch.cuda.set_device(gpu)
    n_gpu = torch.cuda.device_count()
else:
    n_gpu = None

learn = cnn_learner(dls, resnet18, metrics=error_rate).to_fp16()

# The context manager way of dp/ddp, both can handle single GPU base case.
if gpu is None and n_gpu is not None:
    ctx = learn.parallel_ctx
    with partial(ctx, gpu)():
        print(f"Training in {ctx.__name__} context on GPU {list(range(n_gpu))}")
        learn.fine_tune(2)
else:
    learn.fine_tune(2)

# In[]:
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()
plt.show()
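# Not part of the original snippet: after fine-tuning and inspection, the
# learner could be exported for later inference with fastai's
# export()/load_learner() API. The file name 'products.pkl' is an illustrative
# assumption; export() writes the pickled Learner under learn.path.
learn.export("products.pkl")
# learn_inf = load_learner(learn.path / "products.pkl")
# learn_inf.predict(get_image_files(dataset_path)[0])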