Example #1
def main():
    # Parse command-line arguments
    args = parse_args()

    # Split data between training and testing
    splitter = GrandparentSplitter(train_name="training", valid_name="testing")

    # Prepare the DataBlock, a generic container for quickly building Datasets and DataLoaders
    mnist = DataBlock(
        blocks=(ImageBlock(PILImage), CategoryBlock),
        get_items=get_image_files,
        splitter=splitter,
        get_y=parent_label,
    )

    # Download and untar the MNIST dataset, then build DataLoaders from the DataBlock
    data = mnist.dataloaders(untar_data(URLs.MNIST), bs=256, num_workers=0)

    # Enable auto logging
    mlflow.fastai.autolog()

    # Create Learner model
    learn = cnn_learner(data, resnet18)

    # Start MLflow session
    with mlflow.start_run():
        # Train and fit with default or supplied command line arguments
        learn.fit_one_cycle(args.epochs, args.lr)
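The `parse_args()` helper and the module imports are not shown in this excerpt. Below is a minimal sketch of what they might look like, assuming the parser only exposes the two hyperparameters passed to `fit_one_cycle`; the flag names and defaults are illustrative assumptions, not part of the original example.

import argparse

import mlflow
import mlflow.fastai
from fastai.vision.all import (CategoryBlock, DataBlock, GrandparentSplitter,
                               ImageBlock, PILImage, URLs, cnn_learner,
                               get_image_files, parent_label, resnet18,
                               untar_data)


def parse_args():
    # Hypothetical argument parser; only the hyperparameters used above are assumed.
    parser = argparse.ArgumentParser(
        description="Train a fastai CNN on MNIST with MLflow autologging")
    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--lr", type=float, default=0.01)
    return parser.parse_args()


if __name__ == "__main__":
    main()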
Example #2
def main(epochs):
    Task.init(project_name="examples", task_name="fastai v2")

    path = untar_data(URLs.PETS)
    files = get_image_files(path / "images")

    dls = ImageDataLoaders.from_name_func(path,
                                          files,
                                          label_func,
                                          item_tfms=Resize(224),
                                          num_workers=0)
    dls.show_batch()
    learn = cnn_learner(dls, resnet34, metrics=error_rate)
    learn.fine_tune(epochs)
    learn.show_results()
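`label_func` is used above but not defined in the excerpt. In the Oxford-IIIT Pets dataset, cat breeds have capitalized file names, so a labeller keyed on the first character is the usual choice; the definition below is an assumption for illustration, not part of the original example.

def label_func(fname):
    # Hypothetical labeller: Pets file names starting with an uppercase
    # letter are cats, lowercase ones are dogs.
    return fname[0].isupper()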
Example #3

# ## TensorBoardCallback

# +
path = untar_data(URLs.PETS)

db = DataBlock(blocks=(ImageBlock, CategoryBlock),
               get_items=get_image_files,
               item_tfms=Resize(128),
               splitter=RandomSubsetSplitter(train_sz=0.1, valid_sz=0.01),
               batch_tfms=aug_transforms(size=64),
               get_y=using_attr(RegexLabeller(r'(.+)_\d+.*$'), 'name'))

dls = db.dataloaders(path / 'images')
# -

learn = cnn_learner(dls, resnet18, metrics=accuracy)

learn.unfreeze()
learn.fit_one_cycle(3, cbs=TensorBoardCallback(Path.home() / 'tmp' / 'runs' / 'tb', trace_model=True))
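# A quick way to inspect the runs logged by `TensorBoardCallback` is to start
# TensorBoard from Python. This is generic TensorBoard usage rather than part
# of the original notebook; the log directory is simply the one passed above.

from tensorboard import program

tb = program.TensorBoard()
tb.configure(argv=[None, '--logdir', str(Path.home() / 'tmp' / 'runs' / 'tb')])
print(f"TensorBoard listening on {tb.launch()}")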

# ## Projector

# ### Projector in TensorBoardCallback

path = untar_data(URLs.PETS)

# +
db = DataBlock(blocks=(ImageBlock, CategoryBlock),
               get_items=get_image_files,
               item_tfms=Resize(128),
               splitter=RandomSubsetSplitter(train_sz=0.05, valid_sz=0.01),
               batch_tfms=aug_transforms(size=64),
               get_y=using_attr(RegexLabeller(r'(.+)_\d+.*$'), 'name'))

dls = db.dataloaders(path / 'images')
# -
Example #4
    def __init__(self, verbose=True, base_dir='.', test_only=False, **kwargs):
        """Initialize substructure-predicting model

        Args:
            verbose: if True, print messages during initialization
            base_dir: Path to directory containing datasets (each in their
                own folder). Defaults to current directory.
            test_only: if True, initialize with a dummy dataset
                (2 blank images with meaningless metadata).
            **kwargs: Configuration options
        """
        self.print = print = builtins.print if verbose else lambda x: x

        self.train_config = tc = dict(val_galaxies=dds.metadata.val_galaxies,
                                      bad_galaxies=dds.load_bad_galaxies())
        tc.update(**kwargs)

        self.fit_parameters = tc['fit_parameters']
        self.n_params = len(self.fit_parameters)
        self.short_names = [
            dds.short_names.get(pname, pname) for pname in self.fit_parameters
        ]

        if test_only:
            print(f"Setting up model with meaningless toy data. You won't be "
                  "able to train or use predict_all, but you can run predict "
                  "on new images.")
            self.data_dir = dds.make_dummy_dataset()
        else:
            self.data_dir = Path(base_dir) / tc['dataset_name']
            if self.data_dir.exists():
                print(f"Setting up model for dataset {tc['dataset_name']}")
            else:
                raise FileNotFoundError(
                    f"{self.data_dir} not found! Check base dir, or "
                    "pass test_only = True to setup model for evaluation only."
                )

        self.metadata, self.galaxy_indices = dds.load_metadata(
            self.data_dir,
            val_galaxies=tc['val_galaxies'],
            bad_galaxies=tc['bad_galaxies'],
            remove_bad=True,
            verbose=False if test_only else verbose)
        if 'normalizer_means' in tc:
            self.normalizer = dds.Normalizer(
                fit_parameters=self.fit_parameters,
                means=tc['normalizer_means'],
                scales=tc['normalizer_scales'])
        else:
            self.normalizer = dds.Normalizer(self.metadata,
                                             self.fit_parameters)

        self.has_cuda = torch.cuda.is_available()
        print(f"Cuda available: {self.has_cuda}")
        if self.has_cuda:
            print("CUDA device: " +
                  torch.cuda.get_device_name(torch.cuda.current_device()))

        # Setting these up will take a while; looks like it's loading the entire
        # dataset in RAM? I'm probably butchering the fastai dataloader API...
        print("Setting up data block and data loaders, could take a while")
        self.data_block = dds.data_block(
            self.metadata,
            fit_parameters=tc['fit_parameters'],
            data_dir=self.data_dir,
            uncertainty=tc['uncertainty'],
            augment_rotation=tc['augment_rotation'])
        self.data_loaders = self.data_block.dataloaders(None,
                                                        bs=tc['batch_size'])
        print("Dataloaders initialized")

        self.metrics = dds.all_metrics(self.fit_parameters, self.normalizer,
                                       self.short_names,
                                       self.train_config['uncertainty'])

        arch = getattr(fv, tc['architecture'])
        if 'architecture_options' in tc:
            arch = partial(arch, **tc['architecture_options'])

        self.dropout_switch = dds.TestTimeDropout()

        if not tc.get('truncate_final'):
            truncate_final_to = None
        else:
            # Truncate final parameter to physical value = 0;
            # find encoded value of zero
            final_p = list(self.fit_parameters)[-1]
            truncate_final_to = self.normalizer.norm(0, param_name=final_p)
            print(f"Truncating {final_p} to 0, encoded as {truncate_final_to}")

        self.learner = fv.cnn_learner(
            dls=self.data_loaders,
            arch=arch,
            n_in=1,
            n_out=dds.n_out(self.n_params, tc['uncertainty']),
            loss_func=dds.loss_for(
                self.fit_parameters,
                tc['uncertainty'],
                truncate_final_to=truncate_final_to,
                parameter_weights=tc.get('parameter_weights')),
            metrics=self.metrics,
            pretrained=False,
            ps=tc.get('dropout_p', 0.5),
            cbs=[self.dropout_switch],
            bn_final=tc['bn_final'])
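The class name is not visible in this excerpt and the option values below are placeholders; the keys, however, are the ones the constructor reads from `tc` (`dataset_name`, `fit_parameters`, `uncertainty`, `augment_rotation`, `batch_size`, `architecture`, `bn_final`). A hypothetical instantiation could look like this:

# Hypothetical usage sketch; `SubstructureModel` stands in for the class whose
# __init__ is shown above, and every option value here is a placeholder.
model = SubstructureModel(
    verbose=True,
    base_dir='./datasets',
    dataset_name='my_dataset',             # expected as a folder under base_dir
    fit_parameters=['param_a', 'param_b'],
    uncertainty=False,
    augment_rotation='free',
    batch_size=128,
    architecture='resnet18',               # resolved via getattr(fv, ...)
    bn_final=False,
)
model.learner.fit_one_cycle(1)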
Example #5
File: base.py  Project: vector-ai/vectorhub
    def _create_learner(self):
        dls = self._instantiate_empty_dataloader()
        self.learn = cnn_learner(dls, resnet34, metrics=error_rate)
Example #6
from fastai.vision.all import untar_data, URLs, ImageDataLoaders, get_image_files, Resize, error_rate, resnet34, \
    cnn_learner

from labml import lab, experiment
from labml.utils.fastai import LabMLFastAICallback

path = untar_data(
    URLs.PETS,
    dest=lab.get_data_path(),
    fname=lab.get_data_path() / URLs.path(URLs.PETS).name) / 'images'


def is_cat(x):
    return x[0].isupper()


dls = ImageDataLoaders.from_name_func(path,
                                      get_image_files(path),
                                      valid_pct=0.2,
                                      seed=42,
                                      label_func=is_cat,
                                      item_tfms=Resize(224))
# Train the model ⚡
learn = cnn_learner(dls,
                    resnet34,
                    metrics=error_rate,
                    cbs=LabMLFastAICallback())

with experiment.record(name='pets', exp_conf=learn.labml_configs()):
    learn.fine_tune(5)
Example #7
products = products.new(item_tfms=RandomResizedCrop(168, min_scale=0.8),
                        batch_tfms=aug_transforms())

project_path = Path("/home/yaro/Workspace/fastai/")
dataset_path = project_path.joinpath("for_test")
dls = products.dataloaders(dataset_path)

gpu = None
if torch.cuda.is_available():
    if gpu is not None: torch.cuda.set_device(gpu)
    n_gpu = torch.cuda.device_count()
else:
    n_gpu = None

learn = cnn_learner(dls, resnet18, metrics=error_rate).to_fp16()

# The context-manager way of DP/DDP; both can handle the single-GPU base case.
if gpu is None and n_gpu is not None:
    ctx = learn.parallel_ctx
    with partial(ctx, gpu)():
        print(
            f"Training in {ctx.__name__} context on GPU {list(range(n_gpu))}")
        learn.fine_tune(2)
else:
    learn.fine_tune(2)
# In[]:
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix()
plt.show()
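A common follow-up, not part of the original excerpt: inspect the examples the model gets most wrong using the standard `ClassificationInterpretation` helpers.

# Show the highest-loss predictions and the most frequently confused class pairs.
interp.plot_top_losses(9, figsize=(10, 10))
print(interp.most_confused(min_val=2))
plt.show()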