Example 1
def test_db_num_workers():
    if is_windows():
        assert db_num_workers() == 0
        assert db_num_workers(non_windows_num_workers=7) == 0
    else:
        assert db_num_workers() == 16
        assert db_num_workers(non_windows_num_workers=7) == 7
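The test above fully pins down the contract of db_num_workers: 0 workers on Windows (where DataLoader multiprocessing is unreliable), otherwise a configurable count defaulting to 16. A minimal sketch consistent with the assertions (inferred from the test, not taken from the source) is:

def db_num_workers(non_windows_num_workers: int = 16) -> int:
    # DataLoader multiprocessing is problematic on Windows, so load data
    # in the main process there; elsewhere, use the requested worker count.
    if is_windows():
        return 0
    return non_windows_num_workers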
Example 2
    def __init__(
        self,
        root: Union[str, Path],
        batch_size: int = 2,
        transforms: object = get_transform(train=True),
        train_pct: float = 0.5,
        anno_dir: str = "annotations",
        im_dir: str = "images",
    ):
        """ initialize dataset

        This class assumes that the data is formatted in two folders:
            - annotation folder which contains the Pascal VOC formatted
              annotations
            - image folder which contains the images

        Args:
            root: the root path of the dataset containing the image and
                annotation folders
            batch_size: batch size for dataloaders
            transforms: the transformations to apply
            train_pct: the ratio of training to testing data
            anno_dir: the name of the annotation subfolder under the root
                directory
            im_dir: the name of the image subfolder under the root directory.
                If set to None, the image location is inferred from the
                annotation .xml files
        """

        self.root = Path(root)
        # TODO think about how transforms are working...
        self.transforms = transforms
        self.im_dir = im_dir
        self.anno_dir = anno_dir
        self.batch_size = batch_size
        self.train_pct = train_pct

        # read annotations
        self._read_annos()

        # create training and validation datasets
        self.train_ds, self.test_ds = self.split_train_test(
            train_pct=train_pct
        )

        # create training and validation data loaders
        self.train_dl = DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=db_num_workers(),
            collate_fn=collate_fn,
        )

        self.test_dl = DataLoader(
            self.test_ds,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=db_num_workers(),
            collate_fn=collate_fn,
        )
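Both loaders pass a collate_fn that is not shown in this example. For detection-style datasets whose items are (image, target) pairs with variable-sized targets, a common minimal implementation (an assumption here, not confirmed by the source) is:

def collate_fn(batch):
    # Keep images and targets as tuples instead of stacking them into
    # tensors, since detection targets differ in shape from image to image.
    return tuple(zip(*batch))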
Example 3
def test_set_random_seed(tiny_ic_data_path):
    # check two data batches are the same after seeding
    set_random_seed(1)
    first_data = (
        ImageList.from_folder(tiny_ic_data_path)
        .split_by_rand_pct()
        .label_from_folder()
        .transform()
        .databunch(bs=5, num_workers=db_num_workers())
        .normalize()
    )
    first_batch = first_data.one_batch()

    set_random_seed(1)
    second_data = (
        ImageList.from_folder(tiny_ic_data_path)
        .split_by_rand_pct()
        .label_from_folder()
        .transform()
        .databunch(bs=5, num_workers=db_num_workers())
        .normalize()
    )
    second_batch = second_data.one_batch()
    assert first_batch[1].tolist() == second_batch[1].tolist()
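set_random_seed is not shown in this example; a plausible sketch that would make the assertion hold (assuming the usual Python, NumPy, and PyTorch RNGs are the only sources of randomness) is:

import random
import numpy as np
import torch

def set_random_seed(seed: int) -> None:
    # Seed every RNG the data pipeline may touch so that shuffling and
    # augmentations are reproducible across runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)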
Example 4
    def _get_data_bunch_segmentationitemlist(
            path: Union[Path, str], transform: bool, im_size: int, bs: int,
            classes: List[str]) -> ImageDataBunch:
        """
        Create an ImageDataBunch and return it. TODO: in a future version,
        allow users to pass in their own image bunch or their own
        transformation objects (instead of using fastai's get_transforms()).

        Args:
            path (Union[Path, str]): path to the data to create the databunch from
            transform (bool): whether to apply fastai's default transformations (get_transforms())
            im_size (int): image size of the databunch
            bs (int): batch size of the databunch
            classes (List[str]): list of class names
        Returns:
            ImageDataBunch
        """
        path = Path(path)  # Path() is a no-op if path is already a Path
        tfms = get_transforms() if transform else None
        im_path = path / "images"
        anno_path = path / "segmentation-masks"

        def get_gt_filename(x):
            # Map an image file to its ground-truth mask of the same stem
            return anno_path / f"{x.stem}.png"

        # Load data
        return (
            SegmentationItemList.from_folder(im_path)
            .split_by_rand_pct(valid_pct=0.33)
            .label_from_func(get_gt_filename, classes=classes)
            .transform(tfms=tfms, size=im_size, tfm_y=True)
            .databunch(bs=bs, num_workers=db_num_workers())
            .normalize(imagenet_stats)
        )
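Assuming the folder layout described in the docstring (images/ and segmentation-masks/ side by side under the root), a hypothetical call could look like this (path and class names are illustrative):

data = _get_data_bunch_segmentationitemlist(
    "data/fridgeObjectsTinySeg",  # hypothetical path
    transform=True,
    im_size=50,
    bs=8,
    classes=["background", "can", "carton", "milk_bottle", "water_bottle"],
)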
Example 5
    def init_data_loaders(self):
        """ Create training and validation data loaders """
        self.train_dl = DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            shuffle=True,
            num_workers=db_num_workers(),
            collate_fn=collate_fn,
        )

        self.test_dl = DataLoader(
            self.test_ds,
            batch_size=self.batch_size,
            shuffle=False,
            num_workers=db_num_workers(),
            collate_fn=collate_fn,
        )
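With the tuple-based collate_fn sketched earlier, a quick sanity check on the resulting loader might look like this (dm is a hypothetical instance of the surrounding class):

images, targets = next(iter(dm.train_dl))
print(f"{len(images)} images in the batch")  # at most batch_size images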
Example 6
def tiny_seg_databunch(tiny_seg_data_path, seg_classes):
    """ Returns a databunch object for the segmentation tiny fridge objects dataset. """

    def get_gt_filename(x):
        # Map an image file to its ground-truth mask of the same stem
        return f"{tiny_seg_data_path}/segmentation-masks/{x.stem}.png"

    return (
        SegmentationItemList.from_folder(tiny_seg_data_path)
        .split_by_rand_pct(valid_pct=0.1, seed=10)
        .label_from_func(get_gt_filename, classes=seg_classes)
        .transform(get_transforms(), tfm_y=True, size=50)
        .databunch(bs=8, num_workers=db_num_workers())
        .normalize(imagenet_stats)
    )
Example 7
def tiny_ic_databunch(tmp_session):
    """ Returns a databunch object for the tiny fridge objects dataset. """
    im_paths = unzip_url(
        ic_urls.fridge_objects_tiny_path,
        fpath=tmp_session,
        dest=tmp_session,
        exist_ok=True,
    )
    return (
        ImageList.from_folder(im_paths)
        .split_by_rand_pct(valid_pct=0.1, seed=20)
        .label_from_folder()
        .transform(size=50)
        .databunch(bs=16, num_workers=db_num_workers())
        .normalize(imagenet_stats)
    )
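These functions are pytest fixtures; a hypothetical test consuming the image-classification databunch might be:

def test_tiny_ic_databunch_batch(tiny_ic_databunch):
    # one_batch() draws a single (images, labels) batch from the train set
    x, y = tiny_ic_databunch.one_batch()
    assert x.shape[0] == y.shape[0]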
Example 8
    def _get_data_bunch_imagelist(path: Union[Path, str], transform: bool,
                                  im_size: int, bs: int) -> ImageDataBunch:
        """
        Create an ImageDataBunch and return it. TODO: in a future version,
        allow users to pass in their own image bunch or their own
        transformation objects (instead of using fastai's get_transforms()).

        Args:
            path (Union[Path, str]): path to the data to create the databunch from
            transform (bool): whether to apply fastai's default transformations (get_transforms())
            im_size (int): image size of the databunch
            bs (int): batch size of the databunch
        Returns:
            ImageDataBunch
        """
        path = Path(path)  # Path() is a no-op if path is already a Path
        tfms = get_transforms() if transform else None
        return (
            ImageList.from_folder(path)
            .split_by_rand_pct(valid_pct=0.33)
            .label_from_folder()
            .transform(tfms=tfms, size=im_size)
            .databunch(bs=bs, num_workers=db_num_workers())
            .normalize(imagenet_stats)
        )
Example 9
def testing_databunch(tmp_session):
    """ Builds a databunch from the tiny Fridge Objects dataset and returns
    its validation component, which is used to test comparative_set_builder """
    im_paths = unzip_url(
        ic_urls.fridge_objects_tiny_path,
        fpath=tmp_session,
        dest=tmp_session,
        exist_ok=True,
    )
    can_im_paths = os.listdir(os.path.join(im_paths, "can"))
    can_im_paths = [
        os.path.join(im_paths, "can", im_name) for im_name in can_im_paths
    ][0:5]
    random.seed(642)
    data = (
        ImageList.from_folder(im_paths)
        .split_by_rand_pct(valid_pct=0.2, seed=20)
        .label_from_folder()
        .transform(size=300)
        .databunch(bs=16, num_workers=db_num_workers())
        .normalize(imagenet_stats)
    )

    validation_bunch = data.valid_ds

    return validation_bunch
Example 10
print(f"Fast.ai version = {fastai.__version__}")
which_processor()

EPOCHS = 10
LEARNING_RATE = 1e-4
IM_SIZE = 300

BATCH_SIZE = 16
ARCHITECTURE = models.resnet18
path = Path('/app/classifier_data/')

data = (
    ImageList.from_folder(path)
    .split_by_rand_pct(valid_pct=0.2, seed=10)
    .label_from_folder()
    .transform(size=IM_SIZE)
    .databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
    .normalize(imagenet_stats)
)

print(f'number of classes: {data.c}')
print(data.classes)

learn = cnn_learner(
    data,
    ARCHITECTURE,
    metrics=[accuracy],
    callback_fns=[partial(TrainMetricsRecorder, show_graph=True)])
learn.unfreeze()
learn.fit(EPOCHS, LEARNING_RATE)
learn.export(file=Path("/app/classifier_model.pkl"))
_, validation_accuracy = learn.validate(learn.data.valid_dl,
                                        metrics=[accuracy])
print(f'Accuracy on validation set: {100*float(validation_accuracy):3.2f}')
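The exported .pkl can later be reloaded for inference with fastai v1's load_learner; a minimal sketch (the image path is hypothetical):

from fastai.vision import load_learner, open_image

learn = load_learner("/app", "classifier_model.pkl")
pred_class, pred_idx, probs = learn.predict(open_image("some_image.jpg"))
print(f"predicted: {pred_class} (p={probs[pred_idx].item():.2f})")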
Example 11
BATCH_SIZE = 32
IM_SIZE = 224
DROPOUT = 0
ARCHITECTURE = models.resnet50

# Desired embedding dimension. Higher dimensions slow down retrieval but often provide better accuracy.
EMBEDDING_DIM = 4096
assert EMBEDDING_DIM == 4096 or EMBEDDING_DIM <= 2048

# Load images into fast.ai's ImageDataBunch object
random.seed(642)
data_finetune = (
    ImageList.from_folder(DATA_FINETUNE_PATH)
    .split_by_rand_pct(valid_pct=0.05, seed=20)
    .label_from_folder()
    .transform(tfms=fastai.vision.transform.get_transforms(), size=IM_SIZE)
    .databunch(bs=BATCH_SIZE, num_workers=db_num_workers())
    .normalize(imagenet_stats)
)

print(
    f"Data for fine-tuning: {len(data_finetune.train_ds.x)} training images and {len(data_finetune.valid_ds.x)} validation images."
)

learn = cnn_learner(data_finetune, ARCHITECTURE, metrics=[], ps=DROPOUT)

print(learn.model[1])

# By default, use the 2048-dimensional pooling layer as implemented in the paper.
# Optionally, keep the 4096-dimensional pooling layer from the ResNet-50 model instead.
if EMBEDDING_DIM != 4096:
    modules = []
    pooling_dim = 2048
else: