Beispiel #1
0
def prepare_loaders_impl(
    location: str,
    train_ratio: float = 0.9,
    vote_ratio: float = 0.05,
    batch_size: int = 32,
    dp_enabled: bool = False,
) -> Tuple[PrefetchDataset, PrefetchDataset, PrefetchDataset]:
    """
    Load pickled images and labels and split them into train, vote and test loaders

    The split is positional: the first ``train_ratio`` share of the samples
    goes to training, the next ``vote_ratio`` share to voting, and whatever
    remains to testing.

    :param location: Dataset location, resolved to a folder through get_data
    :param train_ratio: Fraction of the samples placed in the training split
    :param vote_ratio: Fraction of the samples placed in the vote split
    :param batch_size: Batch size forwarded to every loader
    :param dp_enabled: Forwarded to _make_loader for the training loader only
    :return: Tuple of train_loader, vote_loader and test_loader
    """
    data_folder = Path(get_data(location))

    # Context managers make sure the file handles are closed after loading.
    # NOTE: pickle executes arbitrary code on load; only use trusted datasets.
    with open(data_folder / IMAGE_FL, "rb") as image_file:
        images = pickle.load(image_file)
    with open(data_folder / LABEL_FL, "rb") as label_file:
        labels = pickle.load(label_file)

    n_cases = int(train_ratio * len(images))
    n_vote_cases = int(vote_ratio * len(images))
    vote_end = n_cases + n_vote_cases

    train_loader = _make_loader(images[:n_cases],
                                labels[:n_cases],
                                batch_size,
                                dp_enabled=dp_enabled)
    vote_loader = _make_loader(images[n_cases:vote_end],
                               labels[n_cases:vote_end],
                               batch_size)
    test_loader = _make_loader(images[vote_end:],
                               labels[vote_end:],
                               batch_size)

    return train_loader, vote_loader, test_loader
Beispiel #2
0
def prepare_data_loaders(
    location: str,
    train_ratio: float = 0.9,
    vote_ratio: float = 0.05,
    batch_size: int = 32,
) -> Tuple[PrefetchDataset, PrefetchDataset, PrefetchDataset]:
    """
    Load training data from folders and create train, vote and test dataloaders

    The split is positional: the first ``train_ratio`` share of the samples
    goes to training, the next ``vote_ratio`` share to voting, and whatever
    remains to testing.

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as train set
    :param vote_ratio: What portion of the data should be used as vote set
    :param batch_size: Batch size forwarded to every loader
    :return: Tuple of train_loader, vote_loader and test_loader
    """

    data_folder = Path(get_data(location))

    # Context managers make sure the file handles are closed after loading.
    # NOTE: pickle executes arbitrary code on load; only use trusted datasets.
    with open(data_folder / IMAGE_FL, "rb") as image_file:
        images = pickle.load(image_file)
    with open(data_folder / LABEL_FL, "rb") as label_file:
        labels = pickle.load(label_file)

    n_cases = int(train_ratio * len(images))
    n_vote_cases = int(vote_ratio * len(images))
    vote_end = n_cases + n_vote_cases

    train_loader = _make_loader(images[:n_cases], labels[:n_cases], batch_size)
    vote_loader = _make_loader(images[n_cases:vote_end],
                               labels[n_cases:vote_end],
                               batch_size)
    test_loader = _make_loader(images[vote_end:],
                               labels[vote_end:],
                               batch_size)

    return train_loader, vote_loader, test_loader
Beispiel #3
0
def prepare_data_loaders(
    location: str,
    train_ratio: float = 0.8,
    vote_ratio: float = 0.1,
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray], Tuple[
        np.ndarray, np.ndarray]]:
    """
    Load training data from folders and create train, vote and test arrays

    The split is positional: the first ``train_ratio`` share of the samples
    goes to training, the next ``vote_ratio`` share to voting, and whatever
    remains to testing.

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as train set
    :param vote_ratio: What portion of the data should be used as vote set
    :return: Tuple of tuples (train_data, train_labels),
        (vote_data, vote_labels), (test_data, test_labels)
    :raises AssertionError: If the training split would be empty
    """

    data_folder = Path(get_data(location))

    # Context managers make sure the file handles are closed after loading.
    # NOTE: pickle executes arbitrary code on load; only use trusted datasets.
    with open(data_folder / DATA_FL, "rb") as data_file:
        data = pickle.load(data_file)
    with open(data_folder / LABEL_FL, "rb") as label_file:
        labels = pickle.load(label_file)

    n_cases = int(train_ratio * len(data))
    n_vote_cases = int(vote_ratio * len(data))
    assert (n_cases > 0), "There are no cases"
    vote_end = n_cases + n_vote_cases

    train_data, train_labels = data[:n_cases], labels[:n_cases]
    vote_data, vote_labels = data[n_cases:vote_end], labels[n_cases:vote_end]
    test_data, test_labels = data[vote_end:], labels[vote_end:]

    return ((train_data, train_labels),
            (vote_data, vote_labels),
            (test_data, test_labels))
Beispiel #4
0
def prepare_data_loaders(
    location: str,
    train_ratio: float = 0.8,
    vote_ratio: float = 0.1,
    batch_size: int = 8,
    no_cuda: bool = False,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """
    Load training data from folders and create train, vote and test dataloaders

    The split is positional: the first ``train_ratio`` share of the samples
    goes to training, the next ``vote_ratio`` share to voting, and whatever
    remains to testing.

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as train set
    :param vote_ratio: What portion of the data should be used as vote set
    :param batch_size: Batch size
    :param no_cuda: Disable GPU computing
    :return: Tuple of train_loader, vote_loader and test_loader
    :raises AssertionError: If the training split would be empty
    """

    cuda = not no_cuda and torch.cuda.is_available()
    DataloaderKwargs = TypedDict('DataloaderKwargs', {
        'num_workers': int,
        'pin_memory': bool
    },
                                 total=False)
    # Worker and pinned-memory settings are only passed when CUDA is in use.
    loader_kwargs: DataloaderKwargs = {
        'num_workers': 1,
        'pin_memory': True
    } if cuda else {}

    data_folder = Path(get_data(location))

    # Context managers make sure the file handles are closed after loading.
    # NOTE: pickle executes arbitrary code on load; only use trusted datasets.
    with open(data_folder / DATA_FL, "rb") as data_file:
        data = pickle.load(data_file)
    with open(data_folder / LABEL_FL, "rb") as label_file:
        labels = pickle.load(label_file)

    n_cases = int(train_ratio * len(data))
    n_vote_cases = int(vote_ratio * len(data))
    assert (n_cases > 0), "There are no cases"
    vote_end = n_cases + n_vote_cases

    train_loader = _make_loader(data[:n_cases], labels[:n_cases], batch_size,
                                **loader_kwargs)
    # NOTE(review): unlike train/test, the vote loader is built without
    # loader_kwargs -- confirm whether that is intentional.
    vote_loader = _make_loader(data[n_cases:vote_end],
                               labels[n_cases:vote_end],
                               batch_size)
    test_loader = _make_loader(data[vote_end:], labels[vote_end:], batch_size,
                               **loader_kwargs)

    return train_loader, vote_loader, test_loader
Beispiel #5
0
def prepare_data_loaders(train_folder: str,
                         train_ratio: float = 0.9,
                         batch_size: int = 32,
                         **_kwargs) -> Tuple[PrefetchDataset, PrefetchDataset]:
    """
    Load training data from folders and create train and test dataloader

    The split is positional: the first ``train_ratio`` share of the samples
    goes to training and the remainder to testing.

    :param train_folder: Path to training dataset
    :param train_ratio: What portion of the data should be used as train set
    :param batch_size: Batch size forwarded to both loaders
    :param _kwargs: Residual parameters not used by this function
    :return: Tuple of train_loader and test_loader
    """

    data_folder = Path(get_data(train_folder))

    # Context managers make sure the file handles are closed after loading.
    # NOTE: pickle executes arbitrary code on load; only use trusted datasets.
    with open(data_folder / IMAGE_FL, "rb") as image_file:
        images = pickle.load(image_file)
    with open(data_folder / LABEL_FL, "rb") as label_file:
        labels = pickle.load(label_file)

    n_cases = int(train_ratio * len(images))
    train_loader = _make_loader(images[:n_cases], labels[:n_cases], batch_size)
    test_loader = _make_loader(images[n_cases:], labels[n_cases:], batch_size)

    return train_loader, test_loader
Beispiel #6
0
def prepare_data_loaders(
    location: str,
    test_location: Optional[str] = None,
    train_ratio: float = 0.96,
    batch_size: int = 8,
    no_cuda: bool = False,
) -> Tuple[DataLoader, DataLoader]:
    """
    Build shuffled train and test dataloaders over XrayDataset folders

    With a separate ``test_location`` each folder gets its own XrayDataset
    created with ``train=True`` and ``train_ratio=1.0``. Without it, the
    single folder backs both datasets, selected through ``train=True`` /
    ``train=False`` with the given ``train_ratio``.

    :param location: Path to training dataset
    :param test_location: Path to test dataset
    :param train_ratio: Ratio handed to XrayDataset when test_location is not specified
    :param batch_size: Batch size of both loaders
    :param no_cuda: Disable GPU computing
    :return: Tuple of train_loader and test_loader
    """

    cuda = not no_cuda and torch.cuda.is_available()
    DataloaderKwargs = TypedDict('DataloaderKwargs', {
        'num_workers': int,
        'pin_memory': bool
    },
                                 total=False)
    # Extra DataLoader options are only supplied when running on CUDA.
    loader_kwargs: DataloaderKwargs = {}
    if cuda:
        loader_kwargs = {'num_workers': 1, 'pin_memory': True}

    def _loader_for(folder, is_train, ratio):
        # Every loader shares batch size, shuffling and the CUDA options.
        dataset = XrayDataset(folder, train=is_train, train_ratio=ratio)
        return DataLoader(dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          **loader_kwargs)

    train_folder = get_data(location)

    if test_location is None:
        # Single folder: XrayDataset is given train_ratio and the train flag.
        return (_loader_for(train_folder, True, train_ratio),
                _loader_for(train_folder, False, train_ratio))

    # Dedicated test folder: both datasets are created with train_ratio=1.0.
    test_folder = get_data(test_location)
    return (_loader_for(train_folder, True, 1.0),
            _loader_for(test_folder, True, 1.0))