def prepare_loaders_impl(
        location: str,
        train_ratio: float = 0.9,
        vote_ratio: float = 0.05,
        batch_size: int = 32,
        dp_enabled: bool = False,
) -> Tuple[PrefetchDataset, PrefetchDataset, PrefetchDataset]:
    """
    Load training data from folders and create train, vote and test dataloaders

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as the training set
    :param vote_ratio: What portion of the data should be used as the vote set
    :param batch_size: Batch size
    :param dp_enabled: Whether differential privacy is enabled for the train loader
    :return: Tuple of train_loader, vote_loader and test_loader
    """
    data_folder = get_data(location)

    images = pickle.load(open(Path(data_folder) / IMAGE_FL, "rb"))
    labels = pickle.load(open(Path(data_folder) / LABEL_FL, "rb"))

    n_cases = int(train_ratio * len(images))
    n_vote_cases = int(vote_ratio * len(images))

    train_loader = _make_loader(images[:n_cases], labels[:n_cases],
                                batch_size, dp_enabled=dp_enabled)
    vote_loader = _make_loader(images[n_cases:n_cases + n_vote_cases],
                               labels[n_cases:n_cases + n_vote_cases], batch_size)
    test_loader = _make_loader(images[n_cases + n_vote_cases:],
                               labels[n_cases + n_vote_cases:], batch_size)

    return train_loader, vote_loader, test_loader

def prepare_data_loaders(
        location: str,
        train_ratio: float = 0.9,
        vote_ratio: float = 0.05,
        batch_size: int = 32,
) -> Tuple[PrefetchDataset, PrefetchDataset, PrefetchDataset]:
    """
    Load training data from folders and create train, vote and test dataloaders

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as the training set
    :param vote_ratio: What portion of the data should be used as the vote set
    :param batch_size: Batch size
    :return: Tuple of train_loader, vote_loader and test_loader
    """
    data_folder = get_data(location)

    images = pickle.load(open(Path(data_folder) / IMAGE_FL, "rb"))
    labels = pickle.load(open(Path(data_folder) / LABEL_FL, "rb"))

    n_cases = int(train_ratio * len(images))
    n_vote_cases = int(vote_ratio * len(images))

    train_loader = _make_loader(images[:n_cases], labels[:n_cases], batch_size)
    vote_loader = _make_loader(images[n_cases:n_cases + n_vote_cases],
                               labels[n_cases:n_cases + n_vote_cases], batch_size)
    test_loader = _make_loader(images[n_cases + n_vote_cases:],
                               labels[n_cases + n_vote_cases:], batch_size)

    return train_loader, vote_loader, test_loader

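# `_make_loader` is a helper defined elsewhere in the repository and not shown in this
# listing. Below is a minimal, hypothetical sketch of what a tf.data-based version
# could look like for the Keras variants above; the name `_make_loader_sketch`, the
# shuffle-then-batch ordering and the handling of `dp_enabled` (dropping the final
# partial batch, since DP-SGD style training usually needs fixed-size batches) are
# assumptions, not the repository's actual implementation.
import numpy as np
import tensorflow as tf


def _make_loader_sketch(images: np.ndarray,
                        labels: np.ndarray,
                        batch_size: int,
                        dp_enabled: bool = False) -> tf.data.Dataset:
    """Build a shuffled, batched, prefetched dataset from in-memory arrays."""
    dataset = tf.data.Dataset.from_tensor_slices((images, labels))
    # Shuffle across the whole array, then batch; drop the remainder when DP is
    # enabled so every batch has exactly batch_size samples.
    dataset = dataset.shuffle(len(images)).batch(batch_size, drop_remainder=dp_enabled)
    return dataset.prefetch(tf.data.AUTOTUNE)
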
def prepare_data_loaders(
        location: str,
        train_ratio: float = 0.8,
        vote_ratio: float = 0.1,
) -> Tuple[Tuple[np.ndarray, np.ndarray],
           Tuple[np.ndarray, np.ndarray],
           Tuple[np.ndarray, np.ndarray]]:
    """
    Load training data from folders and create train, vote and test arrays

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as the training set
    :param vote_ratio: What portion of the data should be used as the vote set
    :return: Tuple of tuples (train_data, train_labels), (vote_data, vote_labels),
        (test_data, test_labels)
    """
    data_folder = get_data(location)

    data = pickle.load(open(Path(data_folder) / DATA_FL, "rb"))
    labels = pickle.load(open(Path(data_folder) / LABEL_FL, "rb"))

    n_cases = int(train_ratio * len(data))
    n_vote_cases = int(vote_ratio * len(data))
    assert n_cases > 0, "There are no cases"

    train_data, train_labels = data[:n_cases], labels[:n_cases]
    vote_data, vote_labels = (data[n_cases:n_cases + n_vote_cases],
                              labels[n_cases:n_cases + n_vote_cases])
    test_data, test_labels = (data[n_cases + n_vote_cases:],
                              labels[n_cases + n_vote_cases:])

    return (train_data, train_labels), (vote_data, vote_labels), (test_data, test_labels)

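# The split above is purely positional: the first train_ratio of the samples become
# the training set, the next vote_ratio the vote set, and whatever remains the test
# set. A small, self-contained illustration of that arithmetic on synthetic data
# (not the repository's pickled files):
import numpy as np

_samples = np.arange(10)                      # stand-in for 10 loaded samples
_n_cases = int(0.8 * len(_samples))           # 8 training samples
_n_vote_cases = int(0.1 * len(_samples))      # 1 vote sample
print(_samples[:_n_cases])                          # [0 1 2 3 4 5 6 7]
print(_samples[_n_cases:_n_cases + _n_vote_cases])  # [8]
print(_samples[_n_cases + _n_vote_cases:])          # [9]
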
def prepare_data_loaders(
        location: str,
        train_ratio: float = 0.8,
        vote_ratio: float = 0.1,
        batch_size: int = 8,
        no_cuda: bool = False,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """
    Load training data from folders and create train, vote and test dataloaders

    :param location: Path to training dataset
    :param train_ratio: What portion of the data should be used as the training set
    :param vote_ratio: What portion of the data should be used as the vote set
    :param batch_size: Batch size
    :param no_cuda: Disable GPU computing
    :return: Tuple of train_loader, vote_loader and test_loader
    """
    cuda = not no_cuda and torch.cuda.is_available()
    DataloaderKwargs = TypedDict('DataloaderKwargs',
                                 {'num_workers': int, 'pin_memory': bool},
                                 total=False)
    loader_kwargs: DataloaderKwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

    data_folder = get_data(location)

    data = pickle.load(open(Path(data_folder) / DATA_FL, "rb"))
    labels = pickle.load(open(Path(data_folder) / LABEL_FL, "rb"))

    n_cases = int(train_ratio * len(data))
    n_vote_cases = int(vote_ratio * len(data))
    assert n_cases > 0, "There are no cases"

    train_loader = _make_loader(data[:n_cases], labels[:n_cases],
                                batch_size, **loader_kwargs)
    vote_loader = _make_loader(data[n_cases:n_cases + n_vote_cases],
                               labels[n_cases:n_cases + n_vote_cases], batch_size)
    test_loader = _make_loader(data[n_cases + n_vote_cases:],
                               labels[n_cases + n_vote_cases:], batch_size,
                               **loader_kwargs)

    return train_loader, vote_loader, test_loader

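# As in the Keras variants, `_make_loader` here is a repository helper that is not
# shown. A minimal, hypothetical PyTorch sketch is given below, assuming the pickled
# data and labels are NumPy arrays; the dtype choices and `shuffle=True` are
# assumptions rather than the actual implementation.
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset


def _make_loader_sketch(data: np.ndarray,
                        labels: np.ndarray,
                        batch_size: int,
                        **loader_kwargs) -> DataLoader:
    """Wrap in-memory arrays in a TensorDataset and return a shuffling DataLoader."""
    dataset = TensorDataset(torch.as_tensor(data, dtype=torch.float32),
                            torch.as_tensor(labels))
    # loader_kwargs carries num_workers / pin_memory when CUDA is available.
    return DataLoader(dataset, batch_size=batch_size, shuffle=True, **loader_kwargs)
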
def prepare_data_loaders(train_folder: str,
                         train_ratio: float = 0.9,
                         batch_size: int = 32,
                         **_kwargs) -> Tuple[PrefetchDataset, PrefetchDataset]:
    """
    Load training data from folders and create train and test dataloaders

    :param train_folder: Path to training dataset
    :param train_ratio: What portion of the data should be used as the training set
        (the remainder is used as the test set)
    :param batch_size: Batch size
    :param _kwargs: Residual parameters not used by this function
    :return: Tuple of train_loader and test_loader
    """
    data_folder = get_data(train_folder)

    images = pickle.load(open(Path(data_folder) / IMAGE_FL, "rb"))
    labels = pickle.load(open(Path(data_folder) / LABEL_FL, "rb"))

    n_cases = int(train_ratio * len(images))
    train_loader = _make_loader(images[:n_cases], labels[:n_cases], batch_size)
    test_loader = _make_loader(images[n_cases:], labels[n_cases:], batch_size)

    return train_loader, test_loader

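# Usage sketch for the two-way split variant above; the path is a placeholder, and
# the extra `seed` keyword only illustrates that unrecognised parameters are absorbed
# by **_kwargs:
#
# train_loader, test_loader = prepare_data_loaders("/path/to/dataset",
#                                                  train_ratio=0.9,
#                                                  batch_size=64,
#                                                  seed=42)  # ignored via **_kwargs
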
def prepare_data_loaders(
        location: str,
        test_location: Optional[str] = None,
        train_ratio: float = 0.96,
        batch_size: int = 8,
        no_cuda: bool = False,
) -> Tuple[DataLoader, DataLoader]:
    """
    Load training data from folders and create train and test dataloaders

    :param location: Path to training dataset
    :param test_location: Path to test dataset
    :param train_ratio: When test_location is not specified, what portion of the data
        should be used as the training set (the remainder is used as the test set)
    :param batch_size: Batch size
    :param no_cuda: Disable GPU computing
    :return: Tuple of train_loader and test_loader
    """
    cuda = not no_cuda and torch.cuda.is_available()
    DataloaderKwargs = TypedDict('DataloaderKwargs',
                                 {'num_workers': int, 'pin_memory': bool},
                                 total=False)
    loader_kwargs: DataloaderKwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}

    data_folder = get_data(location)

    if test_location is not None:
        local_test_folder = get_data(test_location)
        train_loader = DataLoader(
            XrayDataset(data_folder, train=True, train_ratio=1.0),
            batch_size=batch_size, shuffle=True, **loader_kwargs)
        test_loader = DataLoader(
            XrayDataset(local_test_folder, train=True, train_ratio=1.0),
            batch_size=batch_size, shuffle=True, **loader_kwargs)
    else:
        train_loader = DataLoader(
            XrayDataset(data_folder, train=True, train_ratio=train_ratio),
            batch_size=batch_size, shuffle=True, **loader_kwargs)
        test_loader = DataLoader(
            XrayDataset(data_folder, train=False, train_ratio=train_ratio),
            batch_size=batch_size, shuffle=True, **loader_kwargs)

    return train_loader, test_loader
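
# Usage sketch for the XrayDataset variant above; paths are placeholders. With an
# explicit test folder the whole training folder is used for training, otherwise the
# training folder itself is split (96% train / 4% test by default):
#
# train_loader, test_loader = prepare_data_loaders("/data/xray/train",
#                                                  test_location="/data/xray/test",
#                                                  batch_size=16)
#
# train_loader, test_loader = prepare_data_loaders("/data/xray/train")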