def get_loader(dataset, cfg, mode='train'):
    assert mode in ('train', 'eval')
    if mode == 'train':
        sampler = RandomSampler(
            dataset, batch_size=cfg.data.samples_per_gpu, drop_last=True, seed=0
        )
        loader = DataLoader(dataset, sampler, num_workers=cfg.data.workers_per_gpu)
    else:
        samples_per_gpu = cfg.data.get('eval_samples_per_gpu', cfg.data.samples_per_gpu)
        workers_per_gpu = cfg.data.get('eval_workers_per_gpu', cfg.data.workers_per_gpu)
        if cfg.evaluation.multi_process:
            sampler = SequentialSampler(dataset, batch_size=samples_per_gpu, drop_last=False)
        else:
            sampler = SequentialSampler(
                dataset, batch_size=samples_per_gpu, drop_last=False, world_size=1, rank=0
            )
        loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
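# Usage sketch (not from the original code): `cfg` is assumed to be an
# EasyDict-style config whose `cfg.data` supports both attribute access and
# dict-style .get(), matching how get_loader reads it above; `my_dataset`
# stands in for any map-style megengine Dataset.
from easydict import EasyDict

cfg = EasyDict(dict(
    data=dict(samples_per_gpu=16, workers_per_gpu=2),
    evaluation=dict(multi_process=False),
))
train_loader = get_loader(my_dataset, cfg, mode='train')
eval_loader = get_loader(my_dataset, cfg, mode='eval')  # falls back to the train batch size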
def build_dataloader(rank, world_size, data_root, ann_file):
    val_dataset = COCOJoints(
        data_root, ann_file, image_set="val2017", order=("image", "boxes", "info")
    )
    val_sampler = SequentialSampler(val_dataset, 1, world_size=world_size, rank=rank)
    val_dataloader = DataLoader(
        val_dataset,
        sampler=val_sampler,
        num_workers=4,
        transform=T.Compose(
            transforms=[
                T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
                ExtendBoxes(
                    cfg.test_x_ext,
                    cfg.test_y_ext,
                    cfg.input_shape[1] / cfg.input_shape[0],
                    random_extend_prob=0,
                ),
                RandomBoxAffine(
                    degrees=(0, 0),
                    scale=(1, 1),
                    output_shape=cfg.input_shape,
                    rotate_prob=0,
                    scale_prob=0,
                ),
                T.ToMode(),
            ],
            order=("image", "boxes", "info"),
        ),
    )
    return val_dataloader
def get_loader(dataset, cfg):
    samples_per_gpu = cfg.data.test_samples_per_gpu
    workers_per_gpu = cfg.data.test_workers_per_gpu
    sampler = SequentialSampler(dataset, batch_size=samples_per_gpu, drop_last=False)
    loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
def build_dataloader(rank, world_size, data_dir):
    val_dataset = COCODataset(
        os.path.join(data_dir, "val2017"),
        os.path.join(data_dir, "annotations/instances_val2017.json"),
        order=["image", "info"],
    )
    val_sampler = SequentialSampler(val_dataset, 1, world_size=world_size, rank=rank)
    val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2)
    return val_dataloader
def build_dataset(root=Path('/home/zqh/data/omniglot-py'), nway=5, kshot=1,
                  kquery=1, batch_size=32):
    train_ds = OmniglotDataset(root, nway, kshot, kquery, mode='train')
    train_smp = SequentialSampler(train_ds, drop_last=True, batch_size=batch_size)
    train_loader = DataLoader(train_ds, sampler=train_smp, num_workers=4)
    val_ds = OmniglotDataset(root, nway, kshot, kquery, mode='val')
    # The validation sampler must index the validation set, not the training set.
    val_smp = SequentialSampler(val_ds, drop_last=True, batch_size=batch_size)
    val_loader = DataLoader(val_ds, sampler=val_smp, num_workers=4)
    return train_loader, val_loader
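# Usage sketch, assuming OmniglotDataset yields one few-shot episode per index
# (the exact item layout is an assumption, not taken from the original code):
train_loader, val_loader = build_dataset(nway=5, kshot=1, kquery=1, batch_size=32)
for batch in train_loader:
    # each batch holds `batch_size` N-way K-shot episodes
    break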
def build_dataloader(rank, world_size, data_dir, cfg):
    val_dataset = data_mapper[cfg.test_dataset["name"]](
        os.path.join(data_dir, cfg.test_dataset["name"], cfg.test_dataset["root"]),
        os.path.join(data_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]),
        order=["image", "info"],
    )
    val_sampler = SequentialSampler(val_dataset, 1, world_size=world_size, rank=rank)
    val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2)
    return val_dataloader
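# A minimal sketch of driving the per-rank loader above with MegEngine's
# distributed launcher; `worker` and its arguments are illustrative, and this
# assumes a MegEngine version that provides megengine.distributed.launcher.
import megengine.distributed as dist

@dist.launcher
def worker(data_dir, cfg):
    rank = dist.get_rank()
    world_size = dist.get_world_size()
    loader = build_dataloader(rank, world_size, data_dir, cfg)
    for batch in loader:
        pass  # run inference on this rank's shard of the validation set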
from megengine.data.transform import RandomResizedCrop, Normalize, ToMode, Pad, Compose
import megengine.optimizer as optim

mge.set_log_file('log.txt')
logger = mge.get_logger(__name__)
# logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")

# dataset
root_dir = '/data/.cache/dataset/MNIST'
mnist_train_dataset = MNIST(root=root_dir, train=True, download=False)
mnist_test_dataset = MNIST(root=root_dir, train=False, download=False)

random_sampler = RandomSampler(dataset=mnist_train_dataset, batch_size=256)
sequential_sampler = SequentialSampler(dataset=mnist_test_dataset, batch_size=256)

mnist_train_dataloader = DataLoader(
    dataset=mnist_train_dataset,
    sampler=random_sampler,
    transform=Compose([
        RandomResizedCrop(output_size=28),
        # mean and std are the mean and standard deviation of the MNIST data;
        # pixel values are in the range 0~255
        # Normalize(mean=0.1307*255, std=0.3081*255),
        # Pad(2),
        # 'CHW' converts images from (height, width, channel) format
        # to (channel, height, width) format
        # ToMode('CHW'),
    ]))
mnist_test_dataloader = DataLoader(
    dataset=mnist_test_dataset,
    sampler=sequential_sampler,
)
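# Quick sanity check (a sketch, not part of the original script): pull one
# batch to confirm the pipeline runs; the exact shapes depend on which
# transforms are left enabled above.
for batch_img, batch_label in mnist_train_dataloader:
    print(batch_img.shape, batch_label.shape)
    break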
print(len(train_dataset), len(train_label))
print(len(test_dataset), len(test_label))

from typing import Tuple
from megengine.data.dataset import Dataset

class BostonTrainDataset(Dataset):
    def __init__(self):
        self.data = train_dataset
        self.label = train_label

    def __getitem__(self, index: int) -> Tuple:
        return self.data[index], self.label[index]

    def __len__(self) -> int:
        return len(self.data)

boston_train_dataset = BostonTrainDataset()
print(len(boston_train_dataset))

from megengine.data import DataLoader
from megengine.data import SequentialSampler

sequential_sampler = SequentialSampler(dataset=boston_train_dataset, batch_size=100)
train_dataloader = DataLoader(dataset=boston_train_dataset, sampler=sequential_sampler)

for batch_data, batch_label in train_dataloader:
    print(batch_data.shape, batch_label.shape, len(train_dataloader))
    break
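# SequentialSampler walks the data in order; for actual training you would
# usually shuffle instead. A drop-in RandomSampler swap (same MegEngine
# sampler API as above) looks like this:
from megengine.data import RandomSampler

random_sampler = RandomSampler(dataset=boston_train_dataset, batch_size=100)
shuffled_dataloader = DataLoader(dataset=boston_train_dataset, sampler=random_sampler)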
# Static graph mode is much faster than dynamic graph mode
trace.enabled = True

if __name__ == '__main__':
    np.random.seed(39)
    train_dataset = XORDataset(30000)
    test_dataset = XORDataset(10000)
    # The dataset is passed twice on purpose: the sampler needs it to generate
    # batch indices, while the DataLoader needs it to fetch the actual samples.
    train_sampler = RandomSampler(dataset=train_dataset, batch_size=32, drop_last=True)
    train_loader = DataLoader(train_dataset, sampler=train_sampler)
    test_sampler = SequentialSampler(dataset=test_dataset, batch_size=32, drop_last=False)
    test_loader = DataLoader(test_dataset, sampler=test_sampler)
    # draw_dataset(train_dataset)

    model = build_model()
    optimizer = optim.SGD(
        model.parameters(),
        lr=0.01,
    )
    data = mge.tensor()
    label = mge.tensor(dtype='int32')
    total_epochs = 10
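    # Sketch of the training loop this setup leads into, in the old MegEngine
    # 0.x style implied by `trace.enabled` and the tensor placeholders above;
    # the train step itself is an assumption, not the original author's code.
    @trace
    def train_func(data, label, *, net, opt):
        pred = net(data)
        loss = F.cross_entropy_with_softmax(pred, label)
        opt.backward(loss)
        return loss

    for epoch in range(total_epochs):
        for batch_data, batch_label in train_loader:
            data.set_value(batch_data)
            label.set_value(batch_label)
            optimizer.zero_grad()
            loss = train_func(data, label, net=model, opt=optimizer)
            optimizer.step()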