Example 1
def get_loader(dataset, cfg, mode='train'):
    assert mode in ('train', 'eval')
    if mode == 'train':
        sampler = RandomSampler(dataset,
                                batch_size=cfg.data.samples_per_gpu,
                                drop_last=True,
                                seed=0)
        loader = DataLoader(dataset,
                            sampler,
                            num_workers=cfg.data.workers_per_gpu)
    else:
        samples_per_gpu = cfg.data.get('eval_samples_per_gpu',
                                       cfg.data.samples_per_gpu)
        workers_per_gpu = cfg.data.get('eval_workers_per_gpu',
                                       cfg.data.workers_per_gpu)
        if cfg.evaluation.multi_process:
            sampler = SequentialSampler(dataset,
                                        batch_size=samples_per_gpu,
                                        drop_last=False)
        else:
            sampler = SequentialSampler(dataset,
                                        batch_size=samples_per_gpu,
                                        drop_last=False,
                                        world_size=1,
                                        rank=0)
        loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
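get_loader reads a handful of nested config fields; the sketch below shows the layout it expects, using a hypothetical AttrDict as a stand-in for the project's real config class and a toy ArrayDataset, so only the field names are taken from the code above.

import numpy as np
from megengine.data.dataset import ArrayDataset

class AttrDict(dict):
    # hypothetical stand-in for the real config object (attribute access plus dict.get)
    __getattr__ = dict.__getitem__

cfg = AttrDict(
    data=AttrDict(samples_per_gpu=2, workers_per_gpu=2),
    evaluation=AttrDict(multi_process=False),
)
toy_dataset = ArrayDataset(np.zeros((8, 3), dtype='float32'), np.zeros((8,), dtype='int32'))
train_loader = get_loader(toy_dataset, cfg, mode='train')
eval_loader = get_loader(toy_dataset, cfg, mode='eval')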
Example 2
def get_dataloader():
    instance_num = 102400
    datas = []
    labels = []
    masks = []
    for i in range(instance_num):
        # pick a random sequence length in [MINLEN, MAXLEN]
        cur_len = np.random.randint(MINLEN, MAXLEN + 1)
        inp_seq = np.zeros((MAXLEN + 1, len(CHARSET)), dtype='int32')
        mask = np.zeros((MAXLEN + 1, ), dtype='int32')
        out_seq = np.zeros((MAXLEN + 1, ), dtype='int32')

        # mark end-of-sequence with the last CHARSET index and flag the valid positions in the mask
        inp_seq[cur_len][len(CHARSET) - 1] = 1
        out_seq[cur_len] = len(CHARSET) - 1
        mask[:cur_len + 1] = 1
        for j in range(cur_len):
            pos = np.random.randint(1, len(CHARSET) - 1)  # do not generate '@' or '-'
            inp_seq[j][pos] = 1
            out_seq[cur_len - 1 - j] = pos

        datas.append(inp_seq)
        labels.append(out_seq)
        masks.append(mask)

    reverse_dataset = ArrayDataset(datas, labels, masks)
    random_sampler = RandomSampler(reverse_dataset, batch_size)
    dataloader = DataLoader(reverse_dataset, random_sampler)

    return dataloader
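get_dataloader depends on module-level names that are not shown here (CHARSET, MINLEN, MAXLEN, batch_size) plus the MegEngine imports. Below is a minimal set of assumed definitions that makes the snippet self-contained; the concrete charset and length values are illustrative guesses, not the original ones.

import numpy as np
from megengine.data import DataLoader, RandomSampler
from megengine.data.dataset import ArrayDataset

# index 0 and the last index are reserved for the special symbols that
# np.random.randint(1, len(CHARSET) - 1) deliberately skips
CHARSET = '-abcdefghijklmnopqrstuvwxyz@'
MINLEN = 5
MAXLEN = 20
batch_size = 64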
Example 3
def build_dataloader(batch_size, data_dir, cfg):
    train_dataset = data_mapper[cfg.train_dataset["name"]](
        os.path.join(data_dir, cfg.train_dataset["name"],
                     cfg.train_dataset["root"]),
        os.path.join(data_dir, cfg.train_dataset["name"],
                     cfg.train_dataset["ann_file"]),
        remove_images_without_annotations=True,
        order=["image", "boxes", "boxes_category", "info"],
    )
    train_sampler = build_sampler(train_dataset, batch_size)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose(
            transforms=[
                T.ShortestEdgeResize(cfg.train_image_short_size,
                                     cfg.train_image_max_size),
                T.RandomHorizontalFlip(),
                T.ToMode(),
            ],
            order=["image", "boxes", "boxes_category"],
        ),
        collator=DetectionPadCollator(),
        num_workers=2,
    )
    return {"train": train_dataloader}
Example 4
def build_dataloader(rank, world_size, data_root, ann_file):
    val_dataset = COCOJoints(data_root,
                             ann_file,
                             image_set="val2017",
                             order=("image", "boxes", "info"))
    val_sampler = SequentialSampler(val_dataset,
                                    1,
                                    world_size=world_size,
                                    rank=rank)
    val_dataloader = DataLoader(
        val_dataset,
        sampler=val_sampler,
        num_workers=4,
        transform=T.Compose(
            transforms=[
                T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
                ExtendBoxes(
                    cfg.test_x_ext,
                    cfg.test_y_ext,
                    cfg.input_shape[1] / cfg.input_shape[0],
                    random_extend_prob=0,
                ),
                RandomBoxAffine(
                    degrees=(0, 0),
                    scale=(1, 1),
                    output_shape=cfg.input_shape,
                    rotate_prob=0,
                    scale_prob=0,
                ),
                T.ToMode(),
            ],
            order=("image", "boxes", "info"),
        ),
    )
    return val_dataloader
Example 5
    def __init__(self, input_dimension, num_points, batch_size=16, istrain=True):
        """
        生成如图1所示的二分类数据集,数据集长度为 num_points
        """

        means = [0.1 * n for n in range(input_dimension)]
        scales = [1 for n in range(input_dimension)]

        deviation = [0.05 * (-1 if n % 2 == 0 else 1) for n in range(input_dimension)]
        sd = [0.1 * (-1 if n % 2 == 0 else 1) for n in range(input_dimension)]

        alls = []
        for i in range(input_dimension):
            m, s = means[i], scales[i]
            if not istrain:
                m += deviation[i]
                s += sd[i]

            cur = np.random.normal(m, s, num_points).astype(np.float32).reshape(-1, 1)
            print(cur)
            alls.append(cur)

        self.data = np.concatenate(alls, axis=1)

        super().__init__(self.data)
        self.random_sampler = RandomSampler(dataset=self, batch_size=batch_size, seed=1024)
        self.dataloader = DataLoader(dataset=self, sampler=self.random_sampler)
Example 6
def build_dataloader(dataset_dir, cfg):
    val_dataset = PseudoDetectionDataset(length=5000, order=["image", "info"])
    val_sampler = InferenceSampler(val_dataset, 1)
    val_dataloader = DataLoader(val_dataset,
                                sampler=val_sampler,
                                num_workers=2)
    return val_dataloader
Example 7
def build_dataloader(dataset_dir, cfg):
    if cfg.dataset == "VOC2012":
        val_dataset = EvalPascalVOC(
            dataset_dir,
            "val",
            order=["image", "mask", "info"]
        )
    elif cfg.dataset == "Cityscapes":
        val_dataset = dataset.Cityscapes(
            dataset_dir,
            "val",
            mode="gtFine",
            order=["image", "mask", "info"]
        )
    else:
        raise ValueError("Unsupported dataset {}".format(cfg.dataset))

    val_sampler = InferenceSampler(val_dataset, 1)
    val_dataloader = DataLoader(
        val_dataset,
        sampler=val_sampler,
        transform=T.Normalize(
            mean=cfg.img_mean, std=cfg.img_std, order=["image", "mask"]
        ),
        num_workers=2,
    )
    return val_dataloader
Example 8
def build_dataloader(batch_size, dataset_dir, cfg):
    if cfg.dataset == "VOC2012":
        train_dataset = dataset.PascalVOC(dataset_dir,
                                          cfg.data_type,
                                          order=["image", "mask"])
    elif cfg.dataset == "Cityscapes":
        train_dataset = dataset.Cityscapes(dataset_dir,
                                           "train",
                                           mode='gtFine',
                                           order=["image", "mask"])
    else:
        raise ValueError("Unsupported dataset {}".format(cfg.dataset))

    train_sampler = Infinite(
        RandomSampler(train_dataset, batch_size, drop_last=True))
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose(
            transforms=[
                T.RandomHorizontalFlip(0.5),
                T.RandomResize(scale_range=(0.5, 2)),
                T.RandomCrop(
                    output_size=(cfg.img_height, cfg.img_width),
                    padding_value=[0, 0, 0],
                    padding_maskvalue=255,
                ),
                T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
                T.ToMode(),
            ],
            order=["image", "mask"],
        ),
        num_workers=2,
    )
    return train_dataloader
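Because the sampler is wrapped in Infinite, the returned dataloader yields batches endlessly instead of stopping after one epoch, so it is consumed with a fixed step budget. A minimal consumption sketch; the data path and cfg object are placeholders.

import itertools

train_dataloader = build_dataloader(batch_size=8, data_dir="/path/to/data", cfg=cfg)
for step, (image, mask) in enumerate(itertools.islice(train_dataloader, 1000)):
    # forward / backward / optimizer step would go here
    pass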
Example 9
def get_dataloader(instance_num=102400):
    datas = []
    labels = []
    positions = []
    for i in range(instance_num):
        inp_seq = np.zeros((MAXLEN, len(CHARSET)), dtype='int32')
        cur_len = MAXLEN
        out_seq = np.zeros((MAXLEN, ), dtype='int32')
        pos_encoding = pos_to_query

        for j in range(cur_len):
            pos = np.random.randint(1, len(CHARSET) - 1)  # do not generate '@' or '-'
            inp_seq[j][pos] = 1
            out_seq[cur_len - 1 - j] = pos

        datas.append(inp_seq)
        labels.append(out_seq)
        positions.append(pos_encoding)

    reverse_dataset = ArrayDataset(datas, labels, positions)
    random_sampler = RandomSampler(reverse_dataset, batch_size)
    dataloader = DataLoader(reverse_dataset, random_sampler)

    return dataloader
Example 10
def build_dataloader():
    train_dataset = MNIST(root=gettempdir(), train=True, download=True)
    dataloader = DataLoader(
        train_dataset,
        transform=Compose([
            Normalize(mean=0.1307 * 255, std=0.3081 * 255),
            Pad(2),
            ToMode("CHW"),
        ]),
        sampler=RandomSampler(dataset=train_dataset, batch_size=64),
    )
    return dataloader
Example 11
def get_loader(dataset, cfg):
    samples_per_gpu = cfg.data.test_samples_per_gpu
    workers_per_gpu = cfg.data.test_workers_per_gpu
    sampler = SequentialSampler(dataset,
                                batch_size=samples_per_gpu,
                                drop_last=False)
    loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
Example 12
def build_dataloader(rank, world_size, dataset_dir, cfg):
    val_dataset = data_mapper[cfg.test_dataset["name"]](
        os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["root"]),
        os.path.join(dataset_dir, cfg.test_dataset["name"], cfg.test_dataset["ann_file"]),
        order=["image", "info"],
    )
    val_sampler = InferenceSampler(val_dataset, 1, world_size=world_size, rank=rank)
    val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2)
    return val_dataloader
Example 13
def build_dataloader(rank, world_size, data_dir):
    val_dataset = COCODataset(
        os.path.join(data_dir, "val2017"),
        os.path.join(data_dir, "annotations/instances_val2017.json"),
        order=["image", "info"],
    )
    val_sampler = SequentialSampler(val_dataset, 1, world_size=world_size, rank=rank)
    val_dataloader = DataLoader(val_dataset, sampler=val_sampler, num_workers=2)
    return val_dataloader
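In a distributed evaluation run, rank and world_size would typically come from megengine.distributed rather than being hard-coded; a short usage sketch, with the COCO directory given as a placeholder path.

import megengine.distributed as dist

rank = dist.get_rank()
world_size = dist.get_world_size()
val_dataloader = build_dataloader(rank, world_size, "/path/to/coco")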
Example 14
def build_dataset(root=Path('/home/zqh/data/omniglot-py'),
                  nway=5,
                  kshot=1,
                  kquery=1,
                  batch_size=32):

    train_ds = OmniglotDataset(root, nway, kshot, kquery, mode='train')
    train_smp = SequentialSampler(train_ds,
                                  drop_last=True,
                                  batch_size=batch_size)
    train_loader = DataLoader(train_ds, sampler=train_smp, num_workers=4)

    val_ds = OmniglotDataset(root, nway, kshot, kquery, mode='val')
    val_smp = SequentialSampler(val_ds,
                                drop_last=True,
                                batch_size=batch_size)
    val_loader = DataLoader(val_ds, sampler=val_smp, num_workers=4)

    return train_loader, val_loader
Example 15
def fetch_dataloader(params):
    input_transform = fetch_input_transform()
    spatial_transform = fetch_spatial_transform(params)

    benchmark_path_gof_clean = "dataset/GOF_Clean.npy"
    benchmark_path_gof_final = "dataset/GOF_Final.npy"

    if params.dataset_type == "GOF":
        train_ds = BaseDataset(input_transform, spatial_transform)
        val_ds = TestDataset(benchmark_path_gof_clean, input_transform)
        test_ds = ConcatDataset(
            [TestDataset(benchmark_path_gof_clean, input_transform),
             TestDataset(benchmark_path_gof_final, input_transform)])
    else:
        raise ValueError("Unknown dataset_type {}, expected 'GOF'".format(params.dataset_type))

    dataloaders = {}
    # add default train data loader
    train_sampler = RandomSampler(train_ds, batch_size=params.train_batch_size, drop_last=True)
    train_dl = DataLoader(train_ds, train_sampler, num_workers=params.num_workers)
    dataloaders["train"] = train_dl

    # choose the val or test data loader for evaluation
    for split in ["val", "test"]:
        if split in params.eval_type:
            if split == "val":
                val_sampler = SequentialSampler(val_ds, batch_size=params.eval_batch_size)
                dl = DataLoader(val_ds, val_sampler, num_workers=params.num_workers)
            elif split == "test":
                test_sampler = SequentialSampler(test_ds, batch_size=params.eval_batch_size)
                dl = DataLoader(test_ds, test_sampler, num_workers=params.num_workers)
            else:
                raise ValueError("Unknown eval_type in params, should in [val, test]")
            dataloaders[split] = dl
        else:
            dataloaders[split] = None

    return dataloaders
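fetch_dataloader reads everything from a params object; the sketch below lists the fields it actually touches, with illustrative values and SimpleNamespace standing in for whatever params class the project really uses.

from types import SimpleNamespace

params = SimpleNamespace(
    dataset_type="GOF",
    train_batch_size=8,
    eval_batch_size=1,
    num_workers=4,
    eval_type=["val"],  # which evaluation splits get a dataloader
)
dataloaders = fetch_dataloader(params)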
Example 16
    def __init__(self, num_points, batch_size=16):
        """
        生成如图1所示的二分类数据集,数据集长度为 num_points
        """

        # 初始化一个维度为 (50000, 2) 的 NumPy 数组。
        # 数组的每一行是一个横坐标和纵坐标都落在 [-1, 1] 区间的一个数据点 (x, y)
        # np.random.seed(2020)
        self.data = np.random.rand(num_points, 2).astype(np.float32) * 2 - 1
        # 为上述 NumPy 数组构建标签。每一行的 (x, y) 如果符合 x*y < 0,则对应标签为1,反之,标签为0
        self.label = np.zeros(num_points, dtype=np.int32)
        for i in range(num_points):
            self.label[i] = 1 if np.prod(self.data[i]) < 0 else 0

        super().__init__(self.data, self.label)
        self.random_sampler = RandomSampler(dataset=self, batch_size=batch_size, seed=1024)
        self.dataloader = DataLoader(dataset=self, sampler=self.random_sampler)
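This __init__ appears to belong to an ArrayDataset subclass that builds its own sampler and dataloader; Example 23 below instantiates it as XORDataset. Assuming that, consuming the dataset looks like the following sketch.

# assumes the __init__ above sits inside `class XORDataset(ArrayDataset):`
ds = XORDataset(num_points=1000, batch_size=32)
for batch_data, batch_label in ds.dataloader:
    print(batch_data.shape, batch_label.shape)  # expected: (32, 2) (32,)
    break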
Example 17
 def get_dataloader(self, examples, batch_size, is_random=False):
     features = convert_examples_to_features(
         examples, self.label_list, self.args.max_seq_length, self.tokenizer
     )
     all_input_ids, all_input_mask, all_segment_ids, all_label_ids = self.to_inputs(
         features
     )
     dataset = ArrayDataset(
         all_input_ids, all_input_mask, all_segment_ids, all_label_ids
     )
     if is_random:
         sampler = RandomSampler(
             dataset=dataset, batch_size=batch_size, drop_last=True
         )
     else:
         sampler = SequentialSampler(
             dataset=dataset, batch_size=batch_size, drop_last=True
         )
     dataloader = DataLoader(dataset=dataset, sampler=sampler,)
     return dataloader, len(features)
Example 18
def build_dataloader(batch_size, data_dir, cfg):
    train_dataset = build_dataset(data_dir, cfg)
    train_sampler = build_sampler(train_dataset, batch_size)
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose(
            transforms=[
                T.ShortestEdgeResize(
                    cfg.train_image_short_size,
                    cfg.train_image_max_size,
                    sample_style="choice",
                ),
                T.RandomHorizontalFlip(),
                T.ToMode(),
            ],
            order=["image", "boxes", "boxes_category"],
        ),
        collator=DetectionPadCollator(),
        num_workers=2,
    )
    return train_dataloader
Example 19
# dataset
root_dir = '/data/.cache/dataset/MNIST'
mnist_train_dataset = MNIST(root=root_dir, train=True, download=False)

mnist_test_dataset = MNIST(root=root_dir, train=False, download=False)

random_sampler = RandomSampler(dataset=mnist_train_dataset, batch_size=256)
sequential_sampler = SequentialSampler(dataset=mnist_test_dataset,
                                       batch_size=256)

mnist_train_dataloader = DataLoader(
    dataset=mnist_train_dataset,
    sampler=random_sampler,
    transform=Compose([
        RandomResizedCrop(output_size=28),
        # mean and std are the MNIST mean and standard deviation; pixel values range from 0 to 255
        #Normalize(mean=0.1307*255, std=0.3081*255),
        #Pad(2),
        # 'CHW' converts the image from (height, width, channel) layout to (channel, height, width)
        #ToMode('CHW'),
    ]))
mnist_test_dataloader = DataLoader(
    dataset=mnist_test_dataset,
    sampler=sequential_sampler,
)

# model
from model import get_net

net = get_net()
Example 20
print(len(train_dataset), len(train_label))
print(len(test_dataset), len(test_label))

from typing import Tuple
from megengine.data.dataset import Dataset

class BostonTrainDataset(Dataset):
    def __init__(self):
        self.data = train_dataset
        self.label = train_label

    def __getitem__(self, index: int) -> Tuple:
        return self.data[index], self.label[index]

    def __len__(self) -> int:
        return len(self.data)

boston_train_dataset = BostonTrainDataset()
print(len(boston_train_dataset))

from megengine.data import DataLoader
from megengine.data import SequentialSampler

sequential_sampler = SequentialSampler(dataset=boston_train_dataset, batch_size=100)
train_dataloader = DataLoader(dataset=boston_train_dataset, sampler=sequential_sampler)

for batch_data, batch_label in train_dataloader:
    print(batch_data.shape, batch_label.shape, len(train_dataloader))
    break
Example 21
from megengine.data import DataLoader
from megengine.data.dataset import MNIST
from megengine.data.transform import ToMode, Pad, Normalize, Compose
from megengine.data.sampler import RandomSampler, SequentialSampler

# If you are running in the MegStudio environment, set MNIST_DATA_PATH to /home/megstudio/dataset/MNIST/
MNIST_DATA_PATH = "./datasets/MNIST/"

# Load the training dataset; if it is not present locally, set download=True
train_dataset = MNIST(root=MNIST_DATA_PATH, train=True, download=False)
test_dataset = MNIST(root=MNIST_DATA_PATH, train=False, download=False)

batch_size = 64
# Create the samplers
train_sampler = RandomSampler(train_dataset, batch_size=batch_size)
test_sampler = SequentialSampler(test_dataset, batch_size=batch_size)

# Data preprocessing pipeline
transform = Compose([
    Normalize(mean=0.1307 * 255, std=0.3081 * 255),
    Pad(2),
    ToMode('CHW'),
])

# Create the DataLoaders
train_dataloader = DataLoader(train_dataset, train_sampler, transform)
test_dataloader = DataLoader(test_dataset, test_sampler, transform)

for X, y in train_dataloader:
    print("Shape of X: ", X.shape)  # [N, C, H, W]
    print("Shape of y: ", y.shape, y.dtype)
    break
Example 22
path = "unet_j.mge"


@trace
def train_func(data, label, net=None, optimizer=None):
    net.train()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
    return pred, loss


train_dataset = u_data("./data/train", order=["image", "mask"])
dataloader = DataLoader(train_dataset,
                        transform=Compose([ToMode('CHW')]),
                        sampler=RandomSampler(dataset=train_dataset,
                                              batch_size=4,
                                              drop_last=True))

unet = Unet(1, 4)
optimizer = optim.SGD(unet.parameters(), lr=0.05)

trace.enabled = True

total_epochs = 50
loss_src = 100000000
for epoch in range(total_epochs):
    total_loss = 0
    correct = 0
    total = 0
    sta = time.time()
Example 23
    return logits, loss


# Static graph mode is much faster than dynamic graph mode
trace.enabled = True

if __name__ == '__main__':
    np.random.seed(39)
    train_dataset = XORDataset(30000)
    test_dataset = XORDataset(10000)

    # train_dataset is passed both to the sampler (which needs the dataset length to
    # generate indices) and to the DataLoader (which uses those indices to fetch samples)
    train_sampler = RandomSampler(dataset=train_dataset,
                                  batch_size=32,
                                  drop_last=True)
    train_loader = DataLoader(train_dataset, sampler=train_sampler)

    test_sampler = SequentialSampler(dataset=test_dataset,
                                     batch_size=32,
                                     drop_last=False)
    test_loader = DataLoader(test_dataset, sampler=test_sampler)

    # draw_dataset(train_dataset)
    model = build_model()

    optimizer = optim.SGD(
        model.parameters(),
        lr=0.01,
    )

    data = mge.tensor()