Example 1
def get_dataloader(instance_num=102400):
    #instance_num = 102400
    datas = []
    labels = []
    positions = []
    for i in range(instance_num):
        inp_seq = np.zeros((MAXLEN, len(CHARSET)), dtype='int32')
        cur_len = MAXLEN
        out_seq = np.zeros((MAXLEN, ), dtype='int32')
        pos_encoding = pos_to_query  # position-encoding array defined elsewhere in the original script

        for j in range(cur_len):
            pos = np.random.randint(1, len(CHARSET) - 1)  # never generate '@' or '-'
            inp_seq[j][pos] = 1
            out_seq[cur_len - 1 - j] = pos

        datas.append(inp_seq)
        labels.append(out_seq)
        positions.append(pos_encoding)

    reverse_dataset = ArrayDataset(datas, labels, positions)
    random_sampler = RandomSampler(reverse_dataset, batch_size)
    dataloader = DataLoader(reverse_dataset, random_sampler)

    return dataloader
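A minimal usage sketch (not part of the original snippet): the DataLoader yields batched tuples in the same order as the arrays passed to ArrayDataset, so each iteration gives the one-hot inputs, the reversed targets, and the position encodings.

# Illustrative sketch only: relies on MAXLEN, CHARSET and batch_size from the snippet above.
dataloader = get_dataloader(instance_num=1024)
for inp_seq, out_seq, pos_encoding in dataloader:
    # inp_seq:      (batch_size, MAXLEN, len(CHARSET)) one-hot inputs
    # out_seq:      (batch_size, MAXLEN) reversed target indices
    # pos_encoding: the per-instance position encoding collected above
    break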
Example 2
def build_dataloader(batch_size, dataset_dir, cfg):
    if cfg.dataset == "VOC2012":
        train_dataset = dataset.PascalVOC(dataset_dir,
                                          cfg.data_type,
                                          order=["image", "mask"])
    elif cfg.dataset == "Cityscapes":
        train_dataset = dataset.Cityscapes(dataset_dir,
                                           "train",
                                           mode='gtFine',
                                           order=["image", "mask"])
    else:
        raise ValueError("Unsupported dataset {}".format(cfg.dataset))

    train_sampler = Infinite(
        RandomSampler(train_dataset, batch_size, drop_last=True))
    train_dataloader = DataLoader(
        train_dataset,
        sampler=train_sampler,
        transform=T.Compose(
            transforms=[
                T.RandomHorizontalFlip(0.5),
                T.RandomResize(scale_range=(0.5, 2)),
                T.RandomCrop(
                    output_size=(cfg.img_height, cfg.img_width),
                    padding_value=[0, 0, 0],
                    padding_maskvalue=255,
                ),
                T.Normalize(mean=cfg.img_mean, std=cfg.img_std),
                T.ToMode(),
            ],
            order=["image", "mask"],
        ),
        num_workers=2,
    )
    return train_dataloader
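Because the sampler is wrapped in Infinite, the loader above never exhausts; a hedged sketch of how training code typically pulls a fixed number of steps from it (cfg and max_iters are assumed to exist in the caller and are not part of the snippet):

# Illustrative sketch only: cfg and max_iters come from the surrounding training script.
train_loader = iter(build_dataloader(batch_size=8, dataset_dir="/path/to/data", cfg=cfg))
for step in range(max_iters):
    image, mask = next(train_loader)  # batched numpy arrays, in order=["image", "mask"]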
Example 3
def get_dataloader():
    instance_num = 102400
    datas = []
    labels = []
    masks = []
    for i in range(instance_num):
        cur_len = np.random.randint(MINLEN, MAXLEN + 1)
        inp_seq = np.zeros((MAXLEN + 1, len(CHARSET)), dtype='int32')
        cur_len = MAXLEN  # note: this overrides the random length above, so every sequence is fixed at MAXLEN
        mask = np.zeros((MAXLEN + 1, ), dtype='int32')
        out_seq = np.zeros((MAXLEN + 1, ), dtype='int32')

        inp_seq[cur_len][len(CHARSET) - 1] = 1
        out_seq[cur_len] = len(CHARSET) - 1
        mask[:cur_len + 1] = 1
        for j in range(cur_len):
            pos = np.random.randint(1, len(CHARSET) - 1)  # never generate '@' or '-'
            inp_seq[j][pos] = 1
            out_seq[cur_len - 1 - j] = pos

        datas.append(inp_seq)
        labels.append(out_seq)
        masks.append(mask)

    reverse_dataset = ArrayDataset(datas, labels, masks)
    random_sampler = RandomSampler(reverse_dataset, batch_size)
    dataloader = DataLoader(reverse_dataset, random_sampler)

    return dataloader
Example 4
    def __init__(self, input_dimension, num_points, batch_size=16, istrain=True):
        """
        生成如图1所示的二分类数据集,数据集长度为 num_points
        """

        means = [0.1 * n for n in range(input_dimension)]
        scales = [1 for n in range(input_dimension)]

        deviation = [0.05 * (-1 if n % 2 == 0 else 1) for n in range(input_dimension)]
        sd = [0.1 * (-1 if n % 2 == 0 else 1) for n in range(input_dimension)]

        alls = []
        for i in range(input_dimension):
            m, s = means[i], scales[i]
            if not istrain:
                m += deviation[i]
                s += sd[i]

            cur = np.random.normal(m, s, num_points).astype(np.float32).reshape(-1, 1)
            alls.append(cur)

        self.data = np.concatenate(alls, axis=1)

        super().__init__(self.data)
        self.random_sampler = RandomSampler(dataset=self, batch_size=batch_size, seed=1024)
        self.dataloader = DataLoader(dataset=self, sampler=self.random_sampler)
Example 5
def get_loader(dataset, cfg, mode='train'):
    assert mode in ('train', 'eval')
    if mode == 'train':
        sampler = RandomSampler(dataset,
                                batch_size=cfg.data.samples_per_gpu,
                                drop_last=True,
                                seed=0)
        loader = DataLoader(dataset,
                            sampler,
                            num_workers=cfg.data.workers_per_gpu)
    else:
        samples_per_gpu = cfg.data.get('eval_samples_per_gpu',
                                       cfg.data.samples_per_gpu)
        workers_per_gpu = cfg.data.get('eval_workers_per_gpu',
                                       cfg.data.workers_per_gpu)
        if cfg.evaluation.multi_process:
            sampler = SequentialSampler(dataset,
                                        batch_size=samples_per_gpu,
                                        drop_last=False)
        else:
            sampler = SequentialSampler(dataset,
                                        batch_size=samples_per_gpu,
                                        drop_last=False,
                                        world_size=1,
                                        rank=0)
        loader = DataLoader(dataset, sampler, num_workers=workers_per_gpu)
    return loader
Example 6
def build_dataloader():
    train_dataset = MNIST(root=gettempdir(), train=True, download=True)
    dataloader = DataLoader(
        train_dataset,
        transform=Compose([
            Normalize(mean=0.1307 * 255, std=0.3081 * 255),
            Pad(2),
            ToMode("CHW"),
        ]),
        sampler=RandomSampler(dataset=train_dataset, batch_size=64),
    )
    return dataloader
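With these transforms, each 28x28 MNIST image is normalized, padded to 32x32, and converted to CHW layout; a minimal sketch (not from the original snippet) of the batch shapes the loader produces:

# Illustrative sketch only: shapes follow from the transforms above.
dataloader = build_dataloader()
for image, label in dataloader:
    # image: numpy array of shape (64, 1, 32, 32) -- 28x28 padded by 2 on each side, then ToMode("CHW")
    # label: numpy array of shape (64,)
    break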
Example 7
    def __init__(self, num_points, batch_size=16):
        """
        生成如图1所示的二分类数据集,数据集长度为 num_points
        """

        # Initialize a NumPy array of shape (num_points, 2).
        # Each row is a data point (x, y) whose x and y coordinates both fall in the interval [-1, 1].
        # np.random.seed(2020)
        self.data = np.random.rand(num_points, 2).astype(np.float32) * 2 - 1
        # Build labels for the array above: a row (x, y) gets label 1 if x*y < 0, otherwise label 0.
        self.label = np.zeros(num_points, dtype=np.int32)
        for i in range(num_points):
            self.label[i] = 1 if np.prod(self.data[i]) < 0 else 0

        super().__init__(self.data, self.label)
        self.random_sampler = RandomSampler(dataset=self, batch_size=batch_size, seed=1024)
        self.dataloader = DataLoader(dataset=self, sampler=self.random_sampler)
Example 8
def build_sampler(train_dataset, batch_size, aspect_grouping=[1]):
    def _compute_aspect_ratios(dataset):
        aspect_ratios = []
        for i in range(len(dataset)):
            info = dataset.get_img_info(i)
            aspect_ratios.append(info["height"] / info["width"])
        return aspect_ratios

    def _quantize(x, bins):
        return list(map(lambda y: bisect.bisect_right(sorted(bins), y), x))

    if len(aspect_grouping) == 0:
        return Infinite(RandomSampler(train_dataset, batch_size, drop_last=True))

    aspect_ratios = _compute_aspect_ratios(train_dataset)
    group_ids = _quantize(aspect_ratios, aspect_grouping)
    return Infinite(GroupedRandomSampler(train_dataset, batch_size, group_ids))
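A hedged sketch (not part of the original snippet) of how the returned sampler is typically plugged into a DataLoader, assuming DataLoader is imported from megengine.data as in the other examples:

# Illustrative sketch only: the aspect-ratio-grouped (or plain) sampler feeds a DataLoader.
train_sampler = build_sampler(train_dataset, batch_size=2, aspect_grouping=[1])
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, num_workers=2)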
Example 9

@trace
def train_func(data, label, net=None, optimizer=None):
    net.train()
    pred = net(data)
    loss = F.cross_entropy_with_softmax(pred, label)
    optimizer.backward(loss)
    return pred, loss


train_dataset = u_data("./data/train", order=["image", "mask"])
dataloader = DataLoader(train_dataset,
                        transform=Compose([ToMode('CHW')]),
                        sampler=RandomSampler(dataset=train_dataset,
                                              batch_size=4,
                                              drop_last=True))

unet = Unet(1, 4)
optimizer = optim.SGD(unet.parameters(), lr=0.05)

trace.enabled = True

total_epochs = 50
loss_src = 100000000
for epoch in range(total_epochs):
    total_loss = 0
    correct = 0
    total = 0
    sta = time.time()
Example 10
import megengine as mge
from megengine.data import DataLoader, RandomSampler, SequentialSampler
from megengine.data.dataset import MNIST
from megengine.data.transform import RandomResizedCrop, Normalize, ToMode, Pad, Compose
import megengine.optimizer as optim

mge.set_log_file('log.txt')
logger = mge.get_logger(__name__)

#logdir = "logs/scalars/" + datetime.now().strftime("%Y%m%d-%H%M%S")

# dataset
root_dir = '/data/.cache/dataset/MNIST'
mnist_train_dataset = MNIST(root=root_dir, train=True, download=False)

mnist_test_dataset = MNIST(root=root_dir, train=False, download=False)

random_sampler = RandomSampler(dataset=mnist_train_dataset, batch_size=256)
sequential_sampler = SequentialSampler(dataset=mnist_test_dataset,
                                       batch_size=256)

mnist_train_dataloader = DataLoader(
    dataset=mnist_train_dataset,
    sampler=random_sampler,
    transform=Compose([
        RandomResizedCrop(output_size=28),
        # mean and std are the mean and standard deviation of the MNIST data; pixel values range from 0 to 255
        #Normalize(mean=0.1307*255, std=0.3081*255),
        #Pad(2),
        # 'CHW' converts images from (height, width, channel) format to (channel, height, width) format
        #ToMode('CHW'),
    ]))
mnist_test_dataloader = DataLoader(