コード例 #1
0
def fetch_dataloaders(dataset_name,
                      batch_size,
                      device,
                      flip_toy_var_order=False,
                      toy_train_size=25000,
                      toy_test_size=5000):
    """Build the train and test ``DataLoader`` objects for a named dataset.

    Both underlying datasets are tagged with the attributes ``input_dims``,
    ``input_size`` (product of ``input_dims``), ``label_size`` and ``lam``
    before being wrapped in loaders.

    Args:
        dataset_name: One of ``'GAS'``, ``'POWER'``, ``'HEPMASS'``,
            ``'MINIBOONE'``, ``'BSDS300'``, ``'MNIST'``, ``'TOY'``,
            ``'MOONS'`` or ``'CIFAR10'``.
        batch_size: Batch size passed to both loaders.
        device: Object with a ``type`` attribute (e.g. ``torch.device``);
            when it equals ``'cuda'`` the loaders use one worker process and
            pinned memory.
        flip_toy_var_order: Forwarded to the ``'TOY'``/``'MOONS'`` dataset
            constructors.
        toy_train_size: Size argument for the toy training dataset.
        toy_test_size: Size argument for the toy test dataset.

    Returns:
        Tuple ``(train_loader, test_loader)``; the training loader shuffles,
        the test loader does not.

    Raises:
        ValueError: If ``dataset_name`` is not one of the names above.
    """
    # grab datasets
    if dataset_name in ['GAS', 'POWER', 'HEPMASS', 'MINIBOONE',
                        'BSDS300']:  # use the constructors by MAF authors
        dataset = load_dataset(dataset_name)()

        # join train and val data again
        train_data = np.concatenate((dataset.trn.x, dataset.val.x), axis=0)

        # construct datasets (unlabelled: a single tensor per sample)
        train_dataset = TensorDataset(
            torch.from_numpy(train_data.astype(np.float32)))
        test_dataset = TensorDataset(
            torch.from_numpy(dataset.tst.x.astype(np.float32)))

        input_dims = dataset.n_dims
        label_size = None
        lam = None

    elif dataset_name in ['MNIST']:
        dataset = load_dataset(dataset_name)()

        # join train and val data again
        train_x = np.concatenate((dataset.trn.x, dataset.val.x),
                                 axis=0).astype(np.float32)
        train_y = np.concatenate((dataset.trn.y, dataset.val.y),
                                 axis=0).astype(np.float32)

        # construct datasets (inputs paired with labels)
        train_dataset = TensorDataset(torch.from_numpy(train_x),
                                      torch.from_numpy(train_y))
        test_dataset = TensorDataset(
            torch.from_numpy(dataset.tst.x.astype(np.float32)),
            torch.from_numpy(dataset.tst.y.astype(np.float32)))

        input_dims = dataset.n_dims
        label_size = 10
        lam = dataset.alpha

    elif dataset_name in ['TOY', 'MOONS']:  # use own constructors
        toy_constructor = load_dataset(dataset_name)
        train_dataset = toy_constructor(toy_train_size, flip_toy_var_order)
        test_dataset = toy_constructor(toy_test_size, flip_toy_var_order)

        input_dims = train_dataset.input_size
        label_size = train_dataset.label_size
        lam = None

    # imaging dataset pulled from torchvision
    elif dataset_name in ['CIFAR10']:
        label_size = 10

        # MAF logit transform parameter (cf. MAF paper 4.3); this branch
        # only handles CIFAR10, so the value is fixed.
        lam = 5e-2

        # MAF paper converts image data to logit space via the transform
        # described in section 4.3
        image_transforms = T.Compose([
            T.ToTensor(),
            # dequantize (cf MAF paper)
            T.Lambda(lambda x: x + torch.rand(*x.shape) / 256.),
            # to logit space (cf MAF paper)
            T.Lambda(lambda x: logit(lam + (1 - 2 * lam) * x)),
        ])
        target_transforms = T.Lambda(partial(one_hot, label_size=label_size))

        image_constructor = load_dataset(dataset_name)
        train_dataset = image_constructor(root=datasets.root,
                                          train=True,
                                          transform=image_transforms,
                                          target_transform=target_transforms)
        # The held-out split must be requested with train=False; otherwise
        # evaluation silently runs on the training images.
        test_dataset = image_constructor(root=datasets.root,
                                         train=False,
                                         transform=image_transforms,
                                         target_transform=target_transforms)

        input_dims = train_dataset[0][0].shape

    else:
        raise ValueError('Unrecognized dataset.')

    # keep input dims, input size and label size on the dataset objects
    input_size = int(np.prod(input_dims))
    for split in (train_dataset, test_dataset):
        split.input_dims = input_dims
        split.input_size = input_size
        split.label_size = label_size
        split.lam = lam

    # construct dataloaders; compare the device type by value, not identity
    if device.type == 'cuda':
        kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        kwargs = {}

    train_loader = DataLoader(train_dataset,
                              batch_size,
                              shuffle=True,
                              **kwargs)
    test_loader = DataLoader(test_dataset, batch_size, shuffle=False, **kwargs)

    return train_loader, test_loader
コード例 #2
0
def fetch_dataloaders(dataset_name,
                      batch_size,
                      device,
                      flip_toy_var_order=False,
                      toy_train_size=25000,
                      toy_test_size=5000):
    """Build the train and test ``DataLoader`` objects for a named dataset.

    Both underlying datasets are tagged with the attributes ``input_dims``,
    ``input_size`` (product of ``input_dims``), ``label_size`` and ``lam``
    before being wrapped in loaders. Both loaders drop the last incomplete
    batch.

    Args:
        dataset_name: One of ``'GAS'``, ``'POWER'``, ``'HEPMASS'``,
            ``'MINIBOONE'``, ``'BSDS300'``, ``'MNIST'``, ``'TOY'``,
            ``'MOONS'``, ``'CIFAR10'``, ``'TEMPERATURE0'``,
            ``'TEMPERATURE1'`` or ``'TEMPERATURE2'``.
        batch_size: Batch size passed to both loaders.
        device: Object with a ``type`` attribute (e.g. ``torch.device``);
            when it equals ``'cuda'`` the loaders use one worker process and
            pinned memory.
        flip_toy_var_order: Forwarded to the ``'TOY'``/``'MOONS'`` dataset
            constructors.
        toy_train_size: Size argument for the toy training dataset.
        toy_test_size: Size argument for the toy test dataset.

    Returns:
        Tuple ``(train_loader, test_loader)``; the training loader shuffles,
        the test loader does not.

    Raises:
        ValueError: If ``dataset_name`` is not one of the names above.
    """
    # grab datasets
    if dataset_name in ['GAS', 'POWER', 'HEPMASS', 'MINIBOONE',
                        'BSDS300']:  # use the constructors by MAF authors
        dataset = load_dataset(dataset_name)()

        # join train and val data again
        train_data = np.concatenate((dataset.trn.x, dataset.val.x), axis=0)

        # construct datasets (unlabelled: a single tensor per sample)
        train_dataset = TensorDataset(
            torch.from_numpy(train_data.astype(np.float32)))
        test_dataset = TensorDataset(
            torch.from_numpy(dataset.tst.x.astype(np.float32)))

        input_dims = dataset.n_dims
        label_size = None
        lam = None

    elif dataset_name in ['MNIST']:
        dataset = load_dataset(dataset_name)()

        # join train and val data again
        train_x = np.concatenate((dataset.trn.x, dataset.val.x),
                                 axis=0).astype(np.float32)
        train_y = np.concatenate((dataset.trn.y, dataset.val.y),
                                 axis=0).astype(np.float32)

        # construct datasets (inputs paired with labels)
        train_dataset = TensorDataset(torch.from_numpy(train_x),
                                      torch.from_numpy(train_y))
        test_dataset = TensorDataset(
            torch.from_numpy(dataset.tst.x.astype(np.float32)),
            torch.from_numpy(dataset.tst.y.astype(np.float32)))

        input_dims = dataset.n_dims
        label_size = 10
        lam = dataset.alpha

    elif dataset_name in ['TOY', 'MOONS']:  # use own constructors
        toy_constructor = load_dataset(dataset_name)
        train_dataset = toy_constructor(toy_train_size, flip_toy_var_order)
        test_dataset = toy_constructor(toy_test_size, flip_toy_var_order)

        input_dims = train_dataset.input_size
        label_size = train_dataset.label_size
        lam = None

    # imaging dataset pulled from torchvision
    elif dataset_name in ['CIFAR10']:
        label_size = 10

        # MAF logit transform parameter (cf. MAF paper 4.3); this branch
        # only handles CIFAR10, so the value is fixed.
        lam = 5e-2

        # MAF paper converts image data to logit space via the transform
        # described in section 4.3
        image_transforms = T.Compose([
            T.ToTensor(),
            # dequantize (cf MAF paper)
            T.Lambda(lambda x: x + torch.rand(*x.shape) / 256.),
            # to logit space (cf MAF paper)
            T.Lambda(lambda x: logit(lam + (1 - 2 * lam) * x)),
        ])
        target_transforms = T.Lambda(partial(one_hot, label_size=label_size))

        image_constructor = load_dataset(dataset_name)
        train_dataset = image_constructor(root=datasets.root,
                                          train=True,
                                          transform=image_transforms,
                                          target_transform=target_transforms)
        # The held-out split must be requested with train=False; otherwise
        # evaluation silently runs on the training images.
        test_dataset = image_constructor(root=datasets.root,
                                         train=False,
                                         transform=image_transforms,
                                         target_transform=target_transforms)

        input_dims = train_dataset[0][0].shape

    elif dataset_name in ["TEMPERATURE0", "TEMPERATURE1", "TEMPERATURE2"]:
        data_dict = np.load(
            "data/temperature/Land_and_Ocean_LatLong1_agg_size_180.npz")
        temp = data_dict[dataset_name]  # (month, location)
        print(f"number of month: {temp.shape[0]}")

        # chronological split: first 4/5 of the rows train, the rest test
        n_train = int(temp.shape[0] * 4 / 5)
        train_temp = temp[:n_train]
        test_temp = temp[n_train:]

        # number of preceding rows used as the conditioning label
        n_conditions = 2

        def create_dataset(series, n_conditions):
            """Pair each row with its ``n_conditions`` preceding rows.

            ``x`` holds the rows from index ``n_conditions`` onward; ``y``
            concatenates, along axis 1, the ``n_conditions`` rows that
            precede each row of ``x``.
            """
            # NOTE(review): no float32 cast here, unlike the other branches;
            # confirm the dtype stored in the .npz matches the model.
            n_rows = series.shape[0]
            targets = series[n_conditions:]
            lagged = [
                series[lag:(n_rows - n_conditions + lag)]
                for lag in range(n_conditions)
            ]
            conditions = np.concatenate(lagged, axis=1)
            return TensorDataset(torch.from_numpy(targets),
                                 torch.from_numpy(conditions))

        train_dataset = create_dataset(train_temp, n_conditions)
        test_dataset = create_dataset(test_temp, n_conditions)
        input_dims = temp.shape[1]
        print(f"number of locations = {input_dims}")
        label_size = temp.shape[1] * n_conditions
        lam = None

    else:
        raise ValueError('Unrecognized dataset.')

    # keep input dims, input size and label size on the dataset objects
    input_size = int(np.prod(input_dims))
    for split in (train_dataset, test_dataset):
        split.input_dims = input_dims
        split.input_size = input_size
        split.label_size = label_size
        split.lam = lam

    # construct dataloaders; compare the device type by value, not identity
    if device.type == 'cuda':
        kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        kwargs = {}

    train_loader = DataLoader(train_dataset,
                              batch_size,
                              shuffle=True,
                              drop_last=True,
                              **kwargs)
    test_loader = DataLoader(test_dataset,
                             batch_size,
                             shuffle=False,
                             drop_last=True,
                             **kwargs)

    return train_loader, test_loader
コード例 #3
0
ファイル: data.py プロジェクト: wcheung8/normalizing_flows
def fetch_dataloaders(dataset_name,
                      batch_size,
                      device,
                      flip_toy_var_order=False,
                      toy_train_size=25000,
                      toy_test_size=5000):
    """Build one training loader and a list of test loaders for a dataset.

    The training dataset and every test dataset are tagged with the
    attributes ``input_dims``, ``input_size`` (product of ``input_dims``),
    ``label_size`` and ``lam``. Each test dataset additionally gets a
    ``name``: its first ``subsplits`` entry when it has one, else ``'val'``.

    Args:
        dataset_name: One of ``'GAS'``, ``'POWER'``, ``'HEPMASS'``,
            ``'MINIBOONE'``, ``'BSDS300'``, ``'MNIST'``, ``'TOY'``,
            ``'MOONS'``, ``'CIFAR10'``, ``'synthia'`` or ``'cityscapes'``.
        batch_size: Batch size passed to every loader.
        device: Object with a ``type`` attribute (e.g. ``torch.device``);
            when it equals ``'cuda'`` the loaders use four worker processes
            and pinned memory.
        flip_toy_var_order: Forwarded to the ``'TOY'``/``'MOONS'`` dataset
            constructors.
        toy_train_size: Size argument for the toy training dataset.
        toy_test_size: Size argument for the toy test dataset.

    Returns:
        Tuple ``(train_loader, test_loaders)`` where ``test_loaders`` is a
        list with one non-shuffling ``DataLoader`` per test dataset (a
        single-element list for datasets with only one test split).

    Raises:
        ValueError: If ``dataset_name`` is not one of the names above.
    """
    # grab datasets
    if dataset_name in ['GAS', 'POWER', 'HEPMASS', 'MINIBOONE',
                        'BSDS300']:  # use the constructors by MAF authors
        dataset = load_dataset(dataset_name)()

        # join train and val data again
        train_data = np.concatenate((dataset.trn.x, dataset.val.x), axis=0)

        # construct datasets (unlabelled: a single tensor per sample)
        train_dataset = TensorDataset(
            torch.from_numpy(train_data.astype(np.float32)))
        test_dataset = TensorDataset(
            torch.from_numpy(dataset.tst.x.astype(np.float32)))

        input_dims = dataset.n_dims
        label_size = None
        lam = None

    elif dataset_name in ['MNIST']:
        dataset = load_dataset(dataset_name)()

        # join train and val data again
        train_x = np.concatenate((dataset.trn.x, dataset.val.x),
                                 axis=0).astype(np.float32)
        train_y = np.concatenate((dataset.trn.y, dataset.val.y),
                                 axis=0).astype(np.float32)

        # construct datasets (inputs paired with labels)
        train_dataset = TensorDataset(torch.from_numpy(train_x),
                                      torch.from_numpy(train_y))
        test_dataset = TensorDataset(
            torch.from_numpy(dataset.tst.x.astype(np.float32)),
            torch.from_numpy(dataset.tst.y.astype(np.float32)))

        input_dims = dataset.n_dims
        label_size = 10
        lam = dataset.alpha

    elif dataset_name in ['TOY', 'MOONS']:  # use own constructors
        toy_constructor = load_dataset(dataset_name)
        train_dataset = toy_constructor(toy_train_size, flip_toy_var_order)
        test_dataset = toy_constructor(toy_test_size, flip_toy_var_order)

        input_dims = train_dataset.input_size
        label_size = train_dataset.label_size
        lam = None

    # imaging dataset pulled from torchvision
    elif dataset_name in ['CIFAR10']:
        label_size = 10

        # MAF logit transform parameter (cf. MAF paper 4.3); this branch
        # only handles CIFAR10, so the value is fixed.
        lam = 5e-2

        # MAF paper converts image data to logit space via the transform
        # described in section 4.3
        image_transforms = T.Compose([
            T.ToTensor(),
            # dequantize (cf MAF paper)
            T.Lambda(lambda x: x + torch.rand(*x.shape) / 256.),
            # to logit space (cf MAF paper)
            T.Lambda(lambda x: logit(lam + (1 - 2 * lam) * x)),
        ])
        target_transforms = T.Lambda(partial(one_hot, label_size=label_size))

        image_constructor = load_dataset(dataset_name)
        train_dataset = image_constructor(root=datasets.root,
                                          train=True,
                                          transform=image_transforms,
                                          target_transform=target_transforms)
        # The held-out split must be requested with train=False; otherwise
        # evaluation silently runs on the training images.
        test_dataset = image_constructor(root=datasets.root,
                                         train=False,
                                         transform=image_transforms,
                                         target_transform=target_transforms)

        input_dims = train_dataset[0][0].shape

    elif dataset_name == "synthia":
        image_size = (64, 64)
        train_dataset = airsimLoader(split="train",
                                     subsplits=['async_fog_000_clear'],
                                     img_size=image_size)

        # one validation dataset per evaluated condition
        val_subsplits = [
            "async_fog_000_clear",
            "async_fog_050_clear",
            "async_fog_100_clear",
            "async_fog_000_clear__{'channel':'rgb','type':'snow','value':'3'}",
            "async_fog_000_clear__{'channel':'rgb','type':'blackoutNoise','value':'20'}",
        ]
        test_dataset = [
            airsimLoader(split="val", subsplits=[subsplit],
                         img_size=image_size)
            for subsplit in val_subsplits
        ]

        input_dims = train_dataset[0][0].shape
        label_size = None
        lam = None

    elif dataset_name == "cityscapes":
        train_dataset = cityscapesLoader('/home/datasets/',
                                         split="train",
                                         img_size=(64, 128))
        test_dataset = [
            cityscapesLoader('/home/datasets/',
                             split="val",
                             img_size=(64, 128)),
            fishyscapesLoader()
        ]
        input_dims = train_dataset[0][0].shape
        label_size = None
        lam = None

    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError('Unrecognized dataset.')

    print(input_dims)

    # Branches with a single test split produce one dataset object; wrap it
    # so the tagging loop iterates over datasets rather than over samples.
    if isinstance(test_dataset, (list, tuple)):
        test_datasets = list(test_dataset)
    else:
        test_datasets = [test_dataset]

    # keep input dims, input size and label size on the dataset objects
    input_size = int(np.prod(input_dims))
    train_dataset.input_dims = input_dims
    train_dataset.input_size = input_size
    train_dataset.label_size = label_size
    train_dataset.lam = lam

    for t in test_datasets:
        t.input_dims = input_dims
        t.input_size = input_size
        t.label_size = label_size
        t.lam = lam
        # Attribute lookup, not a membership test on the dataset's samples.
        # NOTE(review): presumes loaders expose their `subsplits` argument
        # as an attribute of the same name - confirm against airsimLoader.
        subsplits = getattr(t, 'subsplits', None)
        t.name = subsplits[0] if subsplits else 'val'

    # construct dataloaders; compare the device type by value, not identity
    if device.type == 'cuda':
        kwargs = {'num_workers': 4, 'pin_memory': True}
    else:
        kwargs = {}

    train_loader = DataLoader(train_dataset,
                              batch_size,
                              shuffle=True,
                              **kwargs)
    test_loader = [
        DataLoader(t, batch_size, shuffle=False, **kwargs)
        for t in test_datasets
    ]

    return train_loader, test_loader