Example no. 1
def D0217(data_dir, batch_size) -> Tuple[List[Tuple[int, DataLoader, DataLoader]], DataLoader, List[int]]:
    data = np.load(data_dir / "0201.npz")
    X_train = data["X_train"][:, :6]
    Y_train = data["Y_train"]
    X_test = data["X_test"][:, :6]

    X_train = tensor(X_train, dtype=torch.float32)
    Y_train = tensor(Y_train, dtype=torch.long)
    X_test = tensor(X_test, dtype=torch.float32)
    print(X_train.shape, Y_train.shape, X_test.shape)

    # count the number of training samples for each of the 61 classes
    samples_per_cls = [(Y_train == i).sum().item() for i in range(61)]
    print(samples_per_cls)

    ds = C0215(X_train, Y_train)
    ds_test = C0215(X_test)
    dl_kwargs = dict(batch_size=batch_size, num_workers=6, pin_memory=True)
    dl_test = DataLoader(ds_test, **dl_kwargs, shuffle=False)

    skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=261342)
    dl_list = []
    for fold, (train_idx, valid_idx) in enumerate(skf.split(X_train, Y_train), 1):
        ds_train = Subset(ds, train_idx)
        ds_valid = Subset(ds, valid_idx)
        dl_train = DataLoader(ds_train, **dl_kwargs, shuffle=True)
        dl_valid = DataLoader(ds_valid, **dl_kwargs, shuffle=False)
        dl_list.append((fold, dl_train, dl_valid))

    return dl_list, dl_test, samples_per_cls
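The returned list pairs each fold index with its train and validation loaders, alongside a single test loader and the per-class sample counts. A minimal consumption sketch, assuming the same module is importable; the data directory path below is hypothetical:

from pathlib import Path

dl_list, dl_test, samples_per_cls = D0217(Path("data"), batch_size=64)
for fold, dl_train, dl_valid in dl_list:
    print(f"fold {fold}: {len(dl_train)} train batches, {len(dl_valid)} valid batches")
    # a per-fold training/validation loop would go here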
Example no. 2
    def __init__(self, args, device):
        super(AVMNISTSearcher, self).__init__(args)

        self.device = device

        # Handle data
        transformer = transforms.Compose([
            avmnist_data.ToTensor(),
            avmnist_data.Normalize((0.1307,), (0.3081,))
        ])

        dataset_training = avmnist_data.AVMnist(args.datadir, transform=transformer, stage='train')
        dataset_validate = avmnist_data.AVMnist(args.datadir, transform=transformer, stage='train')

        train_indices = list(range(0, 55000))
        valid_indices = list(range(55000, 60000))

        train_subset = Subset(dataset_training, train_indices)
        valid_subset = Subset(dataset_validate, valid_indices)

        trainloader = torch.utils.data.DataLoader(train_subset, batch_size=args.batchsize, shuffle=False,
                                                  num_workers=args.num_workers, pin_memory=True)
        devloader = torch.utils.data.DataLoader(valid_subset, batch_size=args.batchsize, shuffle=False,
                                                num_workers=args.num_workers, pin_memory=True)

        self.dataloaders = {'train': trainloader, 'dev': devloader}
Example no. 3
def better_random_split(dataset_enhanced,
                        dataset_clean,
                        fraction):
    """
    Randomly split a dataset into non-overlapping new datasets of given lengths... but better!

    Arguments:
        dataset_enhanced: The dataset with transforms
        dataset_clean: the dataset with only the necessary transforms and in sequential order
        fraction: the amount of data to be split
    """
    assert fraction < 1, "Fraction should be < 1"
    assert len(dataset_enhanced) == len(dataset_clean)

    total_length = len(dataset_enhanced)
    train_length = int(fraction * total_length)
    eval_length = total_length - train_length

    val_idx0 = np.random.randint(train_length)

    train_idx_lst = np.append(np.arange(val_idx0),
                              np.arange(val_idx0 + eval_length, total_length))
    eval_idx_lst = np.arange(val_idx0, val_idx0 + eval_length)

    np.random.shuffle(train_idx_lst)

    return Subset(dataset_enhanced, train_idx_lst), Subset(dataset_clean, eval_idx_lst)
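Because the held-out block is contiguous and drawn from the clean dataset, evaluation keeps its original sequential order while training sees the augmented view. A minimal usage sketch, assuming two equally sized dataset objects (augmented_dataset, clean_dataset) and the usual DataLoader import:

train_ds, eval_ds = better_random_split(augmented_dataset, clean_dataset, fraction=0.8)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)
eval_loader = DataLoader(eval_ds, batch_size=32, shuffle=False)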
Example no. 4
def cv_split(dataset, n, augmentation=None):
    """
    Split the dataset into n non-overlapping folds, using each fold in turn for testing, and
    return a list that contains one (train, test, augmented_train) tuple per fold.

    Arguments:
        dataset (Dataset): Dataset to be split
        n (int): number of non-overlapping new datasets
        augmentation : optional augmentation applied to the training split
    """
    # shuffle must be enabled for random_state to have an effect (scikit-learn raises otherwise)
    cv = KFold(n_splits=n, shuffle=True, random_state=0)
    res = []

    for train_index, test_index in cv.split(dataset):
        train_set = Subset(dataset, train_index)
        test_set = Subset(dataset, test_index)

        if augmentation is not None:
            augmented_set = AugmentedDataSet(train_set, augmentation)
        else:
            augmented_set = train_set

        res.append((train_set, test_set, augmented_set))

    return res
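Each element of the returned list holds the plain training split, the held-out split, and the (optionally augmented) training split for one fold. A minimal usage sketch, assuming a dataset object and the usual DataLoader import:

folds = cv_split(dataset, n=5)
for fold_id, (train_set, test_set, augmented_set) in enumerate(folds, 1):
    train_loader = DataLoader(augmented_set, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)
    # fit on train_loader and evaluate on test_loader for this fold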
Example no. 5
def get_data_loader():
    data_loaders = {}

    data_set = ProjectDataset(file_path="data/triplet/triple_sentences.csv")
    total = len(data_set)
    train_indices = list(range(0, int(total * 0.9)))
    valid_indices = list(range(int(total * 0.9), len(data_set)))
    train_set = Subset(data_set, train_indices)
    valid_set = Subset(data_set, valid_indices)

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=False,
                              pin_memory=True,
                              num_workers=4)

    dev_loader = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)

    data_loaders["train_loader"] = train_loader
    data_loaders["dev_loader"] = dev_loader
    return data_loaders
Example no. 6
def get_train_loader(args):

    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=3),
        transforms.Resize((args.size,args.size)),
        transforms.ToTensor(),
    ])

    if args.dataset == 'CIFAR10':
        dataset = CIFAR10(root=args.data_dir,train=True,
                          transform=transforms.Compose([
                              transforms.Resize((args.size,args.size)),
                              transforms.ToTensor(),
                          ]))
        dataset = Subset(dataset=dataset,indices=random.sample(range(50000),args.data_size))
    elif args.dataset == 'MNIST':
        dataset = MNIST(root=args.data_dir,train=True,transform=transform)
        dataset = Subset(dataset=dataset,indices=random.sample(range(60000),args.data_size))
    else:
        dataset = SmallNORB(root=args.data_dir,train=True,transform=transform)
        dataset = Subset(dataset=dataset,indices=random.sample(range(48600),args.data_size))

    return DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=args.shuffle,
        num_workers=args.workers,
        pin_memory=True
    )
Example no. 7
def detection_dataloaders(
    data_dir,
    batch_size=1,
    subset_indices=None,
    no_augmentation=False,
    num_workers=0,
):
    train_dataset, test_dataset, num_classes = initialize_detection_datasets(
        data_dir, no_augmentation)

    if subset_indices is not None:
        train_dataset = Subset(train_dataset, subset_indices)
        test_dataset = Subset(test_dataset, subset_indices)

    train_dataloader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate_fn,
        num_workers=num_workers,
    )

    test_dataloader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=collate_fn,
        num_workers=num_workers,
    )

    return train_dataloader, test_dataloader, num_classes
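The snippet relies on a collate_fn suitable for detection batches, where images and target dictionaries vary in size per sample. It is not shown here; a common minimal sketch (an assumption, not necessarily the original) simply keeps the items as tuples instead of stacking them:

def collate_fn(batch):
    # batch is a list of (image, target) pairs; keep variable-sized items
    # as tuples rather than stacking them into a single tensor
    return tuple(zip(*batch))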
Example no. 8
def D0206_org_base(data_dir, batch_size, augc) -> Tuple[List[Tuple[int, DataLoader, DataLoader]], DataLoader]:
    data = np.load(data_dir / "0206_org.npz")
    X_train = data["X_train"][:, :6]
    Y_train = data["Y_train"]
    X_test = data["X_test"][:, :6]

    X_train = tensor(X_train, dtype=torch.float32)
    Y_train = tensor(Y_train, dtype=torch.long)
    X_test = tensor(X_test, dtype=torch.float32)
    print(X_train.shape, Y_train.shape, X_test.shape)

    ds = augc(X_train, Y_train)
    ds_test = TensorDataset(X_test)
    dl_kwargs = dict(batch_size=batch_size, num_workers=6, pin_memory=True)
    dl_test = DataLoader(ds_test, **dl_kwargs, shuffle=False)

    skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=261342)
    dl_list = []
    for fold, (train_idx, valid_idx) in enumerate(skf.split(X_train, Y_train), 1):
        ds_train = Subset(ds, train_idx)
        ds_valid = Subset(ds, valid_idx)
        dl_train = DataLoader(ds_train, **dl_kwargs, shuffle=True)
        dl_valid = DataLoader(ds_valid, **dl_kwargs, shuffle=False)
        dl_list.append((fold, dl_train, dl_valid))

    return dl_list, dl_test
Example no. 9
    def get_dataset(self, n_subset=-1):
        cfg = self.cfg

        train_dataset, valid_dataset = self.get_torch_dataset()

        if n_subset > 0:
            # take the first n_subset samples instead of a hardcoded count
            train_dataset = Subset(train_dataset, list(range(n_subset)))
            valid_dataset = Subset(valid_dataset, list(range(n_subset)))

        n_train_iteration = len(train_dataset) // cfg.batch_size
        n_valid_iteration = len(valid_dataset) // cfg.batch_size

        train_dataset = self.to_tf_dataset(train_dataset, shuffle=True)
        valid_dataset = self.to_tf_dataset(valid_dataset, shuffle=False)

        iterator = Iterator.from_structure(train_dataset.output_types,
                                           train_dataset.output_shapes)

        train_init_op = iterator.make_initializer(train_dataset)
        valid_init_op = iterator.make_initializer(valid_dataset)
        input_tensor = iterator.get_next()

        return (
            input_tensor,
            (train_init_op, valid_init_op),
            (n_train_iteration, n_valid_iteration),
        )
Example no. 10
def setup_loaders(valid_ratio, path, batch_size):

    dataset = Zinc(path)

    # split into train and valid
    n_samples = len(dataset)
    idx = np.arange(n_samples)
    train_samples = int((1 - valid_ratio) * n_samples)

    train = idx[:train_samples]
    valid = idx[train_samples:]

    train_dataset = Subset(dataset, train)
    valid_dataset = Subset(dataset, valid)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=batch_size,
                              shuffle=False,
                              num_workers=4,
                              pin_memory=True)

    return train_loader, valid_loader, dataset
Example no. 11
def get_dataloader(dataset):
    """
    Make dataloader from dataset for training.
    """
    train_size = int(
        len(dataset) * (1.0 - CONFIG["training"]["validation_split"]))
    data_loader_train = torch.utils.data.DataLoader(
        Subset(dataset, list(range(0, train_size))),
        batch_size=CONFIG["training"]["batch_size"],
        shuffle=CONFIG["training"]["shuffle"],
        drop_last=True,
    )
    data_loader_val = torch.utils.data.DataLoader(
        Subset(dataset, list(range(train_size, len(dataset)))),
        batch_size=CONFIG["training"]["batch_size"],
        shuffle=False,
        drop_last=False,
    )

    # dataloader of training data for evaluation only
    data_loader_eval_train = torch.utils.data.DataLoader(
        Subset(dataset, list(range(0, train_size))),
        batch_size=CONFIG["training"]["batch_size"],
        shuffle=False,
        drop_last=False,
    )

    return data_loader_train, data_loader_val, data_loader_eval_train
Example no. 12
def data_loader_with_split(root, train_split=0.9, batch_size=256, val_label_file='./val_label'):
    input_transform = get_transform()
    dataset_tr = CustomDataset(root, input_transform, target_transform, aug=True)
    dataset_vl = CustomDataset(root, input_transform, target_transform, aug=False)
    split_size = int(len(dataset_tr) * train_split)

    random.seed(1958)
    l = list(range(len(dataset_tr)))
    random.shuffle(l)
    train_idxs = l[:split_size]
    valid_idxs = l[split_size:]
    train_set = Subset(dataset_tr, train_idxs)
    valid_set = Subset(dataset_vl, valid_idxs)
    # train_set, valid_set = data.random_split(dataset, [split_size, len(dataset) - split_size])
    print(len(train_set), len(valid_set))
    tr_loader = data.DataLoader(dataset=train_set,
                                batch_size=batch_size,
                                num_workers=4, pin_memory=True,
                                shuffle=True)
    val_loader = data.DataLoader(dataset=valid_set,
                                 batch_size=batch_size,
                                 num_workers=4, pin_memory=True,
                                 shuffle=False)

    gt_labels = [valid_set[idx][1] for idx in range(len(valid_set))]
    gt_labels_string = [','.join([str(s.numpy()) for s in l]) for l in list(gt_labels)]
    with open(val_label_file, 'w') as file_writer:
        file_writer.write("\n".join(gt_labels_string))

    print('data_loader_with_split-')
    return tr_loader, val_loader, val_label_file
Example no. 13
def get_data_loader():
    data_loaders = {}

    data_set = ProjectDataset(file_path="data/pair/new_split_pair_all.txt")

    split_index = int(len(data_set) * 0.9)
    train_indices = list(range(0, split_index))
    valid_indices = list(range(split_index, len(data_set)))
    train_set = Subset(data_set, train_indices)
    valid_set = Subset(data_set, valid_indices)

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=4)

    dev_loader = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=True,
                            pin_memory=True,
                            num_workers=4)

    data_loaders["train_loader"] = train_loader
    data_loaders["dev_loader"] = dev_loader
    return data_loaders
Example no. 14
def get_cross_validation_kth_fold(dataset: Dataset,
                                  k: int,
                                  n: int,
                                  start_seed: int = 17) -> tuple:
    """Splits the dataset into train and test subsets, accordingly to the
    selected number of the cross-validation fold.

    Parameters
    ----------
    dataset: Dataset
        Dataset to split.
    k : int
        Number of the fold to return.
    n : int
        Number of folds in the cross-validation.
    start_seed : int
        Random seed.

    Returns
    -------
    tuple
        The kth cross-validation fold.
    """
    seed(start_seed)
    ids = arange(len(dataset))
    split_size = int(len(dataset) / n)

    split_train_ids = concatenate(
        (ids[:split_size * k], ids[split_size * (k + 1):]))
    split_test_ids = ids[split_size * k:split_size * (k + 1)]

    train_subdataset = Subset(dataset, split_train_ids)
    test_subdataset = Subset(dataset, split_test_ids)

    return train_subdataset, test_subdataset
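Calling the function once per fold index yields the full cross-validation schedule. A minimal sketch, assuming a dataset object and five folds:

n_folds = 5
for k in range(n_folds):
    train_subset, test_subset = get_cross_validation_kth_fold(dataset, k=k, n=n_folds)
    print(f"fold {k}: {len(train_subset)} train / {len(test_subset)} test samples")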
Example no. 15
def get_data_loader():
    data_loaders = {}

    data_set = ProjectDataset(
        anchor_file="data/triplet_encode/triple_sentence0_encode.npy",
        positive_file="data/triplet_encode/triple_sentence1_encode.npy",
        negative_file="data/triplet_encode/triple_sentence2_encode.npy")

    total = len(data_set)
    train_indices = list(range(0, int(total * 0.9)))
    valid_indices = list(range(int(total * 0.9), len(data_set)))
    train_set = Subset(data_set, train_indices)
    valid_set = Subset(data_set, valid_indices)

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=12)

    dev_loader = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=True,
                            pin_memory=True,
                            num_workers=12)

    data_loaders["train_loader"] = train_loader
    data_loaders["dev_loader"] = dev_loader
    return data_loaders
Example no. 16
    def train_test_split_curve(self, num_train_folds):
        train_indices = []
        for i in range(num_train_folds):
            train_indices += self._fold_indices[i]
        dev_indices = self._fold_indices[3]
        test_indices = self._fold_indices[4]

        return (Subset(self, train_indices), Subset(self, dev_indices), Subset(self, test_indices))
Example no. 17
def main(args=None):
    if args is None:
        args = argument_paser()

    # Set experiment id
    exp_id = str(uuid.uuid4())[:8] if args.exp_id is None else args.exp_id
    print(f'Experiment Id: {exp_id}', flush=True)

    # Fix seed
    torch.manual_seed(args.seed)

    # Config gpu
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Prepare data
    dataset = MovingMnistDataset()
    train_index, valid_index = train_test_split(range(len(dataset)),
                                                test_size=0.3)
    train_loader = DataLoader(Subset(dataset, train_index),
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(Subset(dataset, valid_index),
                              batch_size=args.test_batch_size,
                              shuffle=False)
    loaders = {"train": train_loader, "valid": valid_loader}

    model = ConvLSTMEncoderPredictor(image_size=(64, 64)).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 betas=(0.9, 0.999))
    criterion = nn.MSELoss()

    runner = SupervisedRunner(device=catalyst.utils.get_device())
    runner.train(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=None,
        loaders=loaders,
        # model will be saved to {logdir}/checkpoints
        logdir=os.path.join(args.log_dir, exp_id),
        callbacks=[
            CheckpointCallback(save_n_best=args.n_saved),
            EarlyStoppingCallback(
                patience=args.es_patience,
                metric="loss",
                minimize=True,
            )
        ],
        num_epochs=args.epochs,
        main_metric="loss",
        minimize_metric=True,
        fp16=None,
        verbose=True)

    return exp_id, model
Example no. 18
def process_raw_dataset(train_raw_dataset,
                        train_labels,
                        test_raw_dataset,
                        test_labels,
                        raw_n_cls,
                        dataset_name,
                        n_cls=None,
                        data_frac=None,
                        biased_cls=None):
    """
    Parameters
    ----------
    train_raw_dataset
        The dataset from DataLoader
    test_raw_dataset
        The dataset from DataLoader
    raw_n_cls: int
        The number of classes the raw dataset has.
    dataset_name: str
        The name of dataset i.e. "cifar", "svhn", "imagenet"
    n_cls: int
        The number of classes you want the learning model to solve
    data_frac: float
        How many proportions the learning model uses to train itself. (0. to 1.)
    biased_cls: list of float (n_cls, )
        The index corresponds to the index of classes.
        How many data to use in training. Each element of the list must be 0. to 1.

    Returns
    -------
    train_dataset, test_dataset
    """

    print("Processing raw dataset...")

    if n_cls is None and data_frac is None and biased_cls is None:
        return Subset(train_raw_dataset,
                      train_labels[1]), Subset(test_raw_dataset,
                                               test_labels[1])
    else:
        if n_cls is not None:
            print("The number of classes: {} -> {}".format(raw_n_cls, n_cls))
            train_labels, test_labels = get_small_class(
                train_labels, test_labels, n_cls)
        if data_frac is not None:
            n_subtrain = int(np.ceil(len(train_labels[0]) * data_frac))
            print("Subsampling: {} images".format(n_subtrain))
            train_labels = np.array([tl[:n_subtrain] for tl in train_labels])
        if biased_cls is not None:
            print("Biased labels")
            train_labels = get_biased_class(train_labels, biased_cls, n_cls,
                                            raw_n_cls)

        return Subset(train_raw_dataset,
                      train_labels[1]), Subset(test_raw_dataset,
                                               test_labels[1])
Example no. 19
def get_train_val_loaders(dataset,
                          datapath=DATA_PATH,
                          train_size=None,
                          val_size=5000,
                          train_batch_size=100,
                          val_batch_size=1000,
                          kwargs=None,
                          train_transform=None,
                          val_transform=None,
                          train_shuffle=True,
                          val_shuffle=False):
    """Support MNIST and CIFAR10"""
    if kwargs is None:
        kwargs = {}
    if train_transform is None:
        train_transform = transforms.ToTensor()
    if val_transform is None:
        val_transform = transforms.ToTensor()

    datapath = os.path.join(datapath, dataset)

    trainset = datasets.__dict__[dataset](datapath,
                                          train=True,
                                          download=True,
                                          transform=train_transform)

    if train_size is not None:
        assert train_size + val_size <= len(trainset)

    if val_size > 0:
        indices = list(range(len(trainset)))
        trainset = Subset(trainset, indices[val_size:])

        valset = datasets.__dict__[dataset](datapath,
                                            train=True,
                                            download=True,
                                            transform=val_transform)
        valset = Subset(valset, indices[:val_size])
        val_loader = torch.utils.data.DataLoader(valset,
                                                 batch_size=val_batch_size,
                                                 shuffle=val_shuffle,
                                                 **kwargs)

    else:
        val_loader = None

    if train_size is not None:
        trainset = Subset(trainset, list(range(train_size)))

    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=train_batch_size,
                                               shuffle=train_shuffle,
                                               **kwargs)

    return train_loader, val_loader
Example no. 20
def get_data(train_batch_size=100,
             test_batch_size=100,
             train_range=None,
             test_range=None,
             random_labels=False,
             seed=0):
    """Get CIFAR10 data. If random_labels=True, randomizes the labels. 
	Inputs: train_batch_size (default: 100), test_batch_size (default:100), train_range (default: None), test_range (default: None), random_labels (default: False), seed (default: None)
	Return: train dataset, test dataset, train loader, test loader
	"""
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    transform_train = transforms.Compose([transforms.ToTensor(), normalize])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])
    train_dataset = datasets.CIFAR10(root='data',
                                     train=True,
                                     transform=transform_train,
                                     download=True)
    test_dataset = datasets.CIFAR10(root='data',
                                    train=False,
                                    transform=transform_test,
                                    download=True)
    if random_labels:
        print("generating random labels with seed {}".format(seed))
        np.random.seed(seed)

        probability_of_random = 1.0
        labels = np.array(train_dataset.targets)
        mask = np.random.rand(
            len(labels)
        ) <= probability_of_random  # boolean mask over the labels, entries drawn uniformly from [0, 1]
        rnd_labels = np.random.choice(
            10, mask.sum())  # random labels in 0-9, one for each masked position
        labels[mask] = rnd_labels
        labels = [int(x) for x in labels]
        train_dataset.targets = labels  #assign new random labels to dataset
        np.savetxt("random_labels.txt", labels)

    if train_range:
        train_dataset = Subset(train_dataset, train_range)

    if test_range:
        test_dataset = Subset(test_dataset, test_range)

    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=train_batch_size,
                              num_workers=4,
                              shuffle=False)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=test_batch_size,
                             num_workers=4,
                             shuffle=False)
    return train_dataset, test_dataset, train_loader, test_loader
Example no. 21
def split_train_and_test(dataset):
    n_samples = len(dataset)
    train_size = round(n_samples * 0.7)

    subset1_indices = list(range(0, train_size))
    subset2_indices = list(range(train_size, n_samples))

    train_dataset = Subset(dataset, subset1_indices)
    test_dataset = Subset(dataset, subset2_indices)

    return train_dataset, test_dataset
Example no. 22
def randomly_split_into_two_datasets(dataset, length_of_first):
    import random
    indices = [i for i in range(len(dataset))]
    random.shuffle(indices)

    first_dataset = indices[:length_of_first]
    second_dataset = indices[length_of_first:]
    first_dataset.sort()
    second_dataset.sort()

    return [Subset(dataset, first_dataset), Subset(dataset, second_dataset)]
Example no. 23
def devide(dataset, test_rate=0.2):
    total_size = len(dataset)
    train_size = int(total_size * (1 - test_rate))

    train_indices = list(range(0, train_size))
    test_indices = list(range(train_size, total_size))

    train_dataset = Subset(dataset, train_indices)
    test_dataset = Subset(dataset, test_indices)

    return train_dataset, test_dataset
Example no. 24
def split_dataset(dataset, n, seed=0):
    """
    Return a pair of datasets corresponding to a random split of the given
    dataset, with n data points in the first dataset and the rest in the second,
    using the given random seed.
    """
    assert (n <= len(dataset))
    idxes = list(range(len(dataset)))
    np.random.RandomState(seed).shuffle(idxes)
    subset_1 = idxes[:n]
    subset_2 = idxes[n:]
    return Subset(dataset, subset_1), Subset(dataset, subset_2)
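Because the shuffle comes from a seeded RandomState, the same seed reproduces the same split. A minimal usage sketch, assuming a dataset object:

n_train = int(0.8 * len(dataset))
train_ds, held_out_ds = split_dataset(dataset, n_train, seed=0)
train_ds_again, _ = split_dataset(dataset, n_train, seed=0)
assert train_ds.indices == train_ds_again.indices  # identical split for the same seed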
Example no. 25
def main(args=None):
    if args is None:
        args = argument_paser()

    # Set experiment id
    exp_id = str(uuid.uuid4())[:8] if args.exp_id is None else args.exp_id
    print(f'Experiment Id: {exp_id}', flush=True)

    # Fix seed
    torch.manual_seed(args.seed)

    # Set logger
    log_writer = SummaryWriter(log_dir=os.path.join(
        args.log_dir, exp_id)) if args.log_dir is not None else None

    # Prepare data
    dataset = MovingMnistDataset()
    train_index, valid_index = train_test_split(range(len(dataset)),
                                                test_size=0.3)
    train_loader = DataLoader(Subset(dataset, train_index),
                              batch_size=args.batch_size,
                              shuffle=True)
    valid_loader = DataLoader(Subset(dataset, valid_index),
                              batch_size=args.test_batch_size,
                              shuffle=False)

    # Prepare model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = ConvLSTMEncoderPredictor(image_size=(64, 64)).to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 betas=(0.9, 0.999))
    criterion = nn.MSELoss()

    run(exp_id=exp_id,
        epochs=args.epochs,
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        scheduler=None,
        train_loader=train_loader,
        valid_loader=valid_loader,
        device=device,
        writer=log_writer,
        log_interval=args.log_interval,
        n_saved=args.n_saved,
        save_dir=args.save_model_path,
        es_patience=args.es_patience)

    # log_writer is None when args.log_dir is not set, so guard the close
    if log_writer is not None:
        log_writer.close()

    return exp_id, model
Example no. 26
def get_data_loaders(args):
    dataset = IKDataset(args.kinematics_pose_csv, args.joint_states_csv)
    train_size = int(len(dataset) * args.train_val_ratio)
    train_dataset = Subset(dataset, list(range(0, train_size)))
    val_dataset = Subset(dataset, list(range(train_size, len(dataset))))
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=True)

    return train_loader, val_loader
Example no. 27
 def intra_class_split(self, ratio, shuffle):
     n_class = len(self.visible_classes)
     classes = [self.subset([i]) for i in range(n_class)]
     n_samples = [len(clss) for clss in classes]
     part_a, part_b = [], []
     for clss, size in zip(classes, n_samples):
         idx = list(range(size))
         if shuffle:
             random.shuffle(idx)
         thres = int(size * ratio)
         part_a.append(Subset(clss, idx[:thres]))
         part_b.append(Subset(clss, idx[thres:]))
     return (ConcatDatasetWithNewLabel(part_a),
             ConcatDatasetWithNewLabel(part_b))
Example no. 28
 def _split_data_to_k_fold_cv_subsets(dataset: Dataset,
                                      n_fold: int) -> List[CVPair]:
     n_validation_data = len(dataset) // n_fold
     perm = np.random.permutation(len(dataset))
     cv_subsets = []
     for i in range(n_fold):
         boolean_index = np.zeros(len(dataset)).astype(bool)
         p = perm[i * n_validation_data:(i + 1) * n_validation_data]
         boolean_index[p] = True
         train_subset = Subset(dataset, np.where(~boolean_index)[0])
         validation_subset = Subset(dataset, np.where(boolean_index)[0])
         cv_subsets.append(
             CVPair(train=train_subset, validation=validation_subset))
     return cv_subsets
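The CVPair container is referenced but not defined in this excerpt; a minimal sketch of what it could look like (an assumption, not the original definition):

from typing import NamedTuple
from torch.utils.data import Dataset

class CVPair(NamedTuple):
    train: Dataset
    validation: Dataset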
Example no. 29
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:

    train_ds = ImageNet(
        root_path, split="train", transform=lambda sample: train_transforms(image=sample)["image"], loader=opencv_loader
    )
    val_ds = ImageNet(
        root_path, split="val", transform=lambda sample: val_transforms(image=sample)["image"], loader=opencv_loader
    )

    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader
Example no. 30
    def __init__(self, args, device):
        super(CifarSearcher, self).__init__(args)

        self.device = device

        train_indices = list(range(0, 45000))
        valid_indices = list(range(45000, 50000))

        # Handle data
        transformer_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])

        transformer_val = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])

        transformers = {'train': transformer_train, 'test': transformer_val}

        dataset_training = torchvision.datasets.CIFAR10(
            root=args.data_dir,
            train=True,
            download=True,
            transform=transformers['train'])
        dataset_validate = torchvision.datasets.CIFAR10(
            root=args.data_dir,
            train=True,
            download=True,
            # the validation split should not see the training augmentations
            transform=transformers['test'])

        train_subset = Subset(dataset_training, train_indices)
        valid_subset = Subset(dataset_validate, valid_indices)

        trainloader = torch.utils.data.DataLoader(train_subset,
                                                  batch_size=args.batchsize,
                                                  shuffle=True,
                                                  num_workers=args.num_workers)
        devloader = torch.utils.data.DataLoader(valid_subset,
                                                batch_size=args.batchsize,
                                                shuffle=False,
                                                num_workers=args.num_workers)

        self.dataloaders = {'train': trainloader, 'dev': devloader}