예제 #1
0
    def __init__(self, *args, **kwargs):
        """Initialize the wrapper and expose vanilla train/val ImageNet splits.

        Raises:
            RuntimeError: if ``download=True`` is passed — ImageNet cannot be
                downloaded automatically.
        """
        super().__init__(*args, **kwargs)
        # Use .get(): the original kwargs["download"] raised KeyError whenever
        # the caller simply omitted the flag.
        if kwargs.get("download", False):
            raise RuntimeError("Can't download ImageNet, sry")

        # Plain torchvision ImageNet datasets for the two canonical splits;
        # self.dataset_dir is expected to be set by the base __init__.
        self.vanilla_train = ImageNet(self.dataset_dir, split="train")
        self.vanilla_val = ImageNet(self.dataset_dir, split="val")
예제 #2
0
def main(args):
    """Entry point: build config, ImageNet dataloaders, and run the Optimizer."""
    cfg = get_cfg()
    cfg.OUTPUT_DIR = args.output_dir

    workers = cfg.DATASETS.NUM_WORKER
    bs = cfg.SOLVER.BATCH_SIZE

    tfms = get_transforms()

    # Previously-used datasets kept for reference:
    #   Coco2017Dataset(root=args.root) / Coco2017Dataset(root=args.root, data_type='val')
    #   CIFAR10(root=args.root, train=True/False, download=False, transform=...)
    train_dl = DataLoader(
        ImageNet(root=args.root, split='train', transform=tfms['train']),
        batch_size=bs,
        num_workers=workers,
        shuffle=True,
    )
    val_dl = DataLoader(
        ImageNet(root=args.root, split='val', transform=tfms['val']),
        batch_size=bs,
        num_workers=workers,
        shuffle=False,
    )
    # val_dl is built but currently not handed to Optimizer (kwarg commented
    # out below); the assert keeps the name referenced.
    assert val_dl

    optimizer = Optimizer(
        cfg=cfg,
        weight_path=args.weights,
        train_dl=train_dl,
        # val_dl=val_dl
    )
    optimizer()
def create_dataset(h5file,
                   root,
                   image_size=224,
                   batch_size=100,
                   num_workers=0):
    """Dump the ImageNet train and val splits into one HDF5 file.

    Creates four datasets in ``h5file``: ``{train,val}_data`` (uint8 images,
    NCHW) and ``{train,val}_targets`` (int64 labels, shifted to be 1-based).

    Parameters
    ----------
    h5file : str
        Output HDF5 path; parent directories are created if missing.
    root : str
        ImageNet root directory (pre-downloaded).
    image_size : int
        Side length of the square images produced by ``get_transform``.
    batch_size, num_workers : int
        DataLoader settings used while streaming the data.
    """
    path = os.path.dirname(h5file)
    # Guard against an empty dirname (h5file in the CWD): os.makedirs('')
    # raises FileNotFoundError.
    if path and not os.path.exists(path):
        os.makedirs(path)

    f = h5py.File(h5file, mode='w')

    def _dump_split(split, data, loader):
        # One pass over `loader`, writing images and 1-based labels into
        # pre-allocated HDF5 datasets for `split`.
        n = len(data)
        shape = (n, 3, image_size, image_size)
        f.create_dataset(f'{split}_data', shape, np.uint8)
        # np.int was removed in NumPy 1.24 — use the explicit np.int64.
        f.create_dataset(f'{split}_targets', (n, ), np.int64)

        for i, batch in enumerate(tqdm(loader)):
            inds = list(
                range(i * batch_size, min((i + 1) * batch_size, n)))

            x = batch[0].detach().cpu().numpy()
            y = batch[1].detach().cpu().numpy() + 1  # labels shifted to 1-based

            f[f'{split}_data'][inds, ...] = x
            f[f'{split}_targets'][inds] = y

    try:
        print('Train data:')
        data_train = ImageNet(root,
                              'train',
                              transform=get_transform(image_size))
        _dump_split('train', data_train,
                    get_loader(data_train, batch_size, num_workers))

        print('Validation data:')
        data_val = ImageNet(root, 'val', transform=get_transform(image_size))
        _dump_split('val', data_val,
                    get_loader(data_val, batch_size, num_workers))
    finally:
        # Always close the file, even if a dump fails part-way.
        f.close()
예제 #4
0
    def build(self):
        """Return the (train, test) ImageNet datasets for this module."""
        train_split = ImageNet(self.data_dir,
                               split='train',
                               transform=self.train_trans)
        # The 'val' split serves as the held-out test set here.
        test_split = ImageNet(self.data_dir,
                              split='val',
                              transform=self.test_trans)
        return train_split, test_split
예제 #5
0
def get_dataloader_imagenet(train_batch_size=256,
                            test_batch_size=512,
                            dataset_root_path=IMAGENET_DATASET_ROOT_FOLDER,
                            num_workers=IMAGENET_NUM_WORKERS):
    '''
    Build ImageNet dataloaders with the classic transforms.

    Returns a (train_loader, valid_loader, valid_loader) triple — the same
    validation loader is reused as the test loader.
    Canonical data augmentation is applied on the train loader;
    normalization is applied to all loaders.
    The valid loader uses test_batch_size.

    Arguments:
        train_batch_size (int): batch size for the train loader
        test_batch_size (int): batch size for the valid/test loader
        dataset_root_path (str): root of a pre-downloaded ImageNet
        num_workers (int): number of workers to use
    '''
    dataset_root_path = expanduser(dataset_root_path)
    # Canonical ImageNet augmentation for training.
    train_transforms = v_transforms.Compose([
        v_transforms.RandomResizedCrop(224),
        v_transforms.RandomHorizontalFlip(),
        v_transforms.ToTensor(),
        v_transforms.Normalize(mean=MEAN, std=STD),
    ])

    # Deterministic resize/center-crop for evaluation.
    test_transforms = v_transforms.Compose([
        v_transforms.Resize(256),
        v_transforms.CenterCrop(224),
        v_transforms.ToTensor(),
        v_transforms.Normalize(mean=MEAN, std=STD),
    ])

    train_dataset = ImageNet(
        dataset_root_path,
        split='train',
        transform=train_transforms,
    )
    # No download flag: torchvision's ImageNet cannot be downloaded and
    # raises RuntimeError when download=True is passed.
    valid_dataset = ImageNet(dataset_root_path,
                             split='val',
                             transform=test_transforms)

    # We use valid split both as val and test loader
    data_loader_train = DataLoader(train_dataset,
                                   batch_size=train_batch_size,
                                   shuffle=True,
                                   num_workers=num_workers,
                                   pin_memory=True)
    data_loader_valid = DataLoader(valid_dataset,
                                   batch_size=test_batch_size,
                                   num_workers=num_workers,
                                   pin_memory=True)

    return data_loader_train, data_loader_valid, data_loader_valid
예제 #6
0
def get_train_val_loaders(
    root_path: str,
    train_transforms: Callable,
    val_transforms: Callable,
    batch_size: int = 16,
    num_workers: int = 8,
    val_batch_size: Optional[int] = None,
    limit_train_num_samples: Optional[int] = None,
    limit_val_num_samples: Optional[int] = None,
) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Build (train, val, train-eval) ImageNet dataloaders.

    The transforms are albumentations-style callables: they are invoked as
    ``transforms(image=sample)["image"]``. The train-eval loader evaluates on
    a random subset of the training data of the same size as the val set.

    Note: the subset seeds are derived from the limit/size values themselves,
    so the same limits always select the same samples (and np.random's global
    state is reseeded as a side effect).
    """

    train_ds = ImageNet(
        root_path, split="train", transform=lambda sample: train_transforms(image=sample)["image"], loader=opencv_loader
    )
    val_ds = ImageNet(
        root_path, split="val", transform=lambda sample: val_transforms(image=sample)["image"], loader=opencv_loader
    )

    # Optionally cap the train set to a deterministic random subset.
    if limit_train_num_samples is not None:
        np.random.seed(limit_train_num_samples)
        train_indices = np.random.permutation(len(train_ds))[:limit_train_num_samples]
        train_ds = Subset(train_ds, train_indices)

    # Optionally cap the val set to a deterministic random subset.
    if limit_val_num_samples is not None:
        np.random.seed(limit_val_num_samples)
        val_indices = np.random.permutation(len(val_ds))[:limit_val_num_samples]
        val_ds = Subset(val_ds, val_indices)

    # random samples for evaluation on training dataset
    if len(val_ds) < len(train_ds):
        np.random.seed(len(val_ds))
        train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)]
        train_eval_ds = Subset(train_ds, train_eval_indices)
    else:
        train_eval_ds = train_ds

    train_loader = idist.auto_dataloader(
        train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True,
    )

    # Validation batches can be larger since no gradients are kept.
    val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size
    val_loader = idist.auto_dataloader(
        val_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    train_eval_loader = idist.auto_dataloader(
        train_eval_ds, shuffle=False, batch_size=val_batch_size, num_workers=num_workers, drop_last=False,
    )

    return train_loader, val_loader, train_eval_loader
예제 #7
0
 def get_val_split(self, transform=None, num_samples=None):
     """Return a validation Subset of `num_samples` ImageNet images.

     Prefers unused tail samples of the train split; falls back to the
     beginning of the val split when not enough train samples remain.
     """
     if num_samples is None:
         num_samples = self.NUM_VAL_SAMPLES
     remaining = self.TRN_SET_SIZE - self.TOT_TRN_SAMPLES
     if remaining >= num_samples:
         # Enough unused training samples: carve validation out of train.
         train_ds = ImageNet(root=self.DATASET_FOLDER,
                             split='train',
                             transform=transform)
         tail = self.indices[self.TRN_SET_SIZE - num_samples:self.TRN_SET_SIZE]
         return Subset(train_ds, tail)
     # Otherwise validate directly on the (test) val split.
     val_ds = ImageNet(root=self.DATASET_FOLDER,
                       split='val',
                       transform=transform)
     return Subset(val_ds, range(num_samples))
예제 #8
0
def prepare_imagenet_subset(data_dir: Path, imagenet_dir: Path):
    """Copy a deterministic ~10%-per-class sample of the ImageNet val split."""
    print('Preparing ImageNet subset...')
    rng = random.Random(42)  # fixed seed -> reproducible subset

    # Use a predownloaded imagenet; only the validation split is needed.
    source = ImageNet(imagenet_dir, split='val')
    per_category = defaultdict(list)
    for path, cat in source.imgs:
        per_category[cat].append(path)

    # Recreate the target directory from scratch.
    target_root = data_dir / 'imagenet'
    shutil.rmtree(target_root, ignore_errors=True)
    target_root.mkdir(parents=True)
    shutil.copyfile(imagenet_dir / 'meta.bin', target_root / 'meta.bin')

    total = 0
    for paths in per_category.values():
        rng.shuffle(paths)
        # Keep the first 10% of each shuffled category.
        for src in paths[:len(paths) // 10]:
            src_path = Path(src)
            dest_dir = target_root / 'val' / src_path.parent.name
            dest_dir.mkdir(exist_ok=True, parents=True)
            shutil.copyfile(src, dest_dir / src_path.name)
            total += 1
    print(f'Generated a subset of {total} images.')
예제 #9
0
def get_imagenet_valset(root=".data", transform=None, target_transform=None):
    """Return the ImageNet val split, defaulting to the standard transform."""
    chosen_transform = transform if transform is not None else get_imagenet_val_transform()
    return ImageNet(root=root,
                    split="val",
                    transform=chosen_transform,
                    target_transform=target_transform)
예제 #10
0
def setup_dataloader(batch_size: int) -> (DataLoader, DataLoader):
    '''Setup dataloader for training and validation.

    Expects a pre-downloaded ImageNet at config.imagenet_dataset_root_dir.
    Note: download=True was removed — torchvision's ImageNet raises
    RuntimeError when asked to download.
    '''
    train_set = ImageNet(config.imagenet_dataset_root_dir,
                         split='train',
                         transform=image_transforms)
    val_set = ImageNet(config.imagenet_dataset_root_dir,
                       split='val',
                       transform=image_transforms)
    # Shared loader options (workers, pinning, ...) come from the config.
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              **config.dataloader_config)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            **config.dataloader_config)
    return train_loader, val_loader
예제 #11
0
    def prepare_datasets(self, download=False):
        """Generate the stats.json file for an already-extracted ImageNet.

        The user must have ImageNet downloaded & extracted under
        self.dataset_dir, with meta.bin generated:

            dataset_dir
            |-meta.bin
            |-train
            | |-wnid1
            | | |-image1.jpg
            | | ...
            | |-wnid2
            | ...
            |-val
            | |-wnid1
            | |-wnid2
            | ...

        Each wnid is already a client, so all this method needs to do is
        write per-client image counts and the val-set size to stats.json.
        """
        if download:
            raise RuntimeError("Can't download ImageNet, sry")

        # Don't use self.vanilla_train/val — those attributes don't exist yet.
        vanilla_train = ImageNet(self.dataset_dir, split="train")
        vanilla_val = ImageNet(self.dataset_dir, split="val")

        # Clients must be ordered exactly as vanilla_train iterates through
        # classes: count the length of each consecutive run of equal targets.
        images_per_client = []
        previous_target = -1
        for _, current_target in vanilla_train.samples:
            if current_target != previous_target:
                images_per_client.append(0)
                previous_target = current_target
            images_per_client[-1] += 1

        stats = {"images_per_client": images_per_client,
                 "num_val_images": len(vanilla_val.samples)}

        fn = self.stats_fn()
        if os.path.exists(fn):
            raise RuntimeError("won't overwrite existing stats file")
        with open(fn, "w") as f:
            json.dump(stats, f)
예제 #12
0
def imagenet_loader(batch_size, num_workers, datapath, cuda):
    """Build (train_loader, val_loader) for ImageNet with canonical transforms.

    Parameters
    ----------
    batch_size, num_workers : int
        DataLoader settings.
    datapath : str
        Root of a pre-downloaded ImageNet.
    cuda : bool
        When True, pin host memory for faster host->device transfers.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Standard augmentation for training.
    transform_train = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Deterministic resize/center-crop for evaluation.
    transform_val = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    trainset = ImageNet(
        root=datapath, split='train', download=False,
        transform=transform_train)
    valset = ImageNet(
        root=datapath, split='val', download=False,
        transform=transform_val)

    # The four original branches differed only in pin_memory — collapse them.
    train_loader = torch.utils.data.DataLoader(
        trainset,
        batch_size=batch_size, shuffle=True,
        num_workers=num_workers, pin_memory=cuda)
    val_loader = torch.utils.data.DataLoader(
        valset,
        batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=cuda)

    return train_loader, val_loader
예제 #13
0
def benchmark(model: nn.Module,
              transform,
              batch_size=64,
              device=device,
              fast: bool = False):
    """Evaluate `model` on the ImageNet val split via ImageNetEvaluator.

    Returns (top1, top5, elapsed_seconds), or (running_top1, None, None)
    when `fast` is True (only the first batch is processed).

    NOTE(review): `device=device` binds a module-level global as the default;
    the ImageNet root is hard-coded below.
    """

    valid_dataset = ImageNet(root="/home/zuppif/Downloads/ImageNet",
                             split="val",
                             transform=transform)

    # shuffle=False is required: image ids are recovered below by slicing
    # dataset.imgs with the batch index.
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=12,
        pin_memory=True,
    )

    evaluator = ImageNetEvaluator(model_name="test",
                                  paper_arxiv_id="1905.11946")
    model.eval().to(device)

    num_batches = int(
        math.ceil(len(valid_loader.dataset) / float(valid_loader.batch_size)))

    start = time.time()

    with torch.no_grad():
        # tqdm is only used as a manual progress bar (updated per batch);
        # the actual iteration is over valid_loader.
        pbar = tqdm(np.arange(num_batches), leave=False)
        for i_val, (images, labels) in enumerate(valid_loader):

            images = images.to(device)
            labels = torch.squeeze(labels.to(device))

            net_out = model(images)

            # Recover the image ids of this batch from the (unshuffled)
            # dataset ordering.
            image_ids = [
                get_img_id(img[0]) for img in
                valid_loader.dataset.imgs[i_val *
                                          valid_loader.batch_size:(i_val + 1) *
                                          valid_loader.batch_size]
            ]
            evaluator.add(dict(zip(image_ids, list(net_out.cpu().numpy()))))
            pbar.set_description(f"f1={evaluator.top1.avg:.2f}")
            pbar.update(1)
            if fast:
                # Fast mode: a single batch is enough for a smoke test.
                break
        pbar.close()
    stop = time.time()
    if fast:
        return evaluator.top1.avg, None, None
    else:
        res = evaluator.get_results()
        return res["Top 1 Accuracy"], res["Top 5 Accuracy"], stop - start
예제 #14
0
파일: datasets.py 프로젝트: zcqian/lab-work
def imagenet1k():
    """Return (train, val) ImageNet-1k datasets from ~/Datasets/imagenet."""
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    root = os.path.expanduser('~/Datasets/imagenet')

    # Augmented pipeline for training.
    train_pipeline = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Deterministic pipeline for evaluation.
    val_pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    dataset_train = ImageNet(root, split='train', transform=train_pipeline)
    dataset_val = ImageNet(root, split='val', transform=val_pipeline)
    return dataset_train, dataset_val
def make_loader(path, split, batch_size, num_workers):
    """Build a DataLoader over the given ImageNet split.

    Training uses augmentation and shuffling; any other split gets the
    deterministic resize/center-crop pipeline with no shuffling.
    """
    is_train = split == 'train'
    if is_train:
        transform = Compose([
            RandomResizedCrop(224, scale=(0.3, 4.0 / 3)),
            ColorJitter(0.4, 0.4, 0.4),
            RandomHorizontalFlip(),
            ToTensor(),
        ])
    else:
        transform = Compose([Resize(256), CenterCrop(224), ToTensor()])

    dataset = ImageNet(path, split=split, transform=transform)
    return DataLoader(dataset=dataset,
                      batch_size=batch_size,
                      shuffle=is_train,
                      num_workers=num_workers)
예제 #16
0
    def __init__(self,
                 root='data',
                 features_name='resnet50',
                 train=True,
                 subsample_to=-1,
                 subsample_seed=123):
        """Load precomputed ImageNet features as a grouped tensor dataset.

        Parameters
        ----------

        root : string
            Path to the root of the data folder
        features_name : string
            Features expected to be in {root}/imagenet/{features_name}_features
        train : bool
            If True, loads the train set
        subsample_to : int
            Number of examples to sample (<= 0 disables subsampling)
        subsample_seed:
            Seed that decides the randomness of the subsampling.

        """
        split = 'train' if train else 'val'

        # load the base ImageNet object just for reading the class names
        # (the 'val' split is enough for that regardless of `train`)
        imagenet_base = ImageNet(os.path.join(root, 'imagenet'),
                                 split='val',
                                 download=False)
        # Group names like "007_great_white_shark": index prefix + first
        # synonym of each class, spaces replaced by underscores.
        group_names = [
            '%03d_' % i + e[0].replace(' ', '_')
            for i, e in enumerate(imagenet_base.classes)
        ]

        # Precomputed features live at {root}/imagenet/{features_name}_features/{split}.pt
        features_path = os.path.join(root, 'imagenet',
                                     features_name + '_features',
                                     split + '.pt')
        data = torch.load(features_path)
        xy = data['features'], data['targets']
        if subsample_to > 0:
            xy = utils.subsample_arrays(xy, subsample_to, subsample_seed)

        # Targets double as group ids (one group per class).
        group_ids = xy[1]

        inner_dataset = TensorDataset(*xy)

        self.num_classes = 1000

        super().__init__(inner_dataset, group_ids, group_names)
예제 #17
0
  def __init__(self, params, batch_size, num_gpus, is_training):
    """Reader for ImageNet: configures image size, normalization constants,
    and builds the torchvision dataset for the requested split."""
    super(IMAGENETReader, self).__init__(
      params, batch_size, num_gpus, is_training)

    # Provide square images of this size. 
    self.image_size = self.params.imagenet_image_size
    # EfficientNet variants each expect a specific input resolution.
    if 'efficientnet' in self.params.model:
      self.image_size = {
        'efficientnet-b0': 224,
        'efficientnet-b1': 240,
        'efficientnet-b2': 260,
        'efficientnet-b3': 300,
        'efficientnet-b4': 380,
        'efficientnet-b5': 456,
        'efficientnet-b6': 528,
        'efficientnet-b7': 600,
      }[self.params.model]

    # PCA eigenvalues/eigenvectors used for AlexNet-style lighting noise.
    self.eigval = [0.2175, 0.0188, 0.0045]
    self.eigvec = [
          [-0.5675,  0.7192,  0.4009],
          [-0.5808, -0.0045, -0.8140],
          [-0.5836, -0.6948,  0.4203],
      ]

    # Standard ImageNet channel statistics.
    self.normalize_mean = [0.485, 0.456, 0.406]
    self.normalize_std = [0.229, 0.224, 0.225]

    self.imagenet_normalize = transforms.Normalize(
      mean=self.normalize_mean,
      std=self.normalize_std
    )

    self.height, self.width = self.image_size, self.image_size
    # Canonical ImageNet-1k counts.
    self.n_train_files = 1281167
    self.n_test_files = 50000
    self.n_classes = 1000
    # NOTE(review): uses self.height twice — presumably fine since images
    # are square, but self.width would be the symmetric choice.
    self.img_size = (None, 3, self.height, self.height)

    split = 'train' if self.is_training else 'val'

    # Model-specific preprocessing pipeline.
    if 'efficientnet' in self.params.model:
      transform = self.efficientnet_transform()
    else:
      transform = self.transform()

    self.dataset = ImageNet(self.path, split=split,
                            transform=transform)
예제 #18
0
    def __init__(self, params, batch_size, num_gpus, is_training):
        """Reader for ImageNet: configures image size and PCA lighting
        constants, then builds the torchvision dataset for the split."""
        super(IMAGENETReader, self).__init__(params, batch_size, num_gpus,
                                             is_training)

        # Provide square images of this size.
        self.image_size = self.params.imagenet_image_size
        # EfficientNet variants each expect a specific input resolution.
        if 'efficientnet' in self.params.model:
            self.image_size = {
                'efficientnet-b0': 224,
                'efficientnet-b1': 240,
                'efficientnet-b2': 260,
                'efficientnet-b3': 300,
                'efficientnet-b4': 380,
                'efficientnet-b5': 456,
                'efficientnet-b6': 528,
                'efficientnet-b7': 600,
            }[self.params.model]

        # PCA stats for AlexNet-style lighting augmentation.
        self.imagenet_pca = {
            'eigval': [0.2175, 0.0188, 0.0045],
            'eigvec': [
                [-0.5675, 0.7192, 0.4009],
                [-0.5808, -0.0045, -0.8140],
                [-0.5836, -0.6948, 0.4203],
            ]
        }

        self.height, self.width = self.image_size, self.image_size
        # Canonical ImageNet-1k counts.
        self.n_train_files = 1281167
        self.n_test_files = 50000
        self.n_classes = 1000
        # NOTE(review): uses self.height twice — presumably fine for square
        # inputs, but self.width would be the symmetric choice.
        self.batch_shape = (None, 3, self.height, self.height)

        split = 'train' if self.is_training else 'val'

        # Model-specific preprocessing pipeline.
        if 'efficientnet' in self.params.model:
            transform = self.efficientnet_transform()
        else:
            transform = self.transform()

        # Optionally append a noise-injection step to the pipeline.
        if self.add_noise:
            transform.transforms.append(AddNoise(self.params))

        self.dataset = ImageNet(self.path,
                                split=split,
                                download=False,
                                transform=transform)
예제 #19
0
    def prepare_data(self):
        """Instantiate the training dataset selected by hparams.dataset.

        Supported: mnist, fashion_mnist, cifar10, image_net, lsun, celeba_hq.
        Raises NotImplementedError for anything else.
        """
        train_resize = transforms.Resize((self.hparams.image_size, self.hparams.image_size))
        train_normalize = transforms.Normalize(mean=[0.5], std=[0.5])
        train_transform = transforms.Compose([train_resize, transforms.ToTensor(), train_normalize])

        if self.hparams.dataset == "mnist":
            self.train_dataset = MNIST(self.hparams.dataset_path, train=True, download=True, transform=train_transform)
            # self.test_dataset = MNIST(self.hparams.dataset_path, train=False, download=True, transform=test_transform)
        elif self.hparams.dataset == "fashion_mnist":
            self.train_dataset = FashionMNIST(self.hparams.dataset_path, train=True, download=True, transform=train_transform)
            # self.test_dataset = FashionMNIST(self.hparams.dataset_path, train=False, download=True, transform=test_transform)
        elif self.hparams.dataset == "cifar10":
            self.train_dataset = CIFAR10(self.hparams.dataset_path, train=True, download=True, transform=train_transform)
            # self.test_dataset = CIFAR10(self.hparams.dataset_path, train=False, download=True, transform=test_transform)
        elif self.hparams.dataset == "image_net":
            # ImageNet takes split= (not train=) and cannot be downloaded;
            # the original train=True/download=True call raised at runtime.
            self.train_dataset = ImageNet(self.hparams.dataset_path, split="train", transform=train_transform)
            # self.test_dataset = ImageNet(self.hparams.dataset_path, split="val", transform=test_transform)
        elif self.hparams.dataset == "lsun":
            self.train_dataset = LSUN(self.hparams.dataset_path + "/lsun", classes=[cls + "_train" for cls in self.hparams.dataset_classes], transform=train_transform)
            # self.test_dataset = LSUN(self.hparams.dataset_path, classes=[cls + "_test" for cls in self.hparams.dataset_classes], transform=test_transform)
        elif self.hparams.dataset == "celeba_hq":
            self.train_dataset = CelebAHQ(self.hparams.dataset_path, image_size=self.hparams.image_size, transform=train_transform)
        else:
            raise NotImplementedError("Custom dataset is not implemented yet")
def main(args):
    """Train and validate a model on CIFAR/ImageNet/tiny-imagenet.

    Builds the optimizer, scheduler, criterion and dataloaders from `args`,
    optionally resumes from a checkpoint, then runs the train/validate loop,
    saving a summary and checkpoint every epoch.
    """
    model = Model()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    # Default scheduler: step decay; may be replaced below.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=args.step_size,
                                                gamma=args.gamma)

    if args.scheduler == 'multistep':
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                         args.milestones,
                                                         gamma=args.gamma)
    elif args.scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.step_size)

    criterion = torch.nn.CrossEntropyLoss()

    model = model.cuda()
    criterion = criterion.cuda()

    start_epoch = 0

    # Check number of parameters your model
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {pytorch_total_params}")

    if not os.path.exists('{}'.format(args.savepath)):
        os.makedirs('{}'.format(args.savepath))

    # resume
    if args.resume:
        model, optimizer, start_epoch = load_ckpt(model, optimizer, args)

    # Dataloader
    if args.dataset == 'cifar10':
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        # RandAugment is prepended so it operates on PIL images.
        transform_train.transforms.insert(
            0, RandAugment(args.rand_n, args.rand_m))
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        trainset = CIFAR10(root=args.datapath,
                           train=True,
                           download=True,
                           transform=transform_train)
        valset = CIFAR10(root=args.datapath,
                         train=False,
                         download=True,
                         transform=transform_val)
    elif args.dataset == 'cifar100':
        normalize = transforms.Normalize(mean=[0.4914, 0.4822, 0.4465],
                                         std=[0.2023, 0.1994, 0.2010])
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        trainset = CIFAR100(root=args.datapath,
                            train=True,
                            download=True,
                            transform=transform_train)
        valset = CIFAR100(root=args.datapath,
                          train=False,
                          download=True,
                          transform=transform_val)
    elif args.dataset == 'ImageNet':
        # NOTE(review): `image_size` is not defined in this function —
        # presumably a module-level constant; confirm.
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        trainset = ImageNet(root=args.datapath,
                            split='train',
                            download=False,
                            transform=transform_train)
        valset = ImageNet(root=args.datapath,
                          split='val',
                          download=False,
                          transform=transform_val)
    elif args.dataset == 'tiny-imagenet-200':  # fixed typo: was args.dataeset
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
        transform_val = transforms.Compose([
            transforms.Resize(image_size + 32),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize,
        ])
        # ImageFolder takes no split/download kwargs — point root at the
        # per-split subdirectories instead.
        trainset = ImageFolder(root=os.path.join(args.datapath, 'train'),
                               transform=transform_train)
        valset = ImageFolder(root=os.path.join(args.datapath, 'val'),
                             transform=transform_val)
    else:
        # Previously fell through with trainset/valset undefined (NameError).
        raise ValueError('unknown dataset: {}'.format(args.dataset))

    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers,
                                               pin_memory=False)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers,
                                             pin_memory=False)

    # start training
    last_top1_acc = 0
    acc1_valid = 0
    best_acc1 = 0
    is_best = False
    for epoch in range(start_epoch, args.epochs):
        print("\n----- epoch: {}, lr: {} -----".format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        start_time = time.time()
        last_top1_acc = train(train_loader, epoch, model, optimizer, criterion)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to train this epoch\n'.format(elapsed_time))

        # validate for one epoch
        start_time = time.time()
        acc1_valid = validate(val_loader, model, criterion)
        elapsed_time = time.time() - start_time
        print(
            '==> {:.2f} seconds to validate this epoch\n'.format(elapsed_time))

        # learning rate scheduling
        scheduler.step()

        summary = [epoch, last_top1_acc, acc1_valid.item()]

        is_best = acc1_valid > best_acc1
        best_acc1 = max(acc1_valid, best_acc1)

        save_summary('rexnetv1', args.dataset, args.name, summary)

        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }

        save_ckpt(checkpoint, is_best, args)

        #if is_best:
        #  torch.save(model.state_dict(), args.savepath+'model_weight_best.pth')

        # Save model each epoch
        #torch.save(model.state_dict(), args.savepath+'model_weight_epoch{}.pth'.format(epoch))

    print(f"Last Top-1 Accuracy: {last_top1_acc}")
    print(f"Best valid Top-1 Accuracy: {best_acc1}")
    print(f"Number of parameters: {pytorch_total_params}")
예제 #21
0
 def get_test_split(self, transform=None, num_samples=None):
     """Return the first `num_samples` images of the ImageNet val split."""
     if num_samples is None:
         num_samples = self.NUM_TST_SAMPLES
     test_ds = ImageNet(root=self.DATASET_FOLDER,
                        split='val',
                        transform=transform)
     return Subset(test_ds, range(num_samples))
예제 #22
0
 def get_train_split(self, transform=None, num_samples=None):
     """Return the first `num_samples` of this object's train indices."""
     if num_samples is None:
         num_samples = self.TOT_TRN_SAMPLES
     train_ds = ImageNet(root=self.DATASET_FOLDER,
                         split='train',
                         transform=transform)
     return Subset(train_ds, self.indices[:num_samples])
예제 #23
0
def get_dataset(path: Path, transforms: Callable = None) -> Dataset:
    """Load the ImageNet validation split rooted at *path*.

    Args:
        path: Root directory holding the ImageNet archives/metadata.
        transforms: Optional callable applied to each image.

    Returns:
        A torchvision ``ImageNet`` dataset for the 'val' split.
    """
    root = str(path)
    return ImageNet(root, split='val', transform=transforms)
예제 #24
0
def _make_data_loader_imagenet(root,
                               batch_size,
                               workers=4,
                               is_train=True,
                               download=False,
                               distributed=False):
    """Build an ImageNet ``DataLoader`` for the train or validation split.

    Args:
        root: ImageNet root directory (torchvision on-disk layout).
        batch_size: Samples per batch.
        workers: Number of DataLoader worker processes.
        is_train: If True, build the augmented 'train' pipeline; otherwise
            the deterministic resize/center-crop 'val' pipeline.
        download: Forwarded to ``ImageNet``. NOTE(review): recent
            torchvision versions reject a truthy ``download`` for ImageNet
            (the dataset is no longer downloadable) — confirm the pinned
            torchvision version if this is ever True.
        distributed: If True, use a ``DistributedSampler``; otherwise a
            random sampler for train and a sequential sampler for val.

    Returns:
        A ``torch.utils.data.DataLoader`` over the requested split.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    logger = logging.getLogger('octconv')

    if is_train:
        logger.info("Loading ImageNet training data")

        st = time.time()
        # Standard Inception-style crop scale range.
        scale = (0.08, 1.0)

        transform = transforms.Compose([
            transforms.RandomResizedCrop(224, scale=scale),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])

        dataset = ImageNet(root=root,
                           split='train',
                           download=download,
                           transform=transform)

        logger.info("Took: {}".format(time.time() - st))
    else:
        logger.info("Loading ImageNet validation data")

        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])

        dataset = ImageNet(root=root,
                           split='val',
                           download=download,
                           transform=transform)

    # Sampler choice and loader construction were duplicated across both
    # branches in the original; they are identical apart from the
    # non-distributed fallback sampler, so build them once here.
    if distributed:
        sampler = torch.utils.data.distributed.DistributedSampler(dataset)
    elif is_train:
        sampler = torch.utils.data.RandomSampler(dataset)
    else:
        sampler = torch.utils.data.SequentialSampler(dataset)

    return DataLoader(dataset,
                      batch_size=batch_size,
                      num_workers=workers,
                      sampler=sampler,
                      pin_memory=True)
예제 #25
0
def load_dataset(dataset_name, **kwargs):
    """Loads the specified dataset and returns a PyTorch dataset object.

    Applies the standard transformations for said dataset by default.

    Args:
        dataset_name: One of 'cifar10', 'cifar100', 'imagenet', 'cifar10r',
            'cifar100r', 'cifar10gb', 'cifar100gb' or 'cifar10imba'.
        **kwargs: Extra options. For the CIFAR variants, 'train' (default
            False) selects the training split instead of the test split.
            'cifar10imba' requires 'class_ratios'.

    Returns:
        A ``torch.utils.data.Dataset`` instance.

    Raises:
        NotImplementedError: If ``dataset_name`` is not one of the names above.
    """
    data_path = pathlib.Path('data').resolve()

    if dataset_name in ('cifar10', 'cifar100'):
        from torchvision.datasets import CIFAR10, CIFAR100

        # Both CIFAR branches in the original used the identical transform
        # and construction pattern, so they are merged here.
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])

        # User can specify to load the training set; loads the test set by default.
        train = kwargs.pop('train', False)
        ds_cls = CIFAR10 if dataset_name == 'cifar10' else CIFAR100
        dataset = ds_cls(data_path,
                         train=train,
                         transform=transform,
                         download=True)
    elif dataset_name == 'imagenet':
        # Requires imagenet to be downloaded locally
        from torchvision.datasets import ImageNet

        # Standard ImageNet eval transformation
        transform = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        dataset = ImageNet(data_path / 'imagenet',
                           split='val',
                           transform=transform)
    elif dataset_name == 'cifar10r':
        from data.nonstationary_datasets import CIFAR10R
        dataset = CIFAR10R()
    elif dataset_name == 'cifar100r':
        from data.nonstationary_datasets import CIFAR100R
        dataset = CIFAR100R()
    elif dataset_name == 'cifar10gb':
        from data.nonstationary_datasets import CIFAR10GB
        dataset = CIFAR10GB()
    elif dataset_name == 'cifar100gb':
        from data.nonstationary_datasets import CIFAR100GB
        dataset = CIFAR100GB()
    elif dataset_name == 'cifar10imba':
        from data.imbalanced_datasets import CIFAR10Imba
        dataset = CIFAR10Imba(class_ratios=kwargs['class_ratios'])
    else:
        # Name the unrecognized dataset so callers can diagnose typos.
        raise NotImplementedError(f"Unknown dataset: {dataset_name!r}")

    return dataset
예제 #26
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training entry point (one process per GPU when distributed).

    Sets up the (optionally distributed) model, criterion, SGD optimizer and
    ImageNet-folder data loaders, optionally resumes from a checkpoint, then
    runs the train/validate loop, checkpointing on best top-1 accuracy.

    Args:
        gpu: GPU index for this worker, or None for CPU/DataParallel paths.
        ngpus_per_node: GPUs on this node; used to derive the global rank and
            the per-process batch size / worker count.
        args: Parsed CLI namespace (expects distributed, rank, dist_url,
            arch, batch_size, workers, data, lr, resume, epochs, ...).
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        # With env:// init, the rank comes from the launcher's environment.
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model: torchvision pretrained weights, or a local resnet module.
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = getattr(resnet, args.arch)()

    if not torch.cuda.is_available():
        print('using CPU, this will be slow')
    elif args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Let cuDNN benchmark conv algorithms (safe with fixed input sizes).
    cudnn.benchmark = True

    # Data loading code
    # NOTE(review): img_net is never used below; constructing ImageNet here
    # only parses/validates the metadata under args.data (and may raise if it
    # is missing) — confirm whether this side effect is intentional.
    img_net = ImageNet(args.data)
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Train split: random crop + flip augmentation over an ImageFolder tree.
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # shuffle must be off when a sampler is supplied.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    # Val split: deterministic resize + center crop, no shuffling.
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir,
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Re-seed the sampler so each epoch gets a distinct shard shuffle.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        # Only rank 0 of each node writes checkpoints to avoid clobbering.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)
예제 #27
0
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageNet

print("Pytorch version:{}".format(torch.__version__))

#useful links
#https://pytorch.org/docs/stable/_modules/torchvision/datasets/imagenet.html#ImageNet
#https://pytorch.org/docs/stable/torchvision/datasets.html#imagenet

#%%
"""
#_________________________________Dataloader for ImageNet______________________________#
#                                                                                      #  
# The source code of the class ImageNet from pytorch says                              #
#                                                                                      #
# The dataset is no longer publicly accessible.                                        # 
# You need to download the archives externally and place them in the root directory.   #                                        
#                                                                                      #
#  I couldn't download the image because it takes 5 days to check a new account on     #
#  the ImageNet website so i just implement the dataloader with the class ImageNet     #
#  from pytorch                                                                        #
#                                                                                      #                                                                         
#______________________________________________________________________________________#
"""

# NOTE(review): placeholder — point this at the directory that already holds
# the externally-downloaded ImageNet archives (see the banner above:
# torchvision cannot download them).
root_dir = "path_to_ImageNet_directory_file"

# Uses torchvision's default split ('train' per the linked docs); wrap it in
# a DataLoader that yields one (image, target) pair per iteration.
data = ImageNet(root=root_dir)
data_loader = DataLoader(data, batch_size=1)
예제 #28
0
# Standard ImageNet evaluation preprocessing: bicubic resize to 256,
# center-crop to 224, then normalize with the usual ImageNet statistics.
input_transform = transforms.Compose([
    transforms.Resize(256, PIL.Image.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# ImageNet is no longer downloadable via torchvision: passing download=True
# makes ImageNet.__init__ raise a RuntimeError, so the kwarg is dropped.
# The archives must already be present under DATA_ROOT.
test_dataset = ImageNet(
    DATA_ROOT,
    split="val",
    transform=input_transform,
    target_transform=None,
)

# Deterministic evaluation loader: no shuffling, pinned host memory for
# faster host-to-GPU transfers.
test_loader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# Move the model to GPU and switch off dropout/batch-norm training behavior.
model = model.cuda()
model.eval()
예제 #29
0
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer,
                                                                     T_0=3)

################################################
#       Dataset and train-test helpers
################################################
# Deterministic eval-style preprocessing. NOTE(review): the same transform
# (no random augmentation) is also applied to the *train* split below —
# confirm that is intentional.
transform_val = transforms.Compose([
    transforms.Resize(224),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Root must already contain the ImageNet archives/metadata.
data_root = args.IMAGENET_DIR
# NOTE(review): recent torchvision versions removed ImageNet's ``download``
# argument entirely — confirm the pinned version still accepts download=False.
train_ds = ImageNet(data_root,
                    split='train',
                    download=False,
                    transform=transform_val)
train_loader = torch.utils.data.DataLoader(train_ds,
                                           batch_size=args.BATCHSIZE,
                                           shuffle=True,
                                           drop_last=False,
                                           num_workers=args.NUM_WORKERS,
                                           pin_memory=True)

val_ds = ImageNet(data_root,
                  split='val',
                  download=False,
                  transform=transform_val)
val_loader = torch.utils.data.DataLoader(val_ds,
                                         batch_size=args.BATCHSIZE,
                                         shuffle=True,
예제 #30
0
def create_dataset(name,
                   root,
                   split='validation',
                   search_split=True,
                   class_map=None,
                   load_bytes=False,
                   is_training=False,
                   download=False,
                   batch_size=None,
                   repeats=0,
                   **kwargs):
    """ Dataset factory method

    In parentheses after each arg are the types of dataset supported for each arg, one of:
      * folder - default, timm folder (or tar) based ImageDataset
      * torch - torchvision based datasets
      * TFDS - Tensorflow-datasets wrapper in IterableDataset interface via IterableImageDataset
      * all - any of the above

    Args:
        name: dataset name, empty is okay for folder based datasets
        root: root folder of dataset (all)
        split: dataset split (all)
        search_split: search for split specific child fold from root so one can specify
            `imagenet/` instead of `/imagenet/val`, etc on cmd line / config. (folder, torch/folder)
        class_map: specify class -> index mapping via text file or dict (folder)
        load_bytes: load data, return images as undecoded bytes (folder)
        download: download dataset if not present and supported (TFDS, torch)
        is_training: create dataset in train mode, this is different from the split.
            For Iterable / TFDS it enables shuffle, ignored for other datasets. (TFDS)
        batch_size: batch size hint for (TFDS)
        repeats: dataset repeats per iteration i.e. epoch (TFDS)
        **kwargs: other args to pass to dataset

    Returns:
        Dataset object
    """
    name = name.lower()
    if name.startswith('torch/'):
        # Strip the 'torch/' prefix to get the torchvision dataset name.
        name = name.split('/', 2)[-1]
        torch_kwargs = dict(root=root, download=download, **kwargs)
        if name in _TORCH_BASIC_DS:
            # Simple datasets (MNIST/CIFAR-style) that take a train= flag.
            ds_class = _TORCH_BASIC_DS[name]
            use_train = split in _TRAIN_SYNONYM
            ds = ds_class(train=use_train, **torch_kwargs)
        elif name == 'inaturalist' or name == 'inat':
            assert has_inaturalist, 'Please update to PyTorch 1.10, torchvision 0.11+ for Inaturalist'
            # Split may carry an optional '<target>_<type>/' prefix encoding
            # the iNaturalist target_type; decode it before mapping the split.
            target_type = 'full'
            split_split = split.split('/')
            if len(split_split) > 1:
                target_type = split_split[0].split('_')
                if len(target_type) == 1:
                    target_type = target_type[0]
                split = split_split[-1]
            if split in _TRAIN_SYNONYM:
                split = '2021_train'
            elif split in _EVAL_SYNONYM:
                split = '2021_valid'
            ds = INaturalist(version=split,
                             target_type=target_type,
                             **torch_kwargs)
        elif name == 'places365':
            assert has_places365, 'Please update to a newer PyTorch and torchvision for Places365 dataset.'
            # Map generic split synonyms onto Places365's split names.
            if split in _TRAIN_SYNONYM:
                split = 'train-standard'
            elif split in _EVAL_SYNONYM:
                split = 'val'
            ds = Places365(split=split, **torch_kwargs)
        elif name == 'imagenet':
            # ImageNet only distinguishes 'train'/'val' splits.
            if split in _EVAL_SYNONYM:
                split = 'val'
            ds = ImageNet(split=split, **torch_kwargs)
        elif name == 'image_folder' or name == 'folder':
            # in case torchvision ImageFolder is preferred over timm ImageDataset for some reason
            if search_split and os.path.isdir(root):
                # look for split specific sub-folder in root
                root = _search_split(root, split)
            ds = ImageFolder(root, **kwargs)
        else:
            assert False, f"Unknown torchvision dataset {name}"
    elif name.startswith('tfds/'):
        # Tensorflow-datasets backend, exposed as an IterableImageDataset.
        ds = IterableImageDataset(root,
                                  parser=name,
                                  split=split,
                                  is_training=is_training,
                                  download=download,
                                  batch_size=batch_size,
                                  repeats=repeats,
                                  **kwargs)
    else:
        # FIXME support more advanced split cfg for ImageFolder/Tar datasets in the future
        if search_split and os.path.isdir(root):
            # look for split specific sub-folder in root
            root = _search_split(root, split)
        ds = ImageDataset(root,
                          parser=name,
                          class_map=class_map,
                          load_bytes=load_bytes,
                          **kwargs)
    return ds