def __init__(self, cfg):
    """Build distributed CIFAR-10 train/test DataLoaders.

    Training uses a ``DistributedSampler`` sharded across
    ``cfg.world_size`` processes, so ``cfg.batch_size`` is the *global*
    batch size and each process draws ``cfg.batch_size // cfg.world_size``
    samples per step.

    Args:
        cfg: run configuration; reads ``set``, ``gpu``, ``world_size``,
            ``batch_size`` and ``num_threads``.
    """
    img_size = 32  # CIFAR-10 images are 32x32
    trn_transform = common.get_aug(cfg, img_size, train=True,
                                   mean_std=cifar_mean_std)
    db_path = path_utils.get_datasets_dir(cfg.set)
    trn_dataset = torchvision.datasets.CIFAR10(
        db_path, train=True, transform=trn_transform, download=False)
    self.sampler = torch.utils.data.distributed.DistributedSampler(
        trn_dataset, rank=cfg.gpu, num_replicas=cfg.world_size,
        shuffle=True)
    self.trn_loader = torch.utils.data.DataLoader(
        dataset=trn_dataset,
        batch_size=cfg.batch_size // cfg.world_size,
        num_workers=max(cfg.num_threads // cfg.world_size, 1),
        shuffle=False,  # shuffling is delegated to the sampler
        pin_memory=True,
        drop_last=True,
        sampler=self.sampler,
    )
    # Batches per epoch seen by each replica: every replica draws
    # len(dataset)/world_size samples in chunks of batch_size/world_size,
    # which simplifies to len(dataset)/batch_size.
    self.trn_loader.num_batches = math.floor(
        len(trn_dataset) / cfg.batch_size)
    self.trn_loader.num_files = len(trn_dataset)

    # Evaluation is not distributed: full global batch size, no sampler.
    tst_transform = common.get_aug(cfg, img_size, train=False,
                                   mean_std=cifar_mean_std)
    tst_dataset = torchvision.datasets.CIFAR10(
        db_path, train=False, transform=tst_transform, download=False)
    self.tst_loader = torch.utils.data.DataLoader(
        dataset=tst_dataset,
        batch_size=cfg.batch_size,
        shuffle=False,
        num_workers=max(cfg.num_threads // 4, 1),
        pin_memory=True,
        drop_last=True)
    self.tst_loader.num_batches = math.floor(
        len(tst_dataset) / cfg.batch_size)
    self.tst_loader.num_files = len(tst_dataset)
    # CIFAR-10 has no separate validation split; reuse the test loader.
    self.val_loader = self.tst_loader
    self.knn_loader = None
def create_loader(self, imgs_lst, cfg, is_training):
    """Build a DataLoader from a csv image list.

    Args:
        imgs_lst: csv filename (with leading '/') relative to the
            dataset directory for ``cfg.set``.
        cfg: run configuration; reads ``set``, ``batch_size`` and
            ``num_threads``.
        is_training: enables shuffling and training-time augmentation.

    Returns:
        A DataLoader annotated with ``num_batches`` and ``num_files``,
        or ``None`` when the csv file does not exist.
    """
    csv_path = path_utils.get_datasets_dir(cfg.set) + imgs_lst
    if not osp.exists(csv_path):
        return None

    frame = pd.read_csv(csv_path)
    images, labels = self.imgs_and_lbls(frame)
    n_samples = len(images)
    loader = torch.utils.data.DataLoader(
        CustomDataset(images, labels, is_training=is_training),
        batch_size=cfg.batch_size,
        shuffle=is_training,
        num_workers=cfg.num_threads)
    # The last partial batch is kept, hence ceil.
    loader.num_batches = math.ceil(n_samples / cfg.batch_size)
    loader.num_files = n_samples
    return loader
def __init__(self, cfg):
    """Build train/test/val DataLoaders from csv lists in the dataset dir.

    Reads ``/lists/trn.csv`` to derive a label -> contiguous-index
    mapping, then creates the three loaders via ``create_loader``.

    Args:
        cfg: run configuration; reads ``set`` plus whatever
            ``create_loader`` consumes.
    """
    db_path = path_utils.get_datasets_dir(cfg.set)
    self.img_path = db_path + '/jpg/'
    csv_file = '/lists/trn.csv'
    trn_data_df = pd.read_csv(db_path + csv_file)
    lbls = trn_data_df['label']
    # np.unique already returns sorted unique values, so the original
    # extra np.sort was redundant.
    unique_lbls = np.unique(lbls)
    # Map each raw label to a contiguous class index [0, num_classes).
    self.lbl2idx_dict = {lbl: idx for idx, lbl in enumerate(unique_lbls)}
    self.final_lbls = [self.lbl2idx_dict[x] for x in lbls.values]
    self.num_classes = len(self.lbl2idx_dict)
    self.train_loader = self.create_loader(csv_file, cfg, is_training=True)
    csv_file = '/lists/tst.csv'
    self.tst_loader = self.create_loader(csv_file, cfg, is_training=False)
    csv_file = '/lists/val.csv'
    self.val_loader = self.create_loader(csv_file, cfg, is_training=False)