Code example #1
File: helpers.py Project: zmd971202/IronyGeneration
def prepare_senti_data(hps, vocab):
    print('preparing senti data...')
    dataset = DisDataset(hps.senti_train_data_path,
                         hps.senti_train_label_path,
                         vocab,
                         debug=False)

    weights = make_weights_for_balanced_classes(dataset.ls, 2, PosOverNeg=1)
    sampler = WeightedRandomSampler(weights, len(weights))

    train_data_loader = DataLoader(dataset,
                                   batch_size=hps.senti_batch_size,
                                   shuffle=False,
                                   collate_fn=collate_fn,
                                   drop_last=False,
                                   sampler=sampler)

    dataset = DisDataset(hps.senti_dev_data_path,
                         hps.senti_dev_label_path,
                         vocab,
                         debug=False)
    weights = make_weights_for_balanced_classes(dataset.ls, 2, PosOverNeg=1)
    sampler = WeightedRandomSampler(weights, len(weights))
    dev_data_loader = DataLoader(dataset,
                                 batch_size=hps.senti_batch_size,
                                 shuffle=False,
                                 collate_fn=collate_fn,
                                 drop_last=False,
                                 sampler=sampler)
    return train_data_loader, dev_data_loader
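The helper make_weights_for_balanced_classes is not shown in this example. Purely as an assumption about what it computes, a minimal sketch that returns one inverse-class-frequency weight per sample (matching the (labels, n_classes, PosOverNeg) signature used above) could look like this:

def make_weights_for_balanced_classes(labels, n_classes, PosOverNeg=1):
    # Hypothetical sketch: weight each sample by the inverse frequency of its class
    # so that WeightedRandomSampler draws each class equally often in expectation.
    counts = [0] * n_classes
    for y in labels:
        counts[y] += 1
    per_class = [len(labels) / max(c, 1) for c in counts]
    per_class[-1] *= PosOverNeg  # assumption: the last class is the "positive" one
    return [per_class[y] for y in labels]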
Code example #2
def load_dataset(args,
                 INPUT_SIZE=[112, 112],
                 RGB_MEAN=[0.5, 0.5, 0.5],
                 RGB_STD=[0.5, 0.5, 0.5],
                 val_datasets=[
                     'lfw', 'cfp_ff', 'cfp_fp', 'agedb_30', 'calfw', 'cplfw',
                     'vgg2_fp'
                 ]):
    train_transform = transforms.Compose([
        transforms.Resize(
            [int(128 * INPUT_SIZE[0] / 112),
             int(128 * INPUT_SIZE[0] / 112)]),
        transforms.RandomCrop([INPUT_SIZE[0], INPUT_SIZE[1]]),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD),
    ])
    train_data = dset.ImageFolder(
        os.path.join(args.data_path, 'CASIA-maxpy-align'), train_transform)
    weights = torch.DoubleTensor(
        make_weights_for_balanced_classes(train_data.imgs,
                                          len(train_data.classes)))
    if args.distributed:
        from catalyst.data.sampler import DistributedSamplerWrapper
        train_sampler = DistributedSamplerWrapper(
            WeightedRandomSampler(weights, len(weights)))
    else:
        train_sampler = WeightedRandomSampler(weights, len(weights))
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(
            [int(128 * INPUT_SIZE[0] / 112),
             int(128 * INPUT_SIZE[0] / 112)]),
        transforms.CenterCrop([INPUT_SIZE[0], INPUT_SIZE[1]]),
        transforms.ToTensor(),
        transforms.Normalize(mean=RGB_MEAN, std=RGB_STD)
    ])
    val_loaders = []
    for name in val_datasets:
        carray = bcolz.carray(rootdir=os.path.join(args.data_path, name),
                              mode='r')
        val_data_tensor = torch.tensor(carray[:, [2, 1, 0], :, :]) * 0.5 + 0.5
        val_data = TensorsDataset(val_data_tensor, val_transform)
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True,
                                                 sampler=None)
        issame = np.load('{}/{}_list.npy'.format(args.data_path, name))
        val_loaders.append((name, val_loader, issame))

    return train_loader, val_loaders
Code example #3
File: helpers.py Project: zmd971202/IronyGeneration
def prepare_discriminator_data(hps, vocab):
    print('preparing dis data...')
    train_dataset = DisDataset(hps.dis_train_data_path,
                               hps.dis_train_label_path,
                               vocab,
                               debug=False)

    train_weights = make_weights_for_balanced_classes(train_dataset.ls,
                                                      2,
                                                      PosOverNeg=1)
    train_sampler = WeightedRandomSampler(train_weights, len(train_weights))

    train_data_loader = DataLoader(train_dataset,
                                   batch_size=hps.dis_batch_size,
                                   shuffle=False,
                                   collate_fn=collate_fn,
                                   drop_last=False,
                                   sampler=train_sampler)

    dev_dataset = DisDataset(hps.dis_dev_data_path,
                             hps.dis_dev_label_path,
                             vocab,
                             debug=False)
    dev_weights = make_weights_for_balanced_classes(dev_dataset.ls,
                                                    2,
                                                    PosOverNeg=1)
    dev_sampler = WeightedRandomSampler(dev_weights, len(dev_weights))
    dev_data_loader = DataLoader(dev_dataset,
                                 batch_size=hps.dis_batch_size,
                                 shuffle=False,
                                 collate_fn=collate_fn,
                                 drop_last=False,
                                 sampler=dev_sampler)
    return train_data_loader, dev_data_loader
Code example #4
    def get_batch(self):
        # get sequence idx
        sampler = WeightedRandomSampler(self.memory['sum_priority'],
                                        self.batch_size)
        seq_idx = list(sampler)

        # get episode idx
        epi_idx = []
        for seq in seq_idx:
            sampler = WeightedRandomSampler(self.memory['priority'][seq], 1)
            epi_idx.append(list(sampler)[0])

        # get batch
        batch = {}
        for key in self.key_list:
            batch[key] = list()

        for key in self.key_list:
            for seq, epi in zip(seq_idx, epi_idx):
                start_v = self.overlap_size * epi
                end_v = self.overlap_size * epi + self.seq_size

                if end_v >= len(self.memory['reward'][seq]):  # out of range
                    end_v = len(self.memory['reward'][seq])
                    start_v = end_v - self.seq_size

                if key == 'state' or key == 'done':
                    end_v += self.multi_step

                if key == 'state':
                    state_list = []
                    for i in range(-3, 0):
                        ss = start_v + i
                        if ss < 0:
                            ss = 0
                        state_list.append(self.memory[key][seq][ss])
                    state_list = np.array(state_list)
                    state_list = np.concatenate(
                        (state_list, self.memory[key][seq][start_v:end_v]),
                        axis=0)
                    batch[key].append(state_list)
                elif key == 'recc' or key == 'target_recc':
                    batch[key].append(self.memory[key][seq][start_v])
                else:
                    batch[key].append(self.memory[key][seq][start_v:end_v])
            batch[key] = np.array(batch[key])

        p = np.array([
            self.memory['priority'][seq][epi]
            for seq, epi in zip(seq_idx, epi_idx)
        ]) / self.sum_p  # wrap in np.array so the division is element-wise
        weights = (self.N * p)**(-self.importance_exp)
        weights /= np.max(weights)

        return batch, seq_idx, epi_idx, weights
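The last few lines above apply the standard prioritized-replay importance-sampling correction, weights = (N * p) ** (-importance_exp), normalized by its maximum. A small self-contained check of that formula with made-up numbers:

import numpy as np

N = 1000                            # buffer size (toy value)
p = np.array([0.05, 0.01, 0.002])   # sampling probabilities of the drawn items
importance_exp = 0.4                # beta

weights = (N * p) ** (-importance_exp)
weights /= np.max(weights)
print(weights)  # ~[0.28, 0.53, 1.0]: rarer items get the larger corrective weight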
Code example #5
def load_data(train_data, val_data, test_data, opts):
    """Creates training and test data loaders.
    """

    train_dataset = KeystrokeDataset(train_data, opts)
    val_dataset = KeystrokeDataset(val_data, opts)
    test_dataset = KeystrokeDataset(test_data, opts)

    train_sampler = WeightedRandomSampler([
        1 / train_dataset.class_counts[sample[1]]
        for sample in train_dataset.samples
    ], len(train_dataset.samples))
    val_sampler = WeightedRandomSampler([
        1 / val_dataset.class_counts[sample[1]]
        for sample in val_dataset.samples
    ], len(val_dataset.samples))
    test_sampler = WeightedRandomSampler([
        1 / test_dataset.class_counts[sample[1]]
        for sample in test_dataset.samples
    ], len(test_dataset.samples))

    if opts.balance_classes:
        print("Using WeightedRandomSampler")
        train_dloader = DataLoader(dataset=train_dataset,
                                   batch_size=min(1, len(train_dataset)),
                                   sampler=train_sampler,
                                   num_workers=opts.num_workers)
        val_dloader = DataLoader(dataset=val_dataset,
                                 batch_size=min(1, len(val_dataset)),
                                 sampler=val_sampler,
                                 num_workers=opts.num_workers)
        test_dloader = DataLoader(dataset=test_dataset,
                                  batch_size=min(1, len(test_dataset)),
                                  sampler=test_sampler,
                                  num_workers=opts.num_workers)
    else:
        print("Not weighting classes")
        train_dloader = DataLoader(dataset=train_dataset,
                                   batch_size=min(1, len(train_dataset)),
                                   shuffle=True,
                                   num_workers=opts.num_workers)
        val_dloader = DataLoader(dataset=val_dataset,
                                 batch_size=min(1, len(val_dataset)),
                                 shuffle=True,
                                 num_workers=opts.num_workers)
        test_dloader = DataLoader(dataset=test_dataset,
                                  batch_size=min(1, len(test_dataset)),
                                  shuffle=True,
                                  num_workers=opts.num_workers)

    return train_dataset.channels, len(
        train_dataset.class_counts), train_dloader, val_dloader, test_dloader
Code example #6
File: dataset.py Project: Tomiinek/WaveRNN
    def __init__(self, data_source):
        label_freq = {}
        for idx in range(len(data_source)):
            label = data_source.items[idx]['language']
            if label in label_freq:
                label_freq[label] += 1
            else:
                label_freq[label] = 1

        total = float(sum(label_freq.values()))
        weights = [
            total / label_freq[data_source.items[idx]['language']]
            for idx in range(len(data_source))
        ]
        self._sampler = WeightedRandomSampler(weights, len(weights))
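With the inverse-frequency weights computed above, every language receives the same total sampling weight no matter how many items it contributes. A tiny worked example under an assumed three-to-one split:

# Toy data (assumed): three English items and one German item.
labels = ['en', 'en', 'en', 'de']
freq = {}
for lab in labels:
    freq[lab] = freq.get(lab, 0) + 1

total = float(len(labels))
weights = [total / freq[lab] for lab in labels]
print(weights)  # [1.33, 1.33, 1.33, 4.0] -> both languages sum to 4.0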
Code example #7
    def get_batch(self):
        # get sequence idx
        sampler = WeightedRandomSampler(self.memory['sum_priority'],
                                        self.batch_size)
        seq_idx = list(sampler)

        # get episode idx
        epi_idx = []
        for seq in seq_idx:
            sampler = WeightedRandomSampler(self.memory['priority'][seq], 1)
            epi_idx.append(list(sampler)[0])

        # get batch
        batch = {}
        for key in self.key_list + ['next_state']:
            batch[key] = list()

        for seq, epi in zip(seq_idx, epi_idx):
            next_idx = epi + self.multi_step + 1

            for key in ['state']:
                state_list = []
                for i in range(-3, 1):  # -3 ~ 0
                    ss = epi + i
                    if ss < 0:
                        ss = 0
                    state_list.append(self.memory[key][seq][ss])
                state_list = np.concatenate(state_list, axis=0)
                batch[key].append(state_list)

            batch['action'].append(self.memory['action'][seq][epi])
            batch['reward'].append(self.memory['reward'][seq][epi])

            batch['next_state'].append(
                np.concatenate(self.memory['state'][seq][next_idx -
                                                         4:next_idx],
                               axis=0))
            batch['done'].append(self.memory['done'][seq][next_idx - 1])

        for key in self.key_list + ['next_state']:
            batch[key] = np.array(batch[key])

        p = np.array([
            self.memory['priority'][seq][epi]
            for seq, epi in zip(seq_idx, epi_idx)
        ]) / self.sum_p  # wrap in np.array so the division is element-wise
        weights = (self.N * p)**(-self.importance_exp)
        weights /= np.max(weights)

        return batch, seq_idx, epi_idx, weights
Code example #8
def _setup_sampler(sampler_type, num_iters, batch_size):
    if sampler_type is None:
        return None

    if sampler_type == "weighted":
        from torch.utils.data.sampler import WeightedRandomSampler

        w = torch.ones(num_iters * batch_size, dtype=torch.float)
        for i in range(num_iters):
            w[batch_size * i:batch_size * (i + 1)] += i * 1.0
        return WeightedRandomSampler(w,
                                     num_samples=num_iters * batch_size,
                                     replacement=True)

    if sampler_type == "distributed":
        from torch.utils.data.distributed import DistributedSampler
        import torch.distributed as dist

        num_replicas = 1
        rank = 0
        if dist.is_available() and dist.is_initialized():
            num_replicas = dist.get_world_size()
            rank = dist.get_rank()

        dataset = torch.zeros(num_iters * batch_size)
        return DistributedSampler(dataset,
                                  num_replicas=num_replicas,
                                  rank=rank)
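A hedged usage sketch of _setup_sampler: the dataset below is only a stand-in tensor sized to match the sampler's weights, so the snippet stays self-contained.

import torch
from torch.utils.data import DataLoader, TensorDataset

num_iters, batch_size = 5, 4
sampler = _setup_sampler("weighted", num_iters, batch_size)

dummy = TensorDataset(torch.arange(num_iters * batch_size, dtype=torch.float))
loader = DataLoader(dummy, batch_size=batch_size, sampler=sampler)
for (batch,) in loader:
    print(batch)  # later indices appear more often since their weights grow with i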
Code example #9
    def get_dataloaders(self, batch_size=16, n_workers=4):
        # note: no data augmentation applied since all cards will be oriented the same way 
        transform = CardTransforms(size=128)
        trainset = CardDataset(self.train_img_paths, self.train_labels,
                               compute_weights=self.uniform_sampling,
                               cardtransforms=transform)
        sampler = None
        if self.uniform_sampling:
            sampler = WeightedRandomSampler(trainset.sample_weights,
                                            len(trainset.sample_weights),
                                            replacement=True)
            print("created uniform sampler")

        trainloader = DataLoader(trainset,
                                batch_size=batch_size,
                                shuffle=not self.uniform_sampling,
                                sampler=sampler,
                                num_workers=n_workers,
                                pin_memory=True)
        valloader = None
        if self.val_split > 0.0:
            valset = CardDataset(self.val_img_paths, self.val_labels,
                                 compute_weights=False, cardtransforms=transform)
            valloader = DataLoader(valset,
                                  batch_size=batch_size, 
                                  shuffle=False,
                                  sampler=None,
                                  num_workers=n_workers, 
                                  pin_memory=True)
        return trainloader, valloader
Code example #10
 def on_train_begin(self, **kwargs):
     self.old_dl = self.data.train_dl
     weights, n_samples = SampleWeights(self.data.train_dl, self.data.c,
                                        self.weights).by_class()
     sampler = WeightedRandomSampler(weights, n_samples)
     self.data.train_dl = self.data.train_dl.new(shuffle=False,
                                                 sampler=sampler)
Code example #11
def retrain():
    dataset = datasets.PixelLinkIC15Dataset(config.train_images_dir,
                                            config.train_labels_dir)
    sampler = WeightedRandomSampler([1 / len(dataset)] * len(dataset),
                                    config.batch_size,
                                    replacement=True)
    dataloader = DataLoader(dataset,
                            batch_size=config.batch_size,
                            sampler=sampler)
    my_net = net.Net()
    if config.gpu:
        device = torch.device("cuda:0")
        my_net = my_net.cuda()
        if config.multi_gpu:
            my_net = nn.DataParallel(my_net)
    else:
        device = torch.device("cpu")
    my_net.load_state_dict(
        torch.load(config.saving_model_dir +
                   '%d.mdl' % config.retrain_model_index))
    optimizer = optim.SGD(my_net.parameters(),
                          lr=config.retrain_learning_rate2,
                          momentum=config.momentum,
                          weight_decay=config.weight_decay)
    optimizer2 = optim.SGD(my_net.parameters(),
                           lr=config.retrain_learning_rate,
                           momentum=config.momentum,
                           weight_decay=config.weight_decay)
    train(config.retrain_epoch, config.retrain_model_index, dataloader, my_net,
          optimizer, optimizer2, device)
Code example #12
File: load_data.py Project: HunYuanfeng/DST2020
def LoadData(data, word2id, batch_size, use_weighted_sample=False):
    data_info = dict()
    data_keys = data[0].keys()
    for k in data_keys:
        data_info[k] = []
    for pair in data:
        for k in data_keys:
            data_info[k].append(pair[k])
    dataset = Dataset(data_info, word2id, sequicity=0)

    if use_weighted_sample:
        weights = [
            1 if data["gating_label"] == [0] * len(ALL_SLOTS) else 9
            for data in dataset
        ]
        from torch.utils.data.sampler import WeightedRandomSampler
        sampler = WeightedRandomSampler(weights,
                                        num_samples=len(dataset),
                                        replacement=True)
    else:
        sampler = None

    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=batch_size,
                                              shuffle=(sampler is None),
                                              sampler=sampler,
                                              collate_fn=collate_fn)
    #data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

    return data_loader
Code example #13
def get_loader(dataset, train, reweight_groups, **kwargs):
    if not train:  # Validation or testing
        assert reweight_groups is None
        shuffle = False
        sampler = None
    elif not reweight_groups:  # Training but not reweighting
        shuffle = True
        sampler = None
    else:  # Training and reweighting
        # When the --robust flag is not set, reweighting changes the loss function
        # from the normal ERM (average loss over each training example)
        # to a reweighted ERM (weighted average where each (y,c) group has equal weight).
        # When the --robust flag is set, reweighting does not change the loss function
        # since the minibatch is only used for mean gradient estimation for each group separately
        group_weights = len(dataset) / dataset._group_counts
        weights = group_weights[dataset._group_array]
        # Replacement needs to be set to True, otherwise we'll run out of minority samples
        sampler = WeightedRandomSampler(weights,
                                        len(dataset),
                                        replacement=True)
        shuffle = False

    # assert shuffle == False
    loader = DataLoader(dataset, shuffle=shuffle, sampler=sampler, **kwargs)
    return loader
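In the reweighting branch above, each group's weight is the dataset size divided by the group count, indexed per example. A small numeric illustration of that step with toy counts:

import torch

# Toy setup (assumed): 10 examples spread over 3 (y, c) groups.
group_counts = torch.tensor([6., 3., 1.])
group_array = torch.tensor([0, 0, 0, 0, 0, 0, 1, 1, 1, 2])

group_weights = 10 / group_counts      # tensor([ 1.6667,  3.3333, 10.0000])
weights = group_weights[group_array]   # one weight per example
print(weights)  # the lone example in the rare group carries weight 10.0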
Code example #14
def test_dist_proxy_sampler():

    weights = torch.ones(100)
    weights[:50] += 1
    num_samples = 200
    sampler = WeightedRandomSampler(weights, num_samples)

    num_replicas = 8
    dist_samplers = [DistributedProxySampler(sampler, num_replicas=num_replicas, rank=i) for i in range(num_replicas)]

    for seed in range(100):
        torch.manual_seed(seed)
        true_indices = list(sampler)

        indices_per_rank = []
        for s in dist_samplers:
            s.set_epoch(seed)
            indices_per_rank += list(s)

        set_indices_per_rank = set(indices_per_rank)
        set_true_indices = set(true_indices)
        assert (
            set_indices_per_rank == set_true_indices
        ), f"{set_true_indices - set_indices_per_rank} | {set_indices_per_rank - set_true_indices}"

    with pytest.raises(TypeError, match=r"Argument sampler should be instance of torch Sampler"):
        DistributedProxySampler(None)

    with pytest.raises(TypeError, match=r"Argument sampler should have length"):
        DistributedProxySampler(Sampler([1]))
Code example #15
File: dataloader.py Project: Tomoya-K-0504/ml_dl_pkg
def set_ml_dataloader(dataset, phase, cfg, shuffle=False):
    if phase in ['test', 'infer']:
        dataloader = WrapperDataLoader(dataset,
                                       batch_size=cfg.batch_size,
                                       num_workers=cfg.n_jobs,
                                       pin_memory=True,
                                       sampler=None,
                                       shuffle=False,
                                       drop_last=False)
    else:
        if sum(cfg.sample_balance) != 0.0:
            if cfg.task_type.value == 'classify':
                weights = make_weights_for_balanced_classes(
                    dataset.get_labels(), cfg.sample_balance)
            else:
                weights = [torch.Tensor([1.0])] * len(dataset.get_labels())
            sampler = WeightedRandomSampler(weights,
                                            int(len(dataset) * cfg.epoch_rate))
        else:
            sampler = None
        dataloader = WrapperDataLoader(dataset,
                                       batch_size=len(dataset),
                                       num_workers=cfg.n_jobs,
                                       pin_memory=True,
                                       sampler=sampler,
                                       shuffle=shuffle)
    return dataloader
Code example #16
    def test_fsd_train(self):
        val_prop = 0.2
        # grouping_variables = ['label', 'manually_verified'] ##For Stratified Split and Sampling
        data_path = f'{self.data_base_dir}/Freesound/FSDKaggle2018.meta/train_post_competition.csv'
        dataset = FSDTrainDataset(data_path)
        samples = 1000
        batch_size = 2

        train, val = train_test_split(dataset.classes, test_size=val_prop,
                stratify=dataset.classes['factor'])


        dataset_train = Subset(dataset, train['idx'])
        dataset_train_weights = [
            1 / (len(dataset.counts.keys()) *
                 dataset.counts[dataset.classes.loc[a]['factor']])
            if a in train['idx'] else 0
            for a in range(len(dataset))
        ]
        train_sampler = WeightedRandomSampler(
            weights = dataset_train_weights,
            replacement=True,
            num_samples = samples)

        train_loader = DataLoader(
            dataset=dataset_train,
            batch_size = batch_size,
            sampler = train_sampler, # this is necessary for some reason
            pin_memory = True,
            collate_fn = dataset.collate_batch
        )

        for i, (inputs, labels) in enumerate(train_loader):
            print(inputs)
            print(labels)
            break
Code example #17
def main(cfg):
    torch.multiprocessing.set_sharing_strategy('file_system')
    seed_torch(seed=cfg.seed)

    output_dir = os.path.join(cfg.output_dir, cfg.desc)
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    train_dataset = build_dataset(cfg, phase='train')
    test_dataset = build_dataset(cfg, phase='test')

    if cfg.DATA.weighted_sample:
        train_dl = DataLoader(train_dataset,
                              batch_size=32,
                              sampler=WeightedRandomSampler(
                                  train_dataset.get_label_weight(),
                                  num_samples=5000),
                              num_workers=0,
                              drop_last=True)
    else:
        train_dl = DataLoader(train_dataset,
                              batch_size=32,
                              shuffle=True,
                              num_workers=16,
                              drop_last=True)
    test_dl = DataLoader(test_dataset,
                         batch_size=32,
                         num_workers=8,
                         drop_last=True)

    solver = Solver(cfg)

    solver.train(train_dl, test_dl)
Code example #18
    def test_track2(self):
        val_prop = 0.2
        grouping_variables = ['Covid_status', 'Gender'] ##For Stratified Split and Sampling
        data_path = f'{self.data_base_dir}/DiCOVA_Train_Val_Data_Release/metadata.csv'
        samples = 1000
        batch_size = 2

        data_path = f'{self.data_base_dir}/DiCOVA_Track_2_Release/metadata.csv'
        subdatasets = ['breathing-deep', 'counting-normal', 'vowel-e']

        for subdataset in subdatasets:
            dataset = DiCOVATrack2(data_path, grouping_variables, subdataset)
            train, val = train_test_split(dataset.classes, test_size=val_prop,
                    stratify=dataset.classes['factor'])

            dataset_train = Subset(dataset, train['idx'])
            dataset_train_weights = [
                1 / (len(dataset.counts.keys()) *
                     dataset.counts[dataset.classes.loc[a]['factor']])
                if a in train['idx'] else 0
                for a in range(len(dataset))
            ]
            train_sampler = WeightedRandomSampler(
                weights = dataset_train_weights,
                replacement=True,
                num_samples = samples)

            train_loader = DataLoader(
                dataset=dataset_train,
                batch_size = batch_size,
                sampler = train_sampler, # this is necessary for some reason
                pin_memory = True,
                collate_fn = dataset.collate_batch
            )

            for i, (inputs, labels) in enumerate(train_loader):
                print(inputs)
                print(labels)
                break
Code example #19
    def _get_data_loader(self, config, names=None):
        transformations = self.__transform_func()
        if names is None:
            train_names, val_names = self._split_data(self.__train_dir, config.train_percent)
        else:
            train_names, val_names = names

        loader = self.__get_loader(config)

        train_folder = ImageFolder(self.__label_file, self.__train_dir, train_names, transform=transformations['train'],
                                   loader=loader)
        val_folder = ImageFolder(self.__label_file, self.__train_dir, val_names, transform=transformations['val'],
                                 loader=loader)
        if not len(train_folder) or not len(val_folder):
            raise ValueError('One of the image folders contains zero data, train: %s, val: %s' %
                             (len(train_folder), len(val_folder)))

        sampler = None
        if config.weigh_sample:
            sampler = WeightedRandomSampler(train_folder.weights, len(train_folder), replacement=True)

        train_loader = torch.utils.data.DataLoader(train_folder, batch_size=config.batch_size,
                                                   shuffle=(sampler is None), sampler=sampler,
                                                   num_workers=config.workers, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(val_folder, batch_size=config.batch_size, shuffle=True,
                                                 num_workers=config.workers, pin_memory=True)

        return train_loader, val_loader
Code example #20
File: trainer.py Project: bcaitech1/p1-img-MaiHon
 def set_loader(self, trn_ds, val_ds, batch):
     if self.cfg.weighed_sampler:
         targets = torch.from_numpy(trn_ds.data.label.values)
         class_sample_count = torch.tensor(
             [(targets == t).sum() for t in torch.unique(targets, sorted=True)])
         weight = 1. / class_sample_count.float()
         samples_weight = torch.tensor([weight[t] for t in targets])
         sampler = WeightedRandomSampler(samples_weight.double(), len(samples_weight))
         shuffle = False
     else:
         sampler = None
         shuffle = True
     
     logging.info("Dataloader Setting...")
     self.trn_dl = DataLoader(
         trn_ds,
         batch_size=batch,
         shuffle=shuffle,
         num_workers=4,
         sampler=sampler,
         pin_memory = True
     )
     
     self.val_dl = DataLoader(
         val_ds,
         batch_size=batch,
         shuffle=False,
         num_workers=4,
         pin_memory = True
     )
     logging.info("Done.\n")
Code example #21
File: q_simulator.py Project: MarioDuran/qutorch
def get_counts(density_matrix, num_shots, target, print_results=False):

    dm_diag = torch.diagonal(density_matrix, 0)
    special_qubit = torch.diagonal(density_matrix, 0)[len(dm_diag) // 2 -
                                                      1:len(dm_diag) // 2 + 1]

    number_of_qubits = density_matrix.shape[1].bit_length() - 1
    sampler = WeightedRandomSampler(special_qubit, num_shots)
    format = '{0:0' + str(density_matrix.shape[1].bit_length() - 1) + 'b}'

    counter = Counter()
    for idx in sampler:
        counter[idx] += 1

    counter_s = sorted(counter.items())
    # m_counts = counter_s[0][1] - counter_s[1][1]
    m_counts = counter[0] - counter[1]

    # Print before returning; in the original layout this block sat after an early
    # return and could never run.
    if print_results:
        print('{')
        for element in sorted(counter.items()):
            print("\t\"" + format.format(element[0]) + '\" : ' +
                  str(element[1]))
        print('}')

    return m_counts / num_shots
Code example #22
File: test_auto.py Project: vieozhu/ignite
def test_dist_proxy_sampler():
    import torch
    from torch.utils.data import WeightedRandomSampler

    weights = torch.ones(100)
    weights[:50] += 1
    num_samples = 200
    sampler = WeightedRandomSampler(weights, num_samples)

    num_replicas = 8
    dist_samplers = [
        DistributedProxySampler(sampler, num_replicas=num_replicas, rank=i)
        for i in range(num_replicas)
    ]

    for seed in range(100):
        torch.manual_seed(seed)
        true_indices = list(sampler)

        indices_per_rank = []
        for s in dist_samplers:
            s.set_epoch(seed)
            indices_per_rank += list(s)

        set_indices_per_rank = set(indices_per_rank)
        set_true_indices = set(true_indices)
        assert set_indices_per_rank == set_true_indices, "{} | {}".format(
            set_true_indices - set_indices_per_rank,
            set_indices_per_rank - set_true_indices)
Code example #23
def setup_db_dl(train_batch_size=4, test_batch_size=4, get_data=get_data):
    full_dataset = DBDataset(TrainImage,
                             MathSymbol,
                             get_data,
                             get_label,
                             get_class_name,
                             filter=valid_func)
    test_train_split = 0.9
    train_size = int(test_train_split * len(full_dataset))
    test_size = len(full_dataset) - train_size
    train_dataset, test_dataset = random_split(full_dataset,
                                               [train_size, test_size])
    weights = torch.zeros(len(train_dataset))
    for i, data in enumerate(train_dataset):
        weights[i] = 1. / (math.log(full_dataset.class_counts[data[1]]) + 1.0)
    sampler = WeightedRandomSampler(weights, len(weights))

    dataloaders = {
        "train":
        DataLoader(train_dataset,
                   batch_size=train_batch_size,
                   num_workers=1,
                   sampler=sampler),
        "test":
        DataLoader(test_dataset,
                   batch_size=test_batch_size,
                   shuffle=True,
                   num_workers=1)
    }

    return dataloaders, full_dataset
Code example #24
    def __init__(self,
                 dataset,
                 batch_size,
                 shuffle,
                 num_workers,
                 val_split=0.0,
                 weights=None):
        self.shuffle = shuffle
        self.dataset = dataset
        self.nbr_examples = len(dataset)

        if weights is not None:
            self.train_sampler = WeightedRandomSampler(
                weights, num_samples=self.nbr_examples, replacement=True)
            self.val_sampler = None
            self.shuffle = False
        elif val_split:
            self.train_sampler, self.val_sampler = self._split_sampler(
                val_split)
        else:
            self.train_sampler, self.val_sampler = None, None

        self.init_kwargs = {
            'dataset': self.dataset,
            'batch_size': batch_size,
            'shuffle': self.shuffle,
            'num_workers': num_workers,
            'pin_memory': True,
            'drop_last': True
        }
        super(BaseDataLoader, self).__init__(sampler=self.train_sampler,
                                             **self.init_kwargs)
Code example #25
def detection_loader(dataset, train=True, batch_size=None):
    """
    Creates a training set dataloader for a dataset. Uses a sampler to load the same number of datapoints from
    both classes. The dataloader returns shuffled data, as a sampler is used to balance the classes (on average, half
    the samples seen will be quotes and the other half won't be).

    :param dataset: QuoteDetectionDataset
        The dataset used from which to load data
    :param train: boolean
        Whether to load a training or testing dataloader.
    :param batch_size: int.
        The batch size. The default is None, where the batch size is the size of the data.
    :return: torch.utils.data.DataLoader
        DataLoader for quote detection.
    """
    if batch_size is None:
        batch_size = len(dataset)
    if train:
        weights, num_samples = sampler_weights(dataset)
        sampler = WeightedRandomSampler(weights=weights,
                                        num_samples=num_samples,
                                        replacement=True)
        return DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          sampler=sampler)
    else:
        return DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=False)
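The helper sampler_weights is defined elsewhere in that project. Purely as an assumption consistent with the docstring, a minimal sketch that returns one inverse-class-frequency weight per example for a dataset yielding (features, label) pairs could be:

def sampler_weights(dataset):
    # Hypothetical sketch: weight every example by the inverse frequency of its label,
    # so both classes are drawn equally often in expectation.
    labels = [int(label) for _, label in dataset]
    counts = {lab: labels.count(lab) for lab in set(labels)}
    weights = [1.0 / counts[lab] for lab in labels]
    return weights, len(weights)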
Code example #26
    def train_dataloader(self):
        if self.sampler:
            targets = []
            for target in self.targets_sampler:
                targets.append(target)
            targets = torch.tensor(targets).type(torch.long)
            # Compute samples weight (each sample should get its own weight)
            class_sample_count = torch.tensor([
                (targets == t).sum()
                for t in torch.unique(targets, sorted=True)
            ])
            weight = 1. / class_sample_count.float()
            samples_weight = torch.tensor([weight[t] for t in targets])

            # Create sampler, dataset, loader
            sampler = WeightedRandomSampler(samples_weight,
                                            len(samples_weight))
            shuffle = False
        else:
            shuffle = True
            sampler = None

        if self.overfit:
            shuffle = False
        return DataLoader(self.train_dataset,
                          sampler=sampler,
                          batch_size=self.batch_size,
                          shuffle=shuffle,
                          num_workers=cpu_count(),
                          collate_fn=self.my_collate)
Code example #27
File: PixelLink.py Project: happog/FudanOCR
    def main():
        # loading data
        dataset = datasets.PixelLinkIC15Dataset(opt.train_images_dir,
                                                opt.train_labels_dir)
        sampler = WeightedRandomSampler([1 / len(dataset)] * len(dataset),
                                        opt.batch_size,
                                        replacement=True)
        dataloader = DataLoader(dataset,
                                batch_size=opt.batch_size,
                                sampler=sampler)
        my_net = net.Net()  # construct neural network

        # choose gpu or cpu
        if opt.gpu:
            device = torch.device("cuda:0")
            my_net = my_net.cuda()
            if opt.multi_gpu:
                my_net = nn.DataParallel(my_net)
        else:
            device = torch.device("cpu")

        # train, optimize
        my_net.apply(weight_init)
        optimizer = optim.SGD(my_net.parameters(),
                              lr=opt.learning_rate,
                              momentum=opt.momentum,
                              weight_decay=opt.weight_decay)
        optimizer2 = optim.SGD(my_net.parameters(),
                               lr=opt.learning_rate2,
                               momentum=opt.momentum,
                               weight_decay=opt.weight_decay)

        iteration = 0
        train(opt.epoch, iteration, dataloader, my_net, optimizer, optimizer2,
              device)
Code example #28
def dataloader(cfg, mode):
    if mode == 'train':
        transforms = T.Compose([
            T.ToPILImage(),
            T.RandomHorizontalFlip(0.5),
            T.RandomVerticalFlip(0.5),
            T.RandomRotation(90),
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        dataset = Digestpath_fixed_list(cfg, mode, transforms)
        sampler = WeightedRandomSampler(dataset.weights,
                                        cfg['batch_size'] * cfg['max_batch'],
                                        True)
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=cfg['batch_size'],
                                             sampler=sampler,
                                             num_workers=cfg['num_workers'])
    else:
        transforms = T.Compose([
            T.ToPILImage(),
            T.ToTensor(),
            T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        ])
        dataset = Digestpath_fixed_list(cfg, mode, transforms)
        if cfg['whole_image']:
            loader = dataset
        else:
            loader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=cfg['batch_size'],
                                                 shuffle=True,
                                                 num_workers=cfg['num_workers'])
    if mode == 'train':
        return loader, dataloader(cfg, 'test')
    else:
        return loader
Code example #29
def build_dataloader_fuse(dataset,
                          imgs_per_gpu,
                          workers_per_gpu,
                          num_gpus=1,
                          drop_last=True,
                          shuffle=True,
                          dist=False,
                          **kwargs):
    batch_size = num_gpus * imgs_per_gpu
    num_workers = num_gpus * workers_per_gpu
    sampler = WeightedRandomSampler(dataset.density, len(dataset))

    print(f"Building dataloader with batch_size {batch_size}")
    if shuffle:
        data_loader = DataLoader(dataset,
                                 batch_size=batch_size,
                                 sampler=sampler,
                                 num_workers=num_workers,
                                 collate_fn=partial(
                                     collate, samples_per_gpu=imgs_per_gpu),
                                 pin_memory=False,
                                 drop_last=drop_last,
                                 **kwargs)
    else:
        data_loader = DataLoader(dataset,
                                 batch_size=batch_size,
                                 shuffle=shuffle,
                                 num_workers=num_workers,
                                 collate_fn=partial(
                                     collate, samples_per_gpu=imgs_per_gpu),
                                 pin_memory=False,
                                 drop_last=drop_last,
                                 **kwargs)

    return data_loader
Code example #30
    def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1):
        weights = None
        data_items = dataset.samples

        if getattr(config, "use_language_weighted_sampler", False):
            alpha = getattr(config, "language_weighted_sampler_alpha", 1.0)
            print(" > Using Language weighted sampler with alpha:", alpha)
            weights = get_language_balancer_weights(data_items) * alpha

        if getattr(config, "use_speaker_weighted_sampler", False):
            alpha = getattr(config, "speaker_weighted_sampler_alpha", 1.0)
            print(" > Using Speaker weighted sampler with alpha:", alpha)
            if weights is not None:
                weights += get_speaker_balancer_weights(data_items) * alpha
            else:
                weights = get_speaker_balancer_weights(data_items) * alpha

        if weights is not None:
            sampler = WeightedRandomSampler(weights, len(weights))
        else:
            sampler = None

        # sampler for DDP
        if sampler is None:
            sampler = DistributedSampler(dataset) if num_gpus > 1 else None
        else:  # If a sampler is already defined use this sampler and DDP sampler together
            sampler = DistributedSamplerWrapper(
                sampler) if num_gpus > 1 else sampler

        return sampler