Example #1
 def act(self, observation: Any, reward: Any, done: bool, info: Optional[Dict[Any, Any]] = None) -> Any:
     obs = torch.from_numpy(observation.astype(np.float32))
     forward = self.module.forward(obs)  # type: ignore
     probas = F.softmax(forward, dim=0)
     if self.deterministic:
         return probas.max(0)[1].view(1, 1).item()
     else:
         return next(iter(WeightedRandomSampler(probas, 1)))
Example #2
def build_balanced_sampler(labels, dataset_size=None):
    if dataset_size is None:
        dataset_size = len(labels)

    # Map each class label to the inverse of its count, then assign that weight to every example.
    weights_per_class = {label: 1.0 / count for label, count in Counter(labels).items()}
    weights_per_example = [weights_per_class[c] for c in labels]

    return WeightedRandomSampler(weights_per_example, dataset_size, replacement=True)
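A minimal usage sketch for the helper above, on a toy imbalanced dataset (the data and numbers are illustrative, not from the original source): the returned sampler takes the place of shuffle=True.

import torch
from torch.utils.data import DataLoader, TensorDataset

# Toy imbalanced data: 90 samples of class 0, 10 of class 1 (illustrative only).
features = torch.randn(100, 4)
labels = [0] * 90 + [1] * 10
dataset = TensorDataset(features, torch.tensor(labels))

sampler = build_balanced_sampler(labels)
# A sampler is mutually exclusive with shuffle=True in DataLoader.
loader = DataLoader(dataset, batch_size=32, sampler=sampler)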
Example #3
    def set_weights(self):
        """ sets the weights from the weighted dataset """

        # Make train/val weights
        self.train_weights = self.data_weighter.weighting_function(
            self.prop_train)
        self.val_weights = self.data_weighter.weighting_function(self.prop_val)

        # Create samplers
        self.train_sampler = WeightedRandomSampler(self.train_weights,
                                                   num_samples=len(
                                                       self.train_weights),
                                                   replacement=True)
        self.val_sampler = WeightedRandomSampler(self.val_weights,
                                                 num_samples=len(
                                                     self.val_weights),
                                                 replacement=True)
Example #4
def get_dataloaders(data_dir,
                    patch_size: int,
                    box_coder,
                    train_batch_size=1,
                    valid_batch_size=1,
                    workers=4,
                    fold=0,
                    fast=False):
    train_ids, valid_ids = get_train_test_split_for_fold(fold, ships_only=True)
    if fast:
        train_ids = train_ids[:train_batch_size * 64]
        valid_ids = valid_ids[:valid_batch_size * 64]

    groundtruth = pd.read_csv(
        os.path.join(data_dir, 'train_ship_segmentations_v2.csv'))

    trainset = D.RSSDDataset(sample_ids=train_ids,
                             data_dir=data_dir,
                             transform=get_transform(training=True,
                                                     width=patch_size,
                                                     height=patch_size),
                             groundtruth=groundtruth,
                             box_coder=box_coder)

    validset = D.RSSDDataset(sample_ids=valid_ids,
                             data_dir=data_dir,
                             transform=get_transform(training=False,
                                                     width=patch_size,
                                                     height=patch_size),
                             groundtruth=groundtruth,
                             box_coder=box_coder)

    shuffle = True
    sampler = None
    if fast:
        shuffle = False
        sampler = WeightedRandomSampler(np.ones(len(trainset)), 1024)

    trainloader = DataLoader(trainset,
                             batch_size=train_batch_size,
                             num_workers=workers,
                             pin_memory=True,
                             drop_last=True,
                             shuffle=shuffle,
                             sampler=sampler)

    validloader = DataLoader(
        validset,
        batch_size=valid_batch_size,
        num_workers=workers,
        pin_memory=True,
        drop_last=False,
        shuffle=False,
    )

    print('Train set', len(trainset), len(trainloader), 'Valid set',
          len(validset), len(validloader))
    return trainloader, validloader
Example #5
def load_data():
    print("initializing dataloader")
    transforms_video = [
        CenterCrop(opt.image_size),
        RGB2Lab(),
        ToTensor(),
        Normalize(),
    ]
    transforms_imagenet = [
        CenterPad_threshold(opt.image_size),
        RGB2Lab(),
        ToTensor(),
        Normalize()
    ]
    extra_reference_transform = [
        transform_lib.RandomHorizontalFlip(0.5),
        transform_lib.RandomResizedCrop(480, (0.98, 1.0), ratio=(0.8, 1.2)),
    ]
    train_dataset_video = VideosDataset(
        data_root=opt.data_root,
        epoch=opt.epoch,
        image_size=opt.image_size,
        image_transform=transforms.Compose(transforms_video),
        real_reference_probability=opt.real_reference_probability,
        nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
    )
    train_dataset_imagenet = VideosDataset_ImageNet(
        data_root=opt.data_root_imagenet,
        image_size=opt.image_size,
        epoch=opt.epoch,
        with_bad=opt.with_bad,
        with_mid=opt.with_mid,
        transforms_imagenet=transforms_imagenet,
        distortion_level=4,
        brightnessjitter=5,
        nonzero_placeholder_probability=opt.nonzero_placeholder_probability,
        extra_reference_transform=extra_reference_transform,
        real_reference_probability=opt.real_reference_probability,
    )

    video_training_length = len(train_dataset_video)
    imagenet_training_length = len(train_dataset_imagenet)
    dataset_training_length = train_dataset_video.real_len + train_dataset_imagenet.real_len
    dataset_combined = ConcatDataset(
        [train_dataset_video, train_dataset_imagenet])
    sampler = WeightedRandomSampler([1] * video_training_length +
                                    [1] * imagenet_training_length,
                                    dataset_training_length * opt.epoch)
    data_loader = DataLoader(
        dataset_combined,
        batch_size=opt.batch_size,
        shuffle=False,
        num_workers=opt.workers,
        pin_memory=True,
        drop_last=True,
        sampler=sampler,
    )
    return dataset_training_length, train_dataset_video, train_dataset_imagenet, data_loader
Example #6
 def get_class_balanced_sampler(dataset):
     assert isinstance(dataset, ClassDataset), 'dataset must be an instance of ClassDataset.'
     indices = list(range(len(dataset)))
     num_samples = len(dataset)
     weights = [1.0 / dataset.per_label_records_num[dataset[index][3].item()] for index in indices]
     weights = torch.tensor(weights)
     return WeightedRandomSampler(weights, num_samples)
Example #7
def my_sampler(target):
    class_sample_count = np.array([len(np.where(target == t)[0]) for t in np.unique(target)])
    weight = 1. / class_sample_count
    samples_weight = np.array([weight[t] for t in target])

    samples_weight = torch.from_numpy(samples_weight)
    samples_weight = samples_weight.double()
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    return sampler
Example #8
def create_weighted_sampler(labels):
    labels_unique, counts = np.unique(labels, return_counts=True)
    class_weights = [sum(counts) / c for c in counts]
    #class_weights[1] = class_weights[1]/2
    example_weights = [class_weights[int(e)] for e in labels]
    #print("Example Weights:")
    #print(example_weights)
    sampler = WeightedRandomSampler(example_weights, len(labels))
    return sampler
Example #9
def prepare_random_sampler(classes_list):
    class_sample_count = np.array(
        [len(np.where(classes_list == t)[0]) for t in np.unique(classes_list)])
    weight = 1. / class_sample_count
    samples_weight = np.array([weight[t] for t in classes_list])
    samples_weight = torch.from_numpy(samples_weight)
    samples_weight = samples_weight.double()
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    return sampler
Example #10
def get_iwildcam_loader(cfg, mode='train'):
    print("Mode: {}".format(mode))
    if mode == 'train' or mode == 'train_val' or mode == 'train_dev':  # train_val
        train_data = iWildCam(cfg, mode=mode)  # define a data-fetching iterator
        if cfg.TRAIN.WEIGHT_SAMPLER:
            train_sampler = WeightedRandomSampler(train_data.samples_weight,
                                                  train_data.__len__())
            train_loader = torch.utils.data.DataLoader(
                train_data,
                batch_size=cfg.TRAIN.BATCH_SIZE,
                num_workers=cfg.TRAIN.NUM_WORKER,
                drop_last=True,
                pin_memory=True,
                sampler=train_sampler)
        else:
            train_loader = torch.utils.data.DataLoader(
                train_data,
                batch_size=cfg.TRAIN.BATCH_SIZE,
                shuffle=True,
                num_workers=cfg.TRAIN.NUM_WORKER,
                drop_last=True,
                pin_memory=True)

        dev_data = iWildCam(cfg, mode='dev')  # evaluation data

        dev_loader = torch.utils.data.DataLoader(
            dev_data,
            batch_size=cfg.TRAIN.EVAL_BATCH_SIZE,
            shuffle=False,
            num_workers=cfg.TRAIN.NUM_WORKER,
            drop_last=False,
            pin_memory=True)
        return train_loader, dev_loader
    elif mode in ['infer', 'infer_by_seq', 'infer_by_seqv2']:
        test_data = iWildCam(cfg, mode=mode)

        test_loader = torch.utils.data.DataLoader(
            test_data,
            batch_size=cfg.TRAIN.BATCH_SIZE,
            shuffle=False,
            num_workers=cfg.TRAIN.NUM_WORKER,
            drop_last=False,
            pin_memory=True)
        return test_loader
    elif mode == 'val':  # only used to evaluate model performance
        val_data = iWildCam(cfg, mode='dev')  # evaluation data

        val_loader = torch.utils.data.DataLoader(
            val_data,
            batch_size=cfg.TRAIN.EVAL_BATCH_SIZE,
            shuffle=False,
            num_workers=cfg.TRAIN.NUM_WORKER,
            drop_last=False,
            pin_memory=True)
        return val_loader
    else:
        return None
Example #11
def create_dataloaders(X_train, y_train, X_valid, y_valid, weights, batch_size,
                       num_workers, drop_last):
    """
    Create dataloaders with or without weighted subsampling, depending on the weights parameter.

    Parameters
    ----------
        X_train: np.ndarray
            Training data
        y_train: np.array
            Mapped Training targets
        X_valid: np.ndarray
            Validation data
        y_valid: np.array
            Mapped Validation targets
        weights : dictionary or int
            Weight for each mapped target class
            0 for no sampling
            1 for balanced sampling
    Returns
    -------
        train_dataloader, valid_dataloader : torch.DataLoader, torch.DataLoader
            Training and validation dataloaders
    """
    if weights == 0:
        train_dataloader = DataLoader(TorchDataset(X_train, y_train),
                                      batch_size=batch_size,
                                      shuffle=True,
                                      num_workers=num_workers,
                                      drop_last=drop_last)
    else:
        if weights == 1:
            class_sample_count = np.array(
                [len(np.where(y_train == t)[0]) for t in np.unique(y_train)])

            weights = 1. / class_sample_count

            samples_weight = np.array([weights[t] for t in y_train])

            samples_weight = torch.from_numpy(samples_weight)
            samples_weight = samples_weight.double()
        else:
            # custom weights
            samples_weight = np.array([weights[t] for t in y_train])
        sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
        train_dataloader = DataLoader(TorchDataset(X_train, y_train),
                                      batch_size=batch_size,
                                      sampler=sampler,
                                      num_workers=num_workers,
                                      drop_last=drop_last)

    valid_dataloader = DataLoader(TorchDataset(X_valid, y_valid),
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=num_workers)

    return train_dataloader, valid_dataloader
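A minimal calling sketch for create_dataloaders under the conventions documented in its docstring (arrays are toy data, not from the original source): weights=1 requests automatic class balancing, while a dict of mapped class -> weight supplies custom sampling weights.

import numpy as np

# Toy arrays; targets must already be mapped to 0..n_classes-1.
X_train, y_train = np.random.rand(100, 8), np.random.randint(0, 2, 100)
X_valid, y_valid = np.random.rand(20, 8), np.random.randint(0, 2, 20)

# Balanced sampling (per-class weights inferred from class frequencies):
train_dl, valid_dl = create_dataloaders(X_train, y_train, X_valid, y_valid,
                                        weights=1, batch_size=16,
                                        num_workers=0, drop_last=False)

# Custom per-class weights (oversample class 1 four times as heavily):
train_dl, valid_dl = create_dataloaders(X_train, y_train, X_valid, y_valid,
                                        weights={0: 1.0, 1: 4.0}, batch_size=16,
                                        num_workers=0, drop_last=False)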
Example #12
    def train(self, net, samples, optimizer, e):
        alpha = 2 * max(0, ((100 - e) / 100))
        criterion = losses.ELULovaszFocalWithLogitsLoss(alpha, 2 - alpha)

        transforms = generator.TransformationsGenerator([
            random.RandomFlipLr(),
            random.RandomAffine(image_size=101,
                                translation=lambda rs:
                                (rs.randint(-20, 20), rs.randint(-20, 20)),
                                scale=lambda rs: (rs.uniform(0.85, 1.15), 1),
                                **utils.transformations_options),
            transformations.Padding(((13, 14), (13, 14), (0, 0)))
        ])

        pseudo_dataset = datasets.SemiSupervisedImageDataset(
            samples_test,
            settings.test,
            transforms,
            size=len(samples_test),
            test_predictions=self.test_predictions,
            momentum=0.0)

        dataset = datasets.ImageDataset(samples, settings.train, transforms)
        weights = ([len(pseudo_dataset) / len(dataset) * 2] * len(dataset)
                   + [1] * len(pseudo_dataset))
        dataloader = DataLoader(ConcatDataset([dataset, pseudo_dataset]),
                                num_workers=10,
                                batch_size=16,
                                sampler=WeightedRandomSampler(
                                    weights=weights, num_samples=3200))

        average_meter_train = meters.AverageMeter()

        with tqdm(total=len(dataloader),
                  leave=False) as pbar, torch.enable_grad():
            net.train()

            for images, masks_targets in dataloader:
                masks_targets = masks_targets.to(gpu)
                masks_predictions = net(images)

                loss = criterion(masks_predictions, masks_targets)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                average_meter_train.add('loss', loss.item())
                self.update_pbar(torch.sigmoid(masks_predictions),
                                 masks_targets, pbar, average_meter_train,
                                 'Training epoch {}'.format(e))

        train_stats = {
            'train_' + k: v
            for k, v in average_meter_train.get_all().items()
        }
        return train_stats
Example #13
    def __init__(self):
        ##The top config
        #self.data_root = '/media/hhy/data/USdata/MergePhase1/test_0.3'
        #self.log_dir = '/media/hhy/data/code_results/MILs/MIL_H_Attention'

        self.root = '/remote-home/my/Ultrasound_CV/data/Ruijin/clean'
        self.log_dir = '/remote-home/my/hhy/Ultrasound_MIL/experiments/weighted_sampler/'
        if not os.path.exists(self.log_dir):
            os.makedirs(self.log_dir)
        ##training config
        self.lr = 1e-4
        self.epoch = 50
        self.resume = -1
        self.batch_size = 1
        self.net = Attention()
        self.net.cuda()

        self.optimizer = Adam(self.net.parameters(), lr=self.lr)
        self.lrsch = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=[10, 30, 50, 70], gamma=0.5)

        
        self.logger = Logger(self.log_dir)
        self.train_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomResizedCrop((224, 224)),
            transforms.RandomHorizontalFlip(0.5),
            transforms.RandomVerticalFlip(0.5),
            transforms.ColorJitter(0.25, 0.25, 0.25, 0.25),
            transforms.ToTensor()
        ])
        self.test_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

        self.trainbag = RuijinBags(self.root, [0,1,2,3],self.train_transform)
        self.testbag = RuijinBags(self.root, [4], self.test_transform)
        
        train_label_list = list(map(lambda x: int(x['label']), self.trainbag.patient_info))
        pos_ratio = sum(train_label_list) / len(train_label_list)
        print(pos_ratio)
        train_weight = [(1-pos_ratio) if x>0 else pos_ratio for x in train_label_list]

        self.train_sampler = WeightedRandomSampler(weights=train_weight, num_samples=len(self.trainbag))
        self.train_loader = DataLoader(self.trainbag, batch_size=self.batch_size, num_workers=8,
                            sampler=self.train_sampler)
        self.val_loader = DataLoader(self.testbag, batch_size=self.batch_size, shuffle=False, num_workers=8)

        if self.resume > 0:
            self.net, self.optimizer, self.lrsch, self.loss, self.global_step = self.logger.load(self.net, self.optimizer, self.lrsch, self.loss, self.resume)
        else:
            self.global_step = 0
        
        # self.trainer = MTTrainer(self.net, self.optimizer, self.lrsch, self.loss, self.train_loader, self.val_loader, self.logger, self.global_step, mode=2)
        self.trainer = MILTrainer_batch1(self.net, self.optimizer, self.lrsch, None, self.train_loader, self.val_loader, self.logger,
                                     self.global_step)
Example #14
def prepare_data():
    #2.1 get files and split for K-fold dataset
    #2.1.1 read files
    train_ = get_files(config.train_data, "train")
    #val_data_list = get_files(config.val_data,"val")
    test_files = get_files(config.test_data, "test")
    """ 
    #2.1.2 split
    split_fold = StratifiedKFold(n_splits=3)
    folds_indexes = split_fold.split(X=origin_files["filename"],y=origin_files["label"])
    folds_indexes = np.array(list(folds_indexes))
    fold_index = folds_indexes[fold]

    #2.1.3 using fold index to split for train data and val data
    train_data_list = pd.concat([origin_files["filename"][fold_index[0]],origin_files["label"][fold_index[0]]],axis=1)
    val_data_list = pd.concat([origin_files["filename"][fold_index[1]],origin_files["label"][fold_index[1]]],axis=1)
    """
    train_data_list, val_data_list = train_test_split(train_,
                                                      test_size=0.15,
                                                      stratify=train_["label"])
    #2.1.4 load dataset
    #2.1.4.1 sampling
    train_dataset, val_dataset, test_dataset = AgriDataset(
        train_data_list), AgriDataset(val_data_list,
                                      train=False), AgriDataset(test_files,
                                                                test=True)
    if config.ifWeightedRandomShuffle is True:
        distribution = train_data_list.groupby(by=['label']).size()
        balance = min(distribution)
        dic = {}
        for i in range(len(distribution)):
            dic[i] = distribution[i]
        weights = []
        for i in range(len(train_dataset)):
            weights.append(balance / dic[train_dataset[i][1]])
        sampler = WeightedRandomSampler(weights,\
                                num_samples=len(weights),\
                                replacement=True)
        train_dataloader = DataLoader(train_dataset, batch_size=config.batch_size,\
                                    collate_fn=collate_fn,pin_memory=True,sampler=sampler)
    else:
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=config.batch_size,
                                      shuffle=True,
                                      collate_fn=collate_fn,
                                      pin_memory=True)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=config.batch_size,
                                shuffle=True,
                                collate_fn=collate_fn,
                                pin_memory=False)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=1,
                                 shuffle=False,
                                 pin_memory=False)
    return train_dataloader, val_dataloader, test_dataloader
Example #15
 def sampler(self, mode: str, dataset: Dataset) -> Sampler:
     if "train" in mode:
         if self.config.DEBUG:
             return WeightedRandomSampler(
                 weights=np.ones(len(dataset)),
                 num_samples=self.config.DEBUG_TRAIN_SIZE)
         else:
             return RandomSampler(dataset, replacement=False)
     else:
         return SequentialSampler(dataset)
Example #16
def create_data_loader(dataset, counters, parameters, init_sampler):
    labels = [label for _, label in dataset.the_list]
    class_weights = [dataset.__len__() / counters[label] for label in label_names().values()]
    weights = [class_weights[labels[i]] for i in range(dataset.__len__())]
    if init_sampler:
        sampler = WeightedRandomSampler(weights=weights, num_samples=dataset.__len__())
        data_loader = DataLoader(dataset=dataset, batch_size=parameters['batch_size'], sampler=sampler)
    else:
        data_loader = DataLoader(dataset=dataset, batch_size=parameters['batch_size'])
    return data_loader
Example #17
def resampling_balance(data):
    targets = data.target
    class_count = np.unique(targets, return_counts=True)[1]
    print("Class number before resampling: ", class_count)

    weight = 1. / class_count
    samples_weight = weight[targets]
    samples_weight = torch.from_numpy(samples_weight)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))
    return sampler
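A small worked illustration of the vectorized weighting used above (toy targets, illustrative numbers): indexing the per-class weight array with the target array expands it into one weight per sample.

import numpy as np

targets = np.array([0, 0, 0, 1])                           # 3 samples of class 0, 1 of class 1
class_count = np.unique(targets, return_counts=True)[1]    # -> array([3, 1])
weight = 1. / class_count                                  # -> array([0.333..., 1.0])
samples_weight = weight[targets]                           # -> one weight per sample
print(samples_weight)                                      # [0.333... 0.333... 0.333... 1.0]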
Example #18
    def forward(self, bsz, seq_len, cuda=True):
        # returns bsz*seq_len*nsamples samples in shape nsamples x (bsz x seq_len)

        # sample based on frequencies
        wrs = WeightedRandomSampler(self.frequencies,
                                    self.nsamples * bsz * seq_len)
        samples = torch.LongTensor(
            list(wrs)).cuda() if cuda else torch.LongTensor(list(wrs))

        return samples.view(-1, bsz)
Example #19
 def __init__(self, danspeech_multi_dataset, num_replicas=None, rank=None):
     super(DistributedWeightedSamplerCustom,
           self).__init__(danspeech_multi_dataset,
                          num_replicas=num_replicas,
                          rank=rank,
                          shuffle=False)
     self.sampler = WeightedRandomSampler(
         danspeech_multi_dataset.final_weights,
         len(danspeech_multi_dataset))
     self.epoch = 0
Example #20
    def get(self):
        """Obtains an instance of the sampler. """
        gen = torch.Generator()
        gen.manual_seed(self.random_seed)

        # Samples without replacement using the sample weights
        return WeightedRandomSampler(weights=self.sample_weights,
                                     num_samples=self.partition_size,
                                     replacement=False,
                                     generator=gen)
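A standalone sketch of the same pattern (weights and seed are illustrative): with replacement=False the sampler draws distinct indices, so num_samples must not exceed the number of weights, and the seeded generator makes the draw reproducible.

import torch
from torch.utils.data import WeightedRandomSampler

gen = torch.Generator()
gen.manual_seed(1234)

sample_weights = torch.tensor([0.1, 0.4, 0.2, 0.3])
# Without replacement, num_samples must be <= len(sample_weights).
sampler = WeightedRandomSampler(weights=sample_weights, num_samples=3,
                                replacement=False, generator=gen)
print(list(sampler))  # three distinct indices, e.g. [1, 3, 0]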
Example #21
 def _create_sampler(self, target_np):
     self.labels = np.unique(target_np)
     class_sample_count = np.array(
         [len(np.where(target_np == t)[0]) for t in self.labels])
     weight = 1. / class_sample_count
     samples_weight = torch.from_numpy(
         np.array([weight[t] for t in target_np])).double()
     return WeightedRandomSampler(samples_weight,
                                  len(samples_weight),
                                  replacement=True)
Example #22
 def _construct_dataloaders(self):
     dataloaders = []
     for dataset, weights in zip(self._datasets, self._weights):
         sampler = WeightedRandomSampler(weights, len(weights))
         dataloaders.append(
             DataLoader(dataset=dataset,
                        sampler=sampler,
                        num_workers=self._num_workers,
                        batch_size=self._batch_size))
     return [iter(loader) for loader in dataloaders]
Example #23
 def _get_balanced_dev_dataloader(self, dataset, drop_last=False):
     return DataLoader(
         dataset,
         sampler=WeightedRandomSampler(dataset.sample_weights,
                                       len(dataset.sample_weights)),
         batch_size=self.datarc["batch_size"],
         drop_last=drop_last,
         num_workers=self.datarc["num_workers"],
         collate_fn=dataset.collate_fn,
     )
Example #24
def balance_sources_sampler(dataset, strength):
    
    srcs = dataset.sources
    sources_count = [list(srcs.values()).count(source) for source in set(list(srcs.values()))]
    sources_count_dict = dict(zip(list(set(list(srcs.values()))), sources_count))

    weights = [1/(sources_count_dict[srcs[image_id]] + 1.0 / strength) for image_id in list(srcs.keys())]
    sampler = WeightedRandomSampler(weights, len(dataset))

    return sampler
Example #25
def _get_sampler(df: pd.DataFrame, alpha: float = 0.5) -> Sampler:
    y = np.array(
        [HumanProteinDataset.parse_target(target) for target in df.Target])
    class_weights = np.round(np.log(alpha * y.sum() / y.sum(axis=0)), 2)
    class_weights[class_weights < 1.0] = 1.0

    weights = np.zeros(len(df))
    for i, target in enumerate(y):
        weights[i] = class_weights[target == 1].max()

    return WeightedRandomSampler(weights, len(df))
Example #26
def k_fold(logger,k,root,val_root,epoch,args,criterion,optimizer,k_model,device,loss_meter,confusion_matrix,train_acc,loss_list,val_acc):
    trained_time = 0
    best_accuracy = 0
    best_model = None
    avg_accuracy = 0
    avg_loss = 0
    avg_train_acc = 0
    end = time.time()
    for i in range(k):
        get_k_fold_data(k,i,'dataset/all_shuffle_datas.txt')
        train_transform = train_augment(cfg.IMAGE_SIZE)
        train_data=Eye(img_root=root,tag_root='dataset/train_k.txt',transform=train_transform)
        data_len=train_data.__len__()
        weight_prob=[data_len/w for w in [1,6,1,1,0.4,0.8,2.5]]
        weight_list=[weight_prob[label] for data,label in train_data]
        train_sampler = WeightedRandomSampler(weights=weight_list,num_samples=7*2000,replacement=True)
        train_dataloader=DataLoader(train_data,batch_size=cfg.BATCHSIZE,shuffle=(train_sampler==None),drop_last=True,sampler=train_sampler,num_workers=8)
        
        val_transform = val_augment(cfg.IMAGE_SIZE)
        val_data=Eye(img_root=root,tag_root='dataset/val_k.txt',transform=val_transform)
        val_dataloader=DataLoader(val_data,batch_size=cfg.BATCHSIZE,shuffle=False,drop_last=True,num_workers=8)

        k_model[i],train_loss,train_accuracy = train(train_dataloader,criterion,optimizer,k_model[i],device,loss_meter,confusion_matrix)

        val_cm,val_accuracy = val(k_model[i],val_dataloader,device)

        avg_accuracy+=val_accuracy
        avg_train_acc+=train_accuracy
        avg_loss+=train_loss
        trained_time = time.time() - end
        end = time.time()
        log_str = [
            "Epoch:{:02d}, Fold:{:02d}, Lr:{:.8f}, Cost:{:.2f}s".format(epoch,i,
                optimizer.param_groups[0]['lr'], trained_time),
            "Loss:{:.2f}".format(train_loss),
            "train_acc:{:.2f}".format(train_accuracy),
            "val_acc:{:.2f}".format(val_accuracy)
                ]
        logger.info(log_str)
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            best_model = k_model[i]

    avg_accuracy = avg_accuracy/k
    avg_train_acc = avg_train_acc/k
    avg_loss = avg_loss/k

    val_acc.append(avg_accuracy)
    train_acc.append(avg_train_acc)
    loss_list.append(avg_loss)

    log_str = "Epoch:{:2d}".format(epoch)+"--"+"avg_loss:{:2f}".format(avg_loss)+"--"+"avg_train_accuracy:{:2f}".format(avg_train_acc)+"--"+"avg_val_accuracy:{:2f}".format(avg_accuracy)
    logger.info(log_str)
    t.save(best_model.state_dict(),os.path.join(cfg.OUTPUT_MODEL_DIR, args.model+'test{:2d}.pth'.format(epoch)))
    return k_model
Example #27
 def get_class_balanced_sampler(dataset):
     if not hasattr(dataset, 'per_label_records_num'):
         return RandomSampler(dataset)
     indices = list(range(len(dataset)))
     num_samples = len(dataset)
     weights = [
         1.0 / dataset.per_label_records_num[dataset[index][3].item()]
         for index in indices
     ]
     weights = torch.tensor(weights)
     return WeightedRandomSampler(weights, num_samples)
Example #28
    def train_dataloader(self) -> DataLoader:
        sourceSet = self.dataSets['source']
        targetSet = self.dataSets['targetTrain']
        STSet = ConcatDataset([sourceSet, targetSet])

        source_weights = [1.0 / len(sourceSet) for _ in range(len(sourceSet))]
        target_weights = [1.0 / len(targetSet) for _ in range(len(targetSet))]
        weights = [*source_weights, *target_weights]

        sampler = WeightedRandomSampler(weights=weights, num_samples=len(STSet), replacement=True)
        return DataLoader(STSet, sampler=sampler, batch_size=self.batch_size, num_workers=self.num_workers)
Example #29
def class_imbalance_sampler(targets, segmentation_threshold):
    if len(targets.shape) > 1:  # if posed as segmentation task
        targets = targets.sum(axis=1) / targets.shape[1]
        targets = targets > segmentation_threshold

    targets = tensor(targets).long().squeeze()
    class_count = torch.bincount(targets)
    weighting = tensor(1.) / class_count.float()
    weights = weighting[targets]
    sampler = WeightedRandomSampler(weights, len(targets))
    return sampler
Example #30
    def create_sampler(self):
        class_weights = self.train_data.train['class'].value_counts().to_dict()
        for k, v in class_weights.items():
            class_weights[k] = 1. / torch.tensor(v, dtype=torch.float)
        sample_weights = [0] * len(self.train_data.train)
        for idx, label in enumerate(self.train_data.train['class']):
            sample_weights[idx] = class_weights[label]

        return WeightedRandomSampler(sample_weights,
                                     num_samples=len(sample_weights),
                                     replacement=True)