def get_train_valid_loader(train_dataset, test_size, shuffle=True):
    """Split ``train_dataset`` into a training loader and a held-out loader.

    The first ``test_size`` fraction of the (optionally shuffled) index list
    becomes the held-out split and the remaining indices are used for
    training. Both loaders wrap the same ``train_dataset`` and differ only in
    the sampler that restricts them to their own indices.

    NOTE(review): another function named ``get_train_valid_loader`` (taking
    ``train_size`` instead of ``test_size``) is defined later in this file; if
    both live at module level, the later definition replaces this one.

    Args:
        train_dataset: Dataset supporting ``len()`` that ``DataLoader`` can
            consume.
        test_size: Fraction of the samples assigned to the held-out loader.
        shuffle: When truthy, seed NumPy's global RNG with the module-level
            ``random_seed`` and shuffle the indices before splitting.

    Returns:
        Tuple ``(train_loader, test_loader, class_weights)``. ``class_weights``
        is always ``None`` in this variant.
    """
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(test_size * num_train))

    if shuffle:
        # Re-seeding before the shuffle keeps the split identical across runs.
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, test_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    test_sampler = SubsetRandomSampler(test_idx)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=num_workers)
    # This variant does not compute class weights.
    class_weights = None
    test_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             sampler=test_sampler,
                             num_workers=num_workers)
    return train_loader, test_loader, class_weights
def get_class_weights(train_dataset, train_loader):
    """Compute inverse-frequency class weights from the dataset's labels.

    Counts how often each class index occurs in ``train_dataset.itms``,
    divides the dataset length by each count, moves the result to the GPU and
    rescales it so the smallest weight equals 1. The per-class counts and the
    final weights are printed.

    A class with zero occurrences ends up with an infinite weight because its
    count is used as a divisor unchanged.

    Args:
        train_dataset: Dataset supporting ``len()`` and exposing ``itms``, an
            iterable of ``(vid, mask, target)`` triples where ``target`` is
            used as an index into a tensor of length ``num_classes``.
        train_loader: Unused; kept so existing callers keep working.

    Returns:
        CUDA tensor of length ``num_classes`` holding the normalised weights.
    """
    num_train = len(train_dataset)
    num_train_class = torch.zeros(num_classes)
    for _, _, target in train_dataset.itms:
        num_train_class[target] += 1

    class_weights = (float(num_train) / num_train_class).cuda()
    # Normalise so the most frequent class gets weight 1.
    class_weights /= class_weights.min()

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(num_train_class)
    print(class_weights)
    return class_weights
class _IndexMappedWeightedSampler(WeightedRandomSampler):
    """Weighted sampler that yields dataset indices instead of positions.

    ``WeightedRandomSampler`` draws positions in ``range(len(weights))``. This
    subclass translates each drawn position through ``indices`` so the sampler
    can be used on a subset of a larger dataset.
    """

    def __init__(self, indices, weights, num_samples, replacement=True):
        super(_IndexMappedWeightedSampler, self).__init__(
            weights, num_samples, replacement)
        self.indices = indices

    def __iter__(self):
        positions = super(_IndexMappedWeightedSampler, self).__iter__()
        return (int(self.indices[int(pos)]) for pos in positions)


def get_train_valid_loader(train_dataset, train_size, shuffle=True,
                           sampling=False):
    """Split ``train_dataset`` into a training loader and a held-out loader.

    The first ``train_size`` fraction of the (optionally shuffled) indices is
    used for training and the remainder for the held-out loader. Both loaders
    wrap the same ``train_dataset`` and differ only in their sampler.

    Args:
        train_dataset: Dataset supporting ``len()``. When ``sampling`` is
            true it must also provide ``returnlabels()``, assumed to return
            one class label per item in dataset order (TODO confirm).
        train_size: Fraction of the samples assigned to the training loader.
        shuffle: When truthy, seed NumPy's global RNG with the module-level
            ``random_seed`` and shuffle the indices before splitting.
        sampling: When true, draw training samples with a weighted sampler
            that down-weights classes 1, 8, 14 and 22, and scale the class
            counts used for ``class_weights`` by the same factors.

    Returns:
        Tuple ``(train_loader, test_loader, class_weights)`` where
        ``class_weights`` is a CUDA tensor equal to ``split / class_count``
        for each of the 24 hard-coded classes.
    """
    # Hard-coded per-class sample counts (24 classes); presumably measured on
    # the training data -- TODO confirm they match the dataset in use.
    class_counts = torch.FloatTensor([
        2386, 122270, 372, 5045, 1085, 9, 174, 175, 20005, 10, 224, 209,
        2, 1876, 20349, 63, 569, 7, 2899, 273, 79, 21, 21471, 4557,
    ])
    # Sampler weight of each down-weighted class when ``sampling`` is on;
    # every other class keeps weight 1.
    downsample = {1: 0.25, 8: 0.5, 14: 0.5, 22: 0.5}

    num_train = len(train_dataset)
    indices = np.arange(num_train)
    split = int(np.floor(train_size * num_train))

    if shuffle:
        # Re-seeding before the shuffle keeps the split identical across runs.
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, test_idx = indices[:split], indices[split:]

    if sampling:
        train_labels = np.asarray(train_dataset.returnlabels())[train_idx]
        sampler_weights = np.ones(len(train_idx))
        for cls, weight in downsample.items():
            sampler_weights[train_labels == cls] = weight
            # Scaling by the float weight avoids the integer truncation that
            # ``122270 / 4`` suffers under Python 2.
            class_counts[cls] *= weight
        # A plain WeightedRandomSampler would yield positions 0..split-1 and
        # the loader would read those dataset items directly, ignoring the
        # shuffled ``train_idx`` and overlapping with the held-out split.
        # NOTE(review): with replacement=False and num_samples equal to the
        # number of weights, every training index is drawn exactly once per
        # epoch, so the weights only affect ordering -- confirm intent.
        train_sampler = _IndexMappedWeightedSampler(
            train_idx, sampler_weights, len(train_idx), replacement=False)
    else:
        train_sampler = SubsetRandomSampler(train_idx)

    class_weights = (split / class_counts).cuda()
    test_sampler = SubsetRandomSampler(test_idx)

    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              sampler=train_sampler,
                              num_workers=num_workers)
    test_loader = DataLoader(train_dataset,
                             batch_size=batch_size,
                             sampler=test_sampler,
                             num_workers=num_workers)
    return train_loader, test_loader, class_weights