def __init__(self, root, train=True, download=False, noise_type=None, noise_rate=0.2, random_state=0):
    """MNIST dataset loader (pickle-backed) with optional synthetic label noise.

    Args:
        root: dataset root directory; ``~`` is expanded.
        train: load the training split if True, otherwise the test split.
        download: if True, fetch the raw data via ``self.download()`` first.
        noise_type: label-noise model forwarded to ``noisify``; the literal
            string ``'clean'`` skips noise injection entirely (note: the
            default ``None`` is NOT treated as clean and reaches ``noisify``).
        noise_rate: corruption fraction forwarded to ``noisify``.
        random_state: RNG seed forwarded to ``noisify`` for reproducibility.
    """
    self.root = os.path.expanduser(root)
    self.train = train  # training set or test set
    self.dataset = 'mnist'
    self.noise_type = noise_type

    if download:
        self.download()

    if not self._check_exists():
        # exist_ok=True: a previous interrupted run may have created the
        # folder without producing the processed files; recreating must not
        # raise FileExistsError here.
        os.makedirs(os.path.join(self.root, self.processed_folder), exist_ok=True)
        self.processing()

    if self.train:
        with open(os.path.join(self.root, self.processed_folder, self.training_file), mode="rb") as handle:
            self.train_data, self.train_labels = pickle.load(handle)
        # Scale pixel intensities from [0, 255] into [0.0, 1.0].
        self.train_data = self.train_data.astype(dtype=np.float32) / 255.0
        if noise_type != 'clean':
            # noisify expects labels as a column vector of shape (N, 1).
            self.train_labels = np.asarray([[self.train_labels[i]] for i in range(len(self.train_labels))])
            print(self.train_labels.shape)
            self.train_noisy_labels, self.actual_noise_rate = noisify(
                train_labels=self.train_labels, noise_type=noise_type,
                noise_rate=noise_rate, random_state=random_state)
            # Flatten the (N, 1) label arrays back into plain label lists.
            self.train_noisy_labels = [i[0] for i in self.train_noisy_labels]
            _train_labels = [i[0] for i in self.train_labels]
            # Boolean mask: True where the noisy label still equals the original.
            self.noise_or_not = np.transpose(self.train_noisy_labels) == np.transpose(_train_labels)
            print("noise or not: ", self.noise_or_not.shape)
    else:
        with open(os.path.join(self.root, self.processed_folder, self.test_file), mode="rb") as handle:
            self.test_data, self.test_labels = pickle.load(handle)
        self.test_data = self.test_data.astype(dtype=np.float32) / 255.0
def __init__(self, root, train=True, transform=None, target_transform=None, download=False, noise_type=None, noise_rate=0.2, random_state=0):
    """MNIST dataset loader (torch-serialized) with optional synthetic label noise.

    Args:
        root: dataset root directory; ``~`` is expanded.
        train: load the training split if True, otherwise the test split.
        transform: optional image transform (applied elsewhere, e.g. __getitem__).
        target_transform: optional label transform (applied elsewhere).
        download: if True, fetch the raw data via ``self.download()`` first.
        noise_type: label-noise model forwarded to ``noisify``; the literal
            string ``'clean'`` skips noise injection (note: the default
            ``None`` is NOT treated as clean and reaches ``noisify``).
        noise_rate: corruption fraction forwarded to ``noisify``.
        random_state: RNG seed forwarded to ``noisify`` for reproducibility.
    """
    self.root = os.path.expanduser(root)
    self.transform = transform
    self.target_transform = target_transform
    self.train = train  # training set or test set
    self.dataset = 'mnist'
    self.noise_type = noise_type

    if download:
        self.download()

    if not self._check_exists():
        # exist_ok=True: a previous interrupted run may have created the
        # folder without producing the processed files; recreating must not
        # raise FileExistsError here.
        os.makedirs(os.path.join(self.root, self.processed_folder), exist_ok=True)
        self.processing()

    if self.train:
        self.train_data, self.train_labels = torch.load(
            os.path.join(self.root, self.processed_folder, self.training_file))
        print(self.train_labels.size())
        if noise_type != 'clean':
            # noisify expects labels as a column vector of shape (N, 1).
            self.train_labels = np.asarray([[self.train_labels[i]] for i in range(len(self.train_labels))])
            print(self.train_labels.shape)
            self.train_noisy_labels, self.actual_noise_rate = noisify(
                train_labels=self.train_labels, noise_type=noise_type,
                noise_rate=noise_rate, random_state=random_state)
            # Flatten the (N, 1) label arrays back into plain label lists.
            self.train_noisy_labels = [i[0] for i in self.train_noisy_labels]
            _train_labels = [i[0] for i in self.train_labels]
            # Boolean mask: True where the noisy label still equals the original.
            self.noise_or_not = np.transpose(self.train_noisy_labels) == np.transpose(_train_labels)
    else:
        self.test_data, self.test_labels = torch.load(
            os.path.join(self.root, self.processed_folder, self.test_file))
def __init__(self, root, train=True, download=False, noise_type=None, noise_rate=0.2, random_state=0):
    """CIFAR-10 dataset loader with optional synthetic label noise.

    Args:
        root: dataset root directory; ``~`` is expanded.
        train: load the training batches if True, otherwise the test batch.
        download: if True, fetch the archive via ``self.download()`` first.
        noise_type: label-noise model forwarded to ``noisify``; the literal
            string ``'clean'`` skips noise injection (note: the default
            ``None`` is NOT treated as clean and reaches ``noisify``).
        noise_rate: corruption fraction forwarded to ``noisify``.
        random_state: RNG seed forwarded to ``noisify`` for reproducibility.

    Raises:
        RuntimeError: if the dataset files are missing or corrupted.
    """
    self.root = os.path.expanduser(root)
    self.train = train  # training set or test set
    self.dataset = 'cifar10'
    self.noise_type = noise_type
    self.nb_classes = 10

    if download:
        self.download()

    if not self._check_integrity():
        raise RuntimeError(
            'Dataset not found or corrupted. You can use download=True to download it'
        )

    # now load the picked numpy arrays
    if self.train:
        self.train_data = []
        self.train_labels = []
        for fentry in self.train_list:
            f = fentry[0]
            file = os.path.join(self.root, self.base_folder, f)
            # with-statement guarantees the batch file is closed even if
            # unpickling raises (the original leaked the handle on error).
            with open(file, 'rb') as fo:
                if sys.version_info[0] == 2:
                    entry = pickle.load(fo)
                else:
                    # Python-2 pickles need latin1 to decode byte strings.
                    entry = pickle.load(fo, encoding='latin1')
            self.train_data.append(entry['data'])
            # CIFAR-10 batches use 'labels'; CIFAR-100 uses 'fine_labels'.
            if 'labels' in entry:
                self.train_labels += entry['labels']
            else:
                self.train_labels += entry['fine_labels']
        self.train_data = np.concatenate(self.train_data)
        self.train_data = self.train_data.reshape((50000, 3, 32, 32))
        self.train_data = self.train_data.transpose((0, 2, 3, 1))  # convert to HWC
        # Scale pixel intensities from [0, 255] into [0.0, 1.0].
        self.train_data = self.train_data.astype(dtype=np.float32) / 255.0
        if noise_type != 'clean':
            # noisify train data: labels as a column vector of shape (N, 1).
            self.train_labels = np.asarray([[self.train_labels[i]] for i in range(len(self.train_labels))])
            self.train_noisy_labels, self.actual_noise_rate = noisify(
                train_labels=self.train_labels, noise_type=noise_type,
                noise_rate=noise_rate, random_state=random_state,
                nb_classes=self.nb_classes)
            # Flatten the (N, 1) label arrays back into plain label lists.
            self.train_noisy_labels = [i[0] for i in self.train_noisy_labels]
            _train_labels = [i[0] for i in self.train_labels]
            # Boolean mask: True where the noisy label still equals the original.
            self.noise_or_not = np.transpose(self.train_noisy_labels) == np.transpose(_train_labels)
    else:
        f = self.test_list[0][0]
        file = os.path.join(self.root, self.base_folder, f)
        with open(file, 'rb') as fo:
            if sys.version_info[0] == 2:
                entry = pickle.load(fo)
            else:
                entry = pickle.load(fo, encoding='latin1')
        self.test_data = entry['data']
        if 'labels' in entry:
            self.test_labels = entry['labels']
        else:
            self.test_labels = entry['fine_labels']
        self.test_data = self.test_data.reshape((10000, 3, 32, 32))
        self.test_data = self.test_data.transpose((0, 2, 3, 1))  # convert to HWC
        self.test_data = self.test_data.astype(dtype=np.float32) / 255.0
def __init__(self, root, train=0, transform=None, target_transform=None, noise_type='clean', noise_rate=0.00, device=1, redux=None, image_size=None):
    """ISIC skin-lesion dataset (benign/malignant) with optional label noise.

    Args:
        root: dataset root containing ``Images/``, ``Descriptions/`` and
            ``data_list2.json``.
        train: split selector — 0: train, 1: test, any other value: val.
        transform: optional image transform (applied elsewhere).
        target_transform: optional label transform (applied elsewhere).
        noise_type: ``'clean'`` keeps labels untouched; any other value is
            forwarded to ``noisify``.
        noise_rate: corruption fraction forwarded to ``noisify``.
        device: 0 keeps image paths on disk; 1 preloads images into RAM.
        redux: optional cap on the number of samples used.
        image_size: square resize target; defaults to 720 when None.
    """
    base_folder = root
    self.image_folder = join(base_folder, 'Images')
    self.data_list_f = join(base_folder, "data_list2.json")
    self.label_folder = join(base_folder, 'Descriptions')
    self.labelOrder = ['benign', 'malignant']
    self.root = root
    self.transform = transform
    self.target_transform = target_transform
    self.train = train  # training set or test set
    self.device = device  # 0: hardware; 1: RAM
    self.noise_type = noise_type
    self.random_state = 0

    with open(self.data_list_f, 'r') as data_f:
        data_dict = json.load(data_f)
    if self.train == 0:
        self.data_list = data_dict['train']
    elif self.train == 1:
        self.data_list = data_dict['test']
    else:
        self.data_list = data_dict['val']
    if redux:
        self.data_list = self.data_list[:redux]

    if image_size is None:
        self.imageTransform = transforms.Compose(
            [transforms.Resize((720, 720), interpolation=Image.NEAREST)])
    else:
        self.imageTransform = transforms.Compose([
            transforms.Resize((image_size, image_size),
                              interpolation=Image.NEAREST)
        ])

    print("Loading data from {}".format(self.label_folder))
    # now load the picked numpy arrays
    self.data = []
    self.labels = []
    for f in self.data_list:
        file = join(self.label_folder, f)
        # with-statement closes the description file (the original never
        # closed this handle, leaking one descriptor per sample).
        with open(file) as ff:
            entry = json.load(ff)
        try:
            flabel = entry['meta']['clinical']['benign_malignant']
            if flabel not in self.labelOrder:
                raise Exception
        except Exception:
            # Missing or unrecognised annotation falls back to label 0.
            label_ = 0  # All 19 kinds,0-17 normal label, 18 as exception
        else:
            label_ = self.labelOrder.index(flabel)
        data_ = join(self.image_folder, f + '.jpeg')
        # print(data_)
        assert os.path.isfile(data_)
        if self.device == 1:
            # Preload the image into RAM instead of keeping the path.
            data_ = self.img_loader(data_)
        self.data.append(data_)
        self.labels.append(label_)
    if self.device == 1:
        # NOTE(review): '==' is a no-op comparison, not an assignment. Kept
        # verbatim because switching to '=' would attempt to concatenate
        # preloaded image objects — confirm original intent before changing.
        self.data == np.concatenate(self.data)

    # noisy labels
    self.labels = np.asarray(self.labels)
    if noise_type == 'clean':
        # builtin bool replaces np.bool, which was removed in NumPy 1.24.
        self.noise_or_not = np.ones([len(self.labels)], dtype=bool)
    else:
        self.noisy_labels, self.actual_noise_rate = noisify(
            dataset="ISIC", nb_classes=2,
            train_labels=np.expand_dims(self.labels, 1),
            noise_type=noise_type, noise_rate=noise_rate,
            random_state=self.random_state)
        self.noisy_labels = self.noisy_labels.squeeze()
        # Boolean mask: True where the noisy label still equals the original.
        self.noise_or_not = self.noisy_labels == self.labels