Example #1
0
    def __init__(self, root, train=True, download=False, noise_type=None, noise_rate=0.2, random_state=0):
        """Load the pickled MNIST split, optionally corrupting the labels.

        Args:
            root: dataset root directory (``~`` is expanded).
            train: load the training split when True, the test split otherwise.
            download: call ``self.download()`` before loading.
            noise_type: label-noise scheme forwarded to ``noisify``; the
                literal string 'clean' skips noisification entirely.
            noise_rate: fraction of labels to corrupt (forwarded to ``noisify``).
            random_state: seed forwarded to ``noisify`` for reproducibility.
        """
        self.root = os.path.expanduser(root)
        self.train = train  # training set or test set
        self.dataset = 'mnist'
        self.noise_type = noise_type

        if download:
            self.download()

        if not self._check_exists():
            # exist_ok=True: the processed folder may already exist even though
            # the processed files are missing (e.g. an interrupted earlier run);
            # without it makedirs raises FileExistsError here.
            os.makedirs(os.path.join(self.root, self.processed_folder), exist_ok=True)
            self.processing()

        if self.train:
            with open(os.path.join(self.root, self.processed_folder, self.training_file), mode="rb") as handle:
                self.train_data, self.train_labels = pickle.load(handle)
                self.train_data = self.train_data.astype(dtype=np.float32) / 255.0
            if noise_type != 'clean':
                # noisify expects labels shaped (N, 1).
                self.train_labels = np.asarray([[self.train_labels[i]] for i in range(len(self.train_labels))])
                print(self.train_labels.shape)
                self.train_noisy_labels, self.actual_noise_rate = noisify(train_labels=self.train_labels,
                                                                          noise_type=noise_type,
                                                                          noise_rate=noise_rate,
                                                                          random_state=random_state)
                # Flatten (N, 1) back to flat lists of scalar labels.
                self.train_noisy_labels = [i[0] for i in self.train_noisy_labels]
                _train_labels = [i[0] for i in self.train_labels]
                # Boolean mask: True where a label survived uncorrupted.
                self.noise_or_not = np.transpose(self.train_noisy_labels) == np.transpose(_train_labels)
                print("noise or not: ", self.noise_or_not.shape)
        else:
            with open(os.path.join(self.root, self.processed_folder, self.test_file), mode="rb") as handle:
                self.test_data, self.test_labels = pickle.load(handle)
                self.test_data = self.test_data.astype(dtype=np.float32) / 255.0
Example #2
0
    def __init__(self,
                 root,
                 train=True,
                 transform=None,
                 target_transform=None,
                 download=False,
                 noise_type=None,
                 noise_rate=0.2,
                 random_state=0):
        """Load the torch-serialized MNIST split, optionally corrupting the labels.

        Args:
            root: dataset root directory (``~`` is expanded).
            train: load the training split when True, the test split otherwise.
            transform: optional transform applied to each sample.
            target_transform: optional transform applied to each label.
            download: call ``self.download()`` before loading.
            noise_type: label-noise scheme forwarded to ``noisify``; the
                literal string 'clean' skips noisification entirely.
            noise_rate: fraction of labels to corrupt (forwarded to ``noisify``).
            random_state: seed forwarded to ``noisify`` for reproducibility.
        """
        self.root = os.path.expanduser(root)
        self.transform = transform
        self.target_transform = target_transform
        self.train = train  # training set or test set
        self.dataset = 'mnist'
        self.noise_type = noise_type

        if download:
            self.download()

        if not self._check_exists():
            # exist_ok=True: the processed folder may already exist even though
            # the processed files are missing (e.g. an interrupted earlier run);
            # without it makedirs raises FileExistsError here.
            os.makedirs(os.path.join(self.root, self.processed_folder),
                        exist_ok=True)
            self.processing()

        if self.train:
            self.train_data, self.train_labels = torch.load(
                os.path.join(self.root, self.processed_folder,
                             self.training_file))
            print(self.train_labels.size())
            if noise_type != 'clean':
                # noisify expects labels shaped (N, 1).
                self.train_labels = np.asarray(
                    [[self.train_labels[i]]
                     for i in range(len(self.train_labels))])
                print(self.train_labels.shape)
                self.train_noisy_labels, self.actual_noise_rate = noisify(
                    train_labels=self.train_labels,
                    noise_type=noise_type,
                    noise_rate=noise_rate,
                    random_state=random_state)
                # Flatten (N, 1) back to flat lists of scalar labels.
                self.train_noisy_labels = [
                    i[0] for i in self.train_noisy_labels
                ]
                _train_labels = [i[0] for i in self.train_labels]
                # Boolean mask: True where a label survived uncorrupted.
                self.noise_or_not = np.transpose(
                    self.train_noisy_labels) == np.transpose(_train_labels)
        else:
            self.test_data, self.test_labels = torch.load(
                os.path.join(self.root, self.processed_folder, self.test_file))
Example #3
0
    def __init__(self,
                 root,
                 train=True,
                 download=False,
                 noise_type=None,
                 noise_rate=0.2,
                 random_state=0):
        """Load CIFAR-10 batch files, optionally corrupting the training labels.

        Args:
            root: dataset root directory (``~`` is expanded).
            train: load the training batches when True, the test batch otherwise.
            download: call ``self.download()`` before loading.
            noise_type: label-noise scheme forwarded to ``noisify``; the
                literal string 'clean' skips noisification entirely.
            noise_rate: fraction of labels to corrupt (forwarded to ``noisify``).
            random_state: seed forwarded to ``noisify`` for reproducibility.

        Raises:
            RuntimeError: if the dataset files are absent or corrupted.
        """
        self.root = os.path.expanduser(root)
        self.train = train  # training set or test set
        self.dataset = 'cifar10'
        self.noise_type = noise_type
        self.nb_classes = 10

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError(
                'Dataset not found or corrupted. You can use download=True to download it'
            )

        # now load the picked numpy arrays
        if self.train:
            self.train_data = []
            self.train_labels = []
            for fentry in self.train_list:
                f = fentry[0]
                file = os.path.join(self.root, self.base_folder, f)
                # 'with' guarantees the batch file is closed even if
                # pickle.load raises (the original leaked the handle then).
                with open(file, 'rb') as fo:
                    if sys.version_info[0] == 2:
                        entry = pickle.load(fo)
                    else:
                        entry = pickle.load(fo, encoding='latin1')
                self.train_data.append(entry['data'])
                # CIFAR-10 batches use 'labels'; CIFAR-100-style batches
                # use 'fine_labels'.
                if 'labels' in entry:
                    self.train_labels += entry['labels']
                else:
                    self.train_labels += entry['fine_labels']

            self.train_data = np.concatenate(self.train_data)
            self.train_data = self.train_data.reshape((50000, 3, 32, 32))
            self.train_data = self.train_data.transpose(
                (0, 2, 3, 1))  # convert to HWC
            self.train_data = self.train_data.astype(dtype=np.float32) / 255.0
            # if noise_type is not None:
            if noise_type != 'clean':
                # noisify train data; noisify expects labels shaped (N, 1).
                self.train_labels = np.asarray(
                    [[self.train_labels[i]]
                     for i in range(len(self.train_labels))])
                self.train_noisy_labels, self.actual_noise_rate = noisify(
                    train_labels=self.train_labels,
                    noise_type=noise_type,
                    noise_rate=noise_rate,
                    random_state=random_state,
                    nb_classes=self.nb_classes)
                # Flatten (N, 1) back to flat lists of scalar labels.
                self.train_noisy_labels = [
                    i[0] for i in self.train_noisy_labels
                ]
                _train_labels = [i[0] for i in self.train_labels]
                # Boolean mask: True where a label survived uncorrupted.
                self.noise_or_not = np.transpose(
                    self.train_noisy_labels) == np.transpose(_train_labels)
        else:
            f = self.test_list[0][0]
            file = os.path.join(self.root, self.base_folder, f)
            # Same 'with'-based handling as the training branch.
            with open(file, 'rb') as fo:
                if sys.version_info[0] == 2:
                    entry = pickle.load(fo)
                else:
                    entry = pickle.load(fo, encoding='latin1')
            self.test_data = entry['data']
            if 'labels' in entry:
                self.test_labels = entry['labels']
            else:
                self.test_labels = entry['fine_labels']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose(
                (0, 2, 3, 1))  # convert to HWC
            self.test_data = self.test_data.astype(dtype=np.float32) / 255.0
Example #4
0
    def __init__(self,
                 root,
                 train=0,
                 transform=None,
                 target_transform=None,
                 noise_type='clean',
                 noise_rate=0.00,
                 device=1,
                 redux=None,
                 image_size=None):
        """Load the ISIC skin-lesion dataset with optional label noise.

        Args:
            root: dataset root containing 'Images', 'Descriptions' and
                'data_list2.json'.
            train: split selector — 0: train, 1: test, anything else: val.
            transform: optional transform applied to each sample.
            target_transform: optional transform applied to each label.
            noise_type: 'clean' keeps the labels untouched; any other value
                is forwarded to ``noisify``.
            noise_rate: fraction of labels to corrupt (forwarded to ``noisify``).
            device: 0 keeps image file paths on disk; 1 pre-loads images
                into RAM via ``self.img_loader``.
            redux: if set, truncate the split to its first ``redux`` entries.
            image_size: square resize target; defaults to 720 when None.
        """
        base_folder = root
        self.image_folder = join(base_folder, 'Images')
        self.data_list_f = join(base_folder, "data_list2.json")
        self.label_folder = join(base_folder, 'Descriptions')

        self.labelOrder = ['benign', 'malignant']
        self.root = root
        self.transform = transform
        self.target_transform = target_transform
        self.train = train  # training set or test set
        self.device = device  # 0: hardware; 1: RAM
        self.noise_type = noise_type
        self.random_state = 0

        with open(self.data_list_f, 'r') as data_f:
            data_dict = json.load(data_f)

        if self.train == 0:
            self.data_list = data_dict['train']
        elif self.train == 1:
            self.data_list = data_dict['test']
        else:
            self.data_list = data_dict['val']

        if redux:
            self.data_list = self.data_list[:redux]

        # 'is None' rather than '== None' (identity test for the singleton).
        if image_size is None:
            self.imageTransform = transforms.Compose(
                [transforms.Resize((720, 720), interpolation=Image.NEAREST)])
        else:
            self.imageTransform = transforms.Compose([
                transforms.Resize((image_size, image_size),
                                  interpolation=Image.NEAREST)
            ])

        print("Loading data from {}".format(self.label_folder))
        # now load the picked numpy arrays
        self.data = []
        self.labels = []
        for f in self.data_list:
            file = join(self.label_folder, f)
            # 'with' closes each description file (the original opened one
            # handle per entry and never closed any of them).
            with open(file) as ff:
                entry = json.load(ff)
            try:
                flabel = entry['meta']['clinical']['benign_malignant']
                if flabel not in self.labelOrder:
                    raise Exception
                label_ = self.labelOrder.index(flabel)
            # 'except Exception' instead of a bare 'except' so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            except Exception:
                # Missing or unrecognized diagnosis falls back to class 0.
                label_ = 0
            data_ = join(self.image_folder, f + '.jpeg')
            #print(data_)
            assert os.path.isfile(data_)
            if self.device == 1:
                data_ = self.img_loader(data_)
            self.data.append(data_)
            self.labels.append(label_)

        if self.device == 1:
            # NOTE(review): '==' makes this a no-op comparison; '=' was
            # probably intended, but downstream code evidently relies on
            # self.data remaining a list, so behavior is left unchanged —
            # confirm intent before fixing.
            self.data == np.concatenate(self.data)

        # noisy labels
        self.labels = np.asarray(self.labels)
        if noise_type == 'clean':
            # builtin bool: the np.bool alias was removed in NumPy 1.24.
            self.noise_or_not = np.ones([len(self.labels)], dtype=bool)
        else:
            self.noisy_labels, self.actual_noise_rate = noisify(
                dataset="ISIC",
                nb_classes=2,
                train_labels=np.expand_dims(self.labels, 1),
                noise_type=noise_type,
                noise_rate=noise_rate,
                random_state=self.random_state)
            self.noisy_labels = self.noisy_labels.squeeze()
            # Boolean mask: True where a label survived uncorrupted.
            self.noise_or_not = self.noisy_labels == self.labels