def __init__(self, r, transform, mode, pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode

    train_loader, val_loader = get_chexpert_loaders(r, batch_size=32)
    if self.mode == 'test':
        self.test_data = val_loader.get_all_samples()
        self.test_label = val_loader.get_all_real_ground_truth()
    else:
        train_label = train_loader.get_all_real_ground_truth()
        train_data = train_loader.get_all_samples()
        noise_label = train_loader.get_all_labels()
        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        elif self.mode == 'labeled':
            pred_idx = pred.nonzero()[0]
            self.probability = [probability[i] for i in pred_idx]
            clean = (np.array(noise_label) == np.array(train_label))
            auc_meter = AUCMeter()
            auc_meter.reset()
            auc_meter.add(probability, clean)
            auc, _, _ = auc_meter.value()
            log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
            log.flush()
            self.train_data = train_data[pred_idx]
            self.noise_label = noise_label[pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
        elif self.mode == "unlabeled":
            pred_idx = (1 - pred).nonzero()[0]
            self.train_data = train_data[pred_idx]
            self.noise_label = noise_label[pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
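The `pred` mask and `probability` vector consumed above are produced upstream of the dataset. A minimal sketch, assuming the usual DivideMix-style recipe (the function name and threshold are illustrative, not from this codebase): fit a two-component Gaussian mixture to the per-sample training loss and take the posterior of the low-mean component as the probability of a sample being clean.

import numpy as np
from sklearn.mixture import GaussianMixture

def split_clean_noisy(losses, p_threshold=0.5):
    # normalise losses to [0, 1] and fit a 2-component GMM
    losses = (losses - losses.min()) / (losses.max() - losses.min() + 1e-8)
    gmm = GaussianMixture(n_components=2, max_iter=10, tol=1e-2, reg_covar=5e-4)
    gmm.fit(losses.reshape(-1, 1))
    # posterior of the low-loss component ~ probability of being clean
    prob = gmm.predict_proba(losses.reshape(-1, 1))[:, gmm.means_.argmin()]
    pred = prob > p_threshold  # boolean mask passed as `pred`
    return pred, prob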
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log='',
             teacher_idx=None, truncate_mode=None, refinement=None):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}  # class transition for asymmetric noise
    # for the distillation setting
    self.teacher_idx = teacher_idx
    self.truncate_mode = truncate_mode
    self.train_label = None
    self.refinement = refinement

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))
        self.train_label = train_label

        if os.path.exists(noise_file):
            noise_label = json.load(open(noise_file, "r"))
        else:  # inject noise
            fix_seed()
            noise_label = []
            idx = list(range(50000))
            random.shuffle(idx)
            num_noise = int(self.r * 50000)
            noise_idx = idx[:num_noise]
            for i in range(50000):
                if i in noise_idx:
                    if noise_mode == 'sym':
                        if dataset == 'cifar10':
                            noiselabel = random.randint(0, 9)
                        elif dataset == 'cifar100':
                            noiselabel = random.randint(0, 99)
                        noise_label.append(noiselabel)
                    elif noise_mode == 'asym':
                        noiselabel = self.transition[train_label[i]]
                        noise_label.append(noiselabel)
                else:
                    noise_label.append(train_label[i])
            print("save noisy labels to %s ..." % noise_file)
            json.dump(noise_label, open(noise_file, "w"))

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
            if self.truncate_mode == 'initial':
                self.train_data = self.train_data[teacher_idx]
                self.noise_label = [noise_label[i] for i in teacher_idx]
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]
                if self.truncate_mode == 'initial':
                    # keep only predictions the teacher also trusts
                    pred_idx = list(set(pred_idx.tolist()) & set(teacher_idx.tolist()))
                    pred_idx = torch.tensor(pred_idx)
                self.probability = [probability[i] for i in pred_idx]
                clean = (np.array(noise_label) == np.array(train_label))
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]
                if self.truncate_mode == 'initial':
                    # everything the teacher dropped, plus the model's unlabeled picks
                    tmp_set = set(range(50000)) - set(teacher_idx.tolist())
                    tmp_set = tmp_set | set(pred_idx.tolist())
                    pred_idx = torch.tensor(list(tmp_set))
            elif self.mode == "labeled_svd":
                if self.refinement:
                    pred_idx = pred.nonzero()[0]
                    pred_idx = torch.tensor(list(set(pred_idx.tolist()) & set(teacher_idx.tolist())))
                    self.probability = [probability[i] for i in pred_idx]
                    clean = (np.array(noise_label) == np.array(train_label))
                    auc_meter = AUCMeter()
                    auc_meter.reset()
                    auc_meter.add(probability, clean)
                    auc, _, _ = auc_meter.value()
                    log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
                    log.flush()
                else:
                    pred_idx = teacher_idx
                    probability = torch.ones(50000,)
                    self.probability = [probability[i] for i in pred_idx]
                    log.write('Number of labeled samples (by svd):%d\n' % teacher_idx.shape[0])
            elif self.mode == "unlabeled_svd":
                if self.refinement:
                    clean_pred_idx = pred.nonzero()[0]
                    all_idx_set = set(range(50000))
                    pred_idx = torch.tensor(list(
                        all_idx_set - (set(clean_pred_idx.tolist()) & set(teacher_idx.tolist()))))
                else:
                    pred_idx_set = set(range(50000)) - set(teacher_idx.tolist())
                    pred_idx = torch.tensor(list(pred_idx_set))

            self.train_data = train_data[pred_idx]
            self.noise_label = [noise_label[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
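The index bookkeeping in the branches above reduces to set operations over sample indices. A standalone sketch (the helper name is hypothetical) of the refinement case: the labeled pool is the model's clean predictions intersected with the teacher's trusted indices, and the unlabeled pool is its complement.

import numpy as np
import torch

def split_by_teacher(pred, teacher_idx, n=50000):
    # pred: boolean array over all n samples; teacher_idx: tensor of trusted indices
    clean = set(np.nonzero(pred)[0].tolist()) & set(teacher_idx.tolist())
    labeled = torch.tensor(sorted(clean))
    unlabeled = torch.tensor(sorted(set(range(n)) - clean))
    return labeled, unlabeled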
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}  # class transition for asymmetric noise

    if self.mode == 'test' or self.mode == 'test_average':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        if self.mode == 'eval' or self.mode == 'eval_average':
            # the last 5,000 training images form a clean held-out split
            self.eval_data = train_data[45000:]
            self.eval_label = train_label[45000:]
        else:
            # for 'labeled'/'unlabeled' runs the noise file is expected to exist
            # already: `size` below is only set for the 'all'/'train' mode families
            if os.path.exists(noise_file):
                noise_label = json.load(open(noise_file, "r"))
            else:  # inject noise
                noise_label = []
                if self.mode in ['all', 'benchmark_all', 'benchmark_all_average']:
                    size = 50000
                elif self.mode in ['train', 'benchmark', 'benchmark_average']:
                    size = 45000
                idx = list(range(size))
                random.shuffle(idx)
                num_noise = int(self.r * size)
                noise_idx = idx[:num_noise]
                for i in range(size):
                    if i in noise_idx:
                        if noise_mode == 'sym':
                            if dataset == 'cifar10':
                                noiselabel = random.randint(0, 9)
                            elif dataset == 'cifar100':
                                noiselabel = random.randint(0, 99)
                            noise_label.append(noiselabel)
                        elif noise_mode == 'asym':
                            noiselabel = self.transition[train_label[i]]
                            noise_label.append(noiselabel)
                    else:
                        noise_label.append(train_label[i])
                print("save noisy labels to %s ..." % noise_file)
                json.dump(noise_label, open(noise_file, "w"))

            if self.mode in ['all', 'benchmark_all', 'benchmark_all_average']:
                self.train_data = train_data
                self.noise_label = noise_label
                self.clean_label = train_label
            elif self.mode in ['train', 'benchmark', 'benchmark_average']:
                self.train_data = train_data[:45000]
                self.noise_label = noise_label[:45000]
                self.clean_label = train_label[:45000]
            else:
                if self.mode == "labeled":
                    pred_idx = pred.nonzero()[0]
                    self.probability = [probability[i] for i in pred_idx]
                    clean = (np.array(noise_label) == np.array(train_label))
                    auc_meter = AUCMeter()
                    auc_meter.reset()
                    auc_meter.add(probability, clean)
                    auc, _, _ = auc_meter.value()
                    log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
                    log.flush()
                elif self.mode == "unlabeled":
                    pred_idx = (1 - pred).nonzero()[0]
                self.train_data = train_data[pred_idx]
                self.noise_label = [noise_label[i] for i in pred_idx]
                print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
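This variant reserves the last 5,000 training images as a clean held-out split and injects noise only into the first 45,000 in the 'train'/'benchmark' mode families. A hedged usage sketch (the class name `cifar_dataset` is a placeholder; transforms are omitted):

# eval split: the clean tail of the training set (indices 45000:)
eval_set = cifar_dataset(dataset='cifar10', r=0.5, noise_mode='sym',
                         root_dir='./cifar-10-batches-py', transform=None, mode='eval')
# train split: noise injected over the first 45,000 images only
train_set = cifar_dataset(dataset='cifar10', r=0.5, noise_mode='sym',
                          root_dir='./cifar-10-batches-py', transform=None, mode='train',
                          noise_file='./cifar10_0.5_sym.json')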
def __init__(self, dataset, noisy_dataset, r, on, noise_mode, root_dir,
             noise_data_dir, transform, mode, noise_file='', pred=[],
             probability=[], log='', targets=None):
    self.r = r  # total noise ratio
    self.on = on  # proportion of open noise
    self.transform = transform
    self.mode = mode
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}  # class transition for asymmetric noise
    self.open_noise = None
    self.closed_noise = None

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    elif self.mode == 'clean':
        if not os.path.exists(noise_file):
            print('Noise not defined')
            return
        if self.open_noise is None or self.closed_noise is None:
            noise = json.load(open(noise_file, "r"))
            noise_labels = noise['noise_labels']
            self.open_noise = noise['open_noise']
            self.closed_noise = noise['closed_noise']
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
            train_data = train_data.reshape((50000, 3, 32, 32))
            train_data = train_data.transpose((0, 2, 3, 1))
            open_noise = [item[0] for item in self.open_noise]
            clean_indices = list(set(range(50000)) - set(open_noise) - set(self.closed_noise))
            self.clean_data = train_data[clean_indices]
            self.clean_label = np.asarray(train_label)[clean_indices]
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))
        if noisy_dataset == 'imagenet32':
            noise_data = None  # open-set images are loaded from individual files below
        else:
            noise_data = unpickle('%s/train' % noise_data_dir)['data'] \
                .reshape((50000, 3, 32, 32)).transpose((0, 2, 3, 1))

        if os.path.exists(noise_file):
            noise = json.load(open(noise_file, "r"))
            noise_labels = noise['noise_labels']
            self.open_noise = noise['open_noise']
            self.closed_noise = noise['closed_noise']
            for cleanIdx, noisyIdx in noise['open_noise']:
                if noisy_dataset == 'imagenet32':
                    train_data[cleanIdx] = np.asarray(
                        Image.open('{}/{}.png'.format(
                            noise_data_dir, str(noisyIdx + 1).zfill(7)))).reshape((32, 32, 3))
                else:
                    train_data[cleanIdx] = noise_data[noisyIdx]
        else:  # inject noise
            noise_labels = []  # all labels (some noisy, some clean)
            idx = list(range(50000))  # indices of the cifar training set
            random.shuffle(idx)
            num_total_noise = int(self.r * 50000)  # total amount of noise
            num_open_noise = int(self.on * num_total_noise)  # amount of open-set noise
            if noisy_dataset == 'imagenet32':  # indices of open-set source images
                target_noise_idx = list(range(1281149))
            else:
                target_noise_idx = list(range(50000))
            random.shuffle(target_noise_idx)
            # clean sample -> open-set sample mapping
            self.open_noise = list(zip(idx[:num_open_noise], target_noise_idx[:num_open_noise]))
            # closed-set noise indices
            self.closed_noise = idx[num_open_noise:num_total_noise]
            # populate noise_labels
            for i in range(50000):
                if i in self.closed_noise:
                    if noise_mode == 'sym':
                        if dataset == 'cifar10':
                            noiselabel = random.randint(0, 9)
                        elif dataset == 'cifar100':
                            noiselabel = random.randint(0, 99)
                        noise_labels.append(noiselabel)
                    elif noise_mode == 'asym':
                        noiselabel = self.transition[train_label[i]]
                        noise_labels.append(noiselabel)
                else:
                    noise_labels.append(train_label[i])
            # populate open-set noise images
            for cleanIdx, noisyIdx in self.open_noise:
                if noisy_dataset == 'imagenet32':
                    train_data[cleanIdx] = np.asarray(
                        Image.open('{}/{}.png'.format(
                            noise_data_dir, str(noisyIdx + 1).zfill(7)))).reshape((32, 32, 3))
                else:
                    train_data[cleanIdx] = noise_data[noisyIdx]
            # write the noise to a file for re-use
            noise = {'noise_labels': noise_labels,
                     'open_noise': self.open_noise,
                     'closed_noise': self.closed_noise}
            print("save noise to %s ..." % noise_file)
            json.dump(noise, open(noise_file, "w"))

        if self.mode == 'all':
            self.train_data = train_data
            if targets is None:
                self.noise_labels = noise_labels
            else:
                self.noise_labels = targets
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]
                self.probability = [probability[i] for i in pred_idx]
                clean = (np.array(noise_labels) == np.array(train_label))
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                # note: if all the labels are clean, the following returns NaN
                auc, _, _ = auc_meter.value()
            elif self.mode == "unlabeled":
                pred_idx = pred.nonzero()[0]  # note: this variant indexes with `pred` itself; the other loaders use (1 - pred) here
            self.train_data = train_data[pred_idx]
            self.noise_labels = [noise_labels[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_labels)))
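A hedged standalone sketch (function name hypothetical) of the split arithmetic above: of the N training images, r*N are corrupted in total; a fraction `on` of those become open-set samples (replaced by out-of-distribution images) and the rest receive closed-set label noise.

import random

def plan_noise(n=50000, r=0.4, on=0.5, openset_pool=50000, seed=0):
    # openset_pool would be 1281149 for imagenet32
    rng = random.Random(seed)
    idx = list(range(n)); rng.shuffle(idx)
    pool = list(range(openset_pool)); rng.shuffle(pool)
    num_total = int(r * n)
    num_open = int(on * num_total)
    open_noise = list(zip(idx[:num_open], pool[:num_open]))  # clean idx -> OOD idx
    closed_noise = idx[num_open:num_total]                   # gets flipped labels
    return open_noise, closed_noise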
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log=''):
    # mode: 'test' | 'all' | 'labeled' | 'unlabeled'
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    self.transition = {0: 0, 2: 0, 4: 4, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}  # class transition for asymmetric noise

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        # generate the noisy labels
        if os.path.exists(noise_file):
            noise_label = json.load(open(noise_file, "r"))
        else:  # inject noise
            noise_label = []
            idx = list(range(50000))
            random.shuffle(idx)
            # The original `num_noise = int(self.r*50000); noise_idx = idx[:num_noise]`
            # misrepresents the actual noise level, so every index is corrupted
            # through a class-transition matrix instead.
            noise_idx = idx[:]
            num_classes = 10 if dataset == 'cifar10' else 100
            if noise_mode == 'sym':
                C = uniform_mix_C(self.r, num_classes)
            elif noise_mode == 'asym':
                C = flip_labels_C(self.r, num_classes)
            for i in range(50000):
                if i in noise_idx:
                    # cast to int so the label list stays JSON-serialisable
                    noiselabel = int(np.random.choice(num_classes, p=C[train_label[i]]))
                    noise_label.append(noiselabel)
                else:
                    noise_label.append(train_label[i])
            print("save noisy labels to %s ..." % noise_file)
            json.dump(noise_label, open(noise_file, "w"))

        if self.mode == 'all':  # the full training split
            self.train_data = train_data
            self.noise_label = noise_label
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]  # e.g. 4770 samples
                self.probability = [probability[i] for i in pred_idx]
                clean = (np.array(noise_label) == np.array(train_label))  # e.g. 39981 clean
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]  # e.g. 45230 samples
            self.train_data = train_data[pred_idx]
            self.noise_label = [noise_label[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
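`uniform_mix_C` and `flip_labels_C` are not defined in this excerpt. A plausible sketch, assuming the common GLC-style helpers these names usually refer to: `uniform_mix_C` interpolates between the identity and a uniform transition matrix, while `flip_labels_C` concentrates the corruption mass on one random wrong class per row.

import numpy as np

def uniform_mix_C(mixing_ratio, num_classes):
    # interpolate between the identity and the uniform transition matrix
    return mixing_ratio * np.full((num_classes, num_classes), 1 / num_classes) + \
        (1 - mixing_ratio) * np.eye(num_classes)

def flip_labels_C(corruption_prob, num_classes, seed=1):
    # keep 1 - corruption_prob on the diagonal and put corruption_prob on one
    # randomly chosen wrong class per row
    np.random.seed(seed)
    C = np.eye(num_classes) * (1 - corruption_prob)
    row_indices = np.arange(num_classes)
    for i in range(num_classes):
        C[i][np.random.choice(row_indices[row_indices != i])] = corruption_prob
    return C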
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    # self.transition = {0:0, 2:0, 4:7, 7:7, 1:1, 9:1, 3:5, 5:3, 6:6, 8:8}  # class transition for asymmetric noise
    # self.transition = {0:1, 1:2, 2:3, 3:4, 4:5, 5:6, 6:7, 7:8, 8:9, 9:0}  # ten classes
    self.transition = {0: 1, 1: 0}  # two classes

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
            if noise_mode == 'asym_two_unbalanced_classes':
                # collapse to a binary task: class 1 vs everything else
                for i in range(len(self.test_label)):
                    if self.test_label[i] != 1:
                        self.test_label[i] = 0
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
            if noise_mode == 'asym_two_unbalanced_classes':
                # collapse to a binary task: class 1 vs everything else
                for i in range(len(train_label)):
                    if train_label[i] != 1:
                        train_label[i] = 0
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        # reload the noise generated at the first initialisation on every call
        if os.path.exists(noise_file):
            noise_label = json.load(open(noise_file, "r"))
        else:  # inject noise
            noise_label = []
            idx = list(range(50000))
            random.shuffle(idx)
            if noise_mode == 'sym':
                # The re-roll below guarantees every selected sample is actually
                # mislabeled, so the selection fraction is rescaled:
                # (r/9) / (1 - r + r/9) simplifies to r / (9 - 8r).
                num_noise = int((self.r / 9) / (1 - self.r + self.r / 9) * 50000)
            else:
                num_noise = int(self.r * 50000)
            noise_idx = idx[:num_noise]
            for i in range(50000):
                if i in noise_idx:
                    if noise_mode == 'sym':
                        if dataset == 'cifar10':
                            noiselabel = random.randint(0, 9)
                            while noiselabel == train_label[i]:  # re-roll until the label flips
                                noiselabel = random.randint(0, 9)
                        elif dataset == 'cifar100':
                            noiselabel = random.randint(0, 99)
                        noise_label.append(noiselabel)
                    elif noise_mode == 'asym_two_unbalanced_classes':
                        noiselabel = self.transition[train_label[i]]
                        noise_label.append(noiselabel)
                else:
                    noise_label.append(train_label[i])
            print("save noisy labels to %s ..." % noise_file)
            json.dump(noise_label, open(noise_file, "w"))

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        else:
            if self.mode == "labeled":
                # nonzero() returns a tuple of index arrays; [0] gives the sample indices
                pred_idx = pred.nonzero()[0]
                self.probability = [probability[i] for i in pred_idx]
                clean = (np.array(noise_label) == np.array(train_label))
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]
            # depending on the mode, this holds either the labeled or the unlabeled split
            self.train_data = train_data[pred_idx]
            self.noise_label = [noise_label[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
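A quick standalone check (not from the source) of the symmetric-noise arithmetic above: the selection fraction simplifies to r / (9 - 8r), which stays below r for r < 1, compensating for the fact that the re-roll makes every selected sample actually mislabeled.

for r in (0.2, 0.5, 0.8):
    p = (r / 9) / (1 - r + r / 9)
    assert abs(p - r / (9 - 8 * r)) < 1e-12  # the two forms agree
    print('r=%.1f -> selected fraction p=%.4f' % (r, p))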
class AllInOneMeter(object):
    """All-in-one meter: per-channel AUCs, running losses, and Jaccard indices."""

    def __init__(self, device):
        self.device = device
        # one AUC meter per output head (2) and per mask channel (5)
        self.out1auc1 = AUCMeter()
        self.out1auc2 = AUCMeter()
        self.out1auc3 = AUCMeter()
        self.out1auc4 = AUCMeter()
        self.out1auc5 = AUCMeter()
        self.out2auc1 = AUCMeter()
        self.out2auc2 = AUCMeter()
        self.out2auc3 = AUCMeter()
        self.out2auc4 = AUCMeter()
        self.out2auc5 = AUCMeter()
        self.reset()

    def reset(self):
        for meter in (self.out1auc1, self.out1auc2, self.out1auc3, self.out1auc4, self.out1auc5,
                      self.out2auc1, self.out2auc2, self.out2auc3, self.out2auc4, self.out2auc5):
            meter.reset()
        self.loss1 = []
        self.loss2 = []
        self.loss3 = []
        self.loss = []
        self.jaccard = []
        self.intersection = torch.zeros([5], dtype=torch.float, device=self.device)
        self.union = torch.zeros([5], dtype=torch.float, device=self.device)

    def add(self, mask_prob, true_mask, mask_ind_prob1, mask_ind_prob2,
            true_mask_ind, loss1, loss2, loss3, loss):
        self.out1auc1.add(mask_ind_prob1[:, 0].data, true_mask_ind[:, 0].data)
        self.out1auc2.add(mask_ind_prob1[:, 1].data, true_mask_ind[:, 1].data)
        self.out1auc3.add(mask_ind_prob1[:, 2].data, true_mask_ind[:, 2].data)
        self.out1auc4.add(mask_ind_prob1[:, 3].data, true_mask_ind[:, 3].data)
        self.out1auc5.add(mask_ind_prob1[:, 4].data, true_mask_ind[:, 4].data)
        self.out2auc1.add(mask_ind_prob2[:, 0].data, true_mask_ind[:, 0].data)
        self.out2auc2.add(mask_ind_prob2[:, 1].data, true_mask_ind[:, 1].data)
        self.out2auc3.add(mask_ind_prob2[:, 2].data, true_mask_ind[:, 2].data)
        self.out2auc4.add(mask_ind_prob2[:, 3].data, true_mask_ind[:, 3].data)
        self.out2auc5.add(mask_ind_prob2[:, 4].data, true_mask_ind[:, 4].data)
        self.loss1.append(loss1)
        self.loss2.append(loss2)
        self.loss3.append(loss3)
        self.loss.append(loss)
        # binarise the predicted masks at a fixed 0.3 threshold
        y_pred = (mask_prob > 0.3).type(true_mask.dtype)
        y_true = true_mask
        # accumulate the per-channel intersection and the sum of both mask areas
        self.intersection += (y_pred * y_true).sum(dim=-2).sum(dim=-1).sum(dim=0)
        self.union += y_true.sum(dim=-2).sum(dim=-1).sum(dim=0) + \
            y_pred.sum(dim=-2).sum(dim=-1).sum(dim=0)

    def value(self):
        # `union` holds |pred| + |true|, so the true union is union - intersection
        jaccard_array = self.intersection / (self.union - self.intersection)
        jaccard = jaccard_array.mean()
        metrics = {
            'out1auc1': self.out1auc1.value()[0], 'out1auc2': self.out1auc2.value()[0],
            'out1auc3': self.out1auc3.value()[0], 'out1auc4': self.out1auc4.value()[0],
            'out1auc5': self.out1auc5.value()[0],
            'out2auc1': self.out2auc1.value()[0], 'out2auc2': self.out2auc2.value()[0],
            'out2auc3': self.out2auc3.value()[0], 'out2auc4': self.out2auc4.value()[0],
            'out2auc5': self.out2auc5.value()[0],
            'loss1': np.mean(self.loss1), 'loss2': np.mean(self.loss2),
            'loss3': np.mean(self.loss3), 'loss': np.mean(self.loss),
            'jaccard': jaccard.item(),
            'jaccard1': jaccard_array[0].item(), 'jaccard2': jaccard_array[1].item(),
            'jaccard3': jaccard_array[2].item(), 'jaccard4': jaccard_array[3].item(),
            'jaccard5': jaccard_array[4].item(),
        }
        return metrics
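A hedged usage sketch of the meter (shapes are assumptions: mask tensors of shape [batch, 5, H, W], channel-indicator tensors of shape [batch, 5]; random values stand in for model outputs):

import torch

meter = AllInOneMeter(device=torch.device('cpu'))
mask_prob = torch.rand(4, 5, 32, 32)                  # predicted masks
true_mask = (torch.rand(4, 5, 32, 32) > 0.5).float()  # ground-truth masks
ind_prob1 = torch.rand(4, 5)                          # head-1 channel scores
ind_prob2 = torch.rand(4, 5)                          # head-2 channel scores
true_ind = torch.tensor([[1., 0., 1., 0., 1.],        # channel-presence targets
                         [0., 1., 0., 1., 0.],
                         [1., 1., 0., 0., 1.],
                         [0., 0., 1., 1., 0.]])
meter.add(mask_prob, true_mask, ind_prob1, ind_prob2, true_ind,
          loss1=0.1, loss2=0.2, loss3=0.3, loss=0.6)
print(meter.value()['jaccard'], meter.value()['out1auc1'])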
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', clean_file='', pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.noise_mode = noise_mode
    self.mode = mode
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}  # class transition for asymmetric noise

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/data/cifar-10-batches-py/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/data/cifar-100-python/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data/cifar-10-batches-py/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/data/cifar-100-python/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))
        train_label = np.array(train_label)
        noise_label = train_label.copy()

        if dataset == 'cifar10':
            nb_classes = 10
        elif dataset == 'cifar100':
            nb_classes = 100
        clean_per_class = int(5000 / nb_classes)  # cifar10: 500, cifar100: 50
        noise_per_class = int(50000 / nb_classes * r)

        # keep the last clean_per_class samples of each class as clean data;
        # the remaining samples are candidates for noise.
        # class 0 is handled before the loop, classes 1..nb_classes-1 inside it.
        all_index = np.arange(50000).reshape(-1)
        clean_indices = all_index[np.where(train_label == 0)[0]][-clean_per_class:]
        noise_idx = [all_index[np.where(train_label == 0)[0]][:-clean_per_class]]
        for i in range(nb_classes - 1):
            indices1 = all_index[np.where(train_label == i + 1)[0]][-clean_per_class:]
            noisy_indices1 = all_index[np.where(train_label == i + 1)[0]][:-clean_per_class]
            clean_indices = np.concatenate((clean_indices, indices1))
            noise_idx.append(noisy_indices1)

        # add noise: within each class, relabel the noise_per_class samples
        # closest to a fixed "centre" image to the next class (cyclically)
        for t, i in enumerate(noise_idx):
            image_center = train_data[i[10]]  # one image of this class (index 10) serves as the centre
            norm_loss = np.zeros(len(i))
            for j, k in enumerate(i):
                images = train_data[k]
                # cast to float so the difference doesn't wrap around in uint8
                norm_loss[j] = np.linalg.norm(image_center.astype(np.float64) -
                                              images.astype(np.float64))
            noisy_indices = i[norm_loss.argsort()[:noise_per_class]]
            noise_label[noisy_indices] = (t + 1) % nb_classes

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        elif self.mode == 'small':
            self.train_data = train_data[::100]
            self.noise_label = noise_label[::100]
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]
                self.probability = [probability[i] for i in pred_idx]
                clean = (noise_label == train_label)
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]
            self.train_data = train_data[pred_idx]
            self.noise_label = noise_label[pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
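A minimal standalone sketch (names hypothetical) of the distance-based noise above: within each class, the noise_per_class images closest in L2 distance to a chosen centre image are relabelled to the next class, cyclically.

import numpy as np

def flip_nearest(train_data, class_indices, centre_pos, noise_per_class, new_label, noise_label):
    # class_indices: np.ndarray of this class's candidate indices into train_data
    centre = train_data[class_indices[centre_pos]].astype(np.float64)
    dists = np.array([np.linalg.norm(centre - train_data[k].astype(np.float64))
                      for k in class_indices])
    # the noise_per_class samples nearest to the centre get the new label
    noise_label[class_indices[dists.argsort()[:noise_per_class]]] = new_label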