def confusion_matrix(output, labels, num_labels):
    conf_meter = ConfusionMeter(num_labels)
    auc_meter = AUCMeter()
    preds = output.max(1)[1].type_as(labels)
    conf_meter.add(preds.data.squeeze(), labels.type(torch.LongTensor).data)
    auc_meter.add(preds.data.squeeze(), labels.data.squeeze())
    return conf_meter, auc_meter
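# For reference: a minimal, self-contained sketch of the torchnet AUCMeter pattern
# used throughout these snippets. The random scores and labels below are placeholder
# data introduced here for illustration, not taken from any of these projects.
import numpy as np
from torchnet.meter import AUCMeter

scores = np.random.rand(128)                       # predicted probabilities
targets = (np.random.rand(128) > 0.5).astype(int)  # binary {0, 1} ground truth

meter = AUCMeter()
meter.add(scores, targets)
auc, tpr, fpr = meter.value()  # value() returns (area, tpr, fpr)
print("AUC = %.3f" % auc)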
class ModelMeter:

    def __init__(self, labels, logger=None):
        # self._labels = labels
        self._logger = logger
        self._loss = []
        self._acc = []
        self._labels = labels
        self._conf = ConfusionMeter(len(self._labels))
        # self._auc = {label: AUCMeter() for label in range(len(labels))}
        self._auc = AUCMeter()
        self._c_inp, self._c_tar = Counter(), Counter()

    def update(self, loss, output, targets):
        self._loss.append(loss)
        self._acc.append(accuracy(output, targets))
        preds = output.max(1)[1].type_as(targets)
        p = preds.cpu().data.numpy()
        t = targets.cpu().data.numpy()
        cur_indexes = np.logical_and(p != 2, t != 2)
        self._auc.add(p[cur_indexes], t[cur_indexes])
        self._conf.add(preds.data.squeeze(), targets.data.squeeze())
        self._c_inp.update(p)
        self._c_tar.update(t)
        # preds = preds[preds != 2 and (targets != 2)]
        # for label, meter in self._auc.items():
        #     cur_pred = preds.eq(label)
        #     cur_label = targets.eq(label)
        #     meter.add(cur_pred.data.squeeze(), cur_label.data.squeeze())

    def log(self, msg, level=logging.DEBUG, func=None):
        if func is None:
            loss, acc = self._loss[-1].data[0], self._acc[-1].data[0]
        else:
            loss, acc = func(self._loss).data[0], func(self._acc).data[0]
        self._logger.log(level, "%s: loss: %3.4f, acc: %3.4f", msg, loss, acc)

    @property
    def last_acc(self):
        return self._acc[-1].data[0]

    @property
    def last_loss(self):
        return self._loss[-1].data[0]
def test(segment_size):
    test_list_pub = pickle.load(
        open(os.path.join(MTAT_SPLIT_FOLDER, 'test_list_pub.cP'), 'rb'))
    total_test_size = len(test_list_pub)

    model = local_model(segment_size).cuda()
    model.load_state_dict(
        torch.load(
            os.path.join(ENCODER_FOLDER,
                         'local_model_' + str(segment_size) + '.pt')))
    model.eval()

    auc = AUCMeter()
    for start in range(0, total_test_size, n_songs):
        print("Loading dataset...", start)
        test_features = np.concatenate([
            np.load(
                os.path.join(MTAT_NPY_FOLDER, 'testing/' + test_list_pub[i]))
            for i in range(start, min(start + n_songs, total_test_size))
        ])
        test_labels = np.load(os.path.join(
            MTAT_SPLIT_FOLDER,
            'y_test_pub.npy'))[start:min(start + n_songs, total_test_size)]

        if normalization:
            mean = np.mean(test_features, axis=0)
            var = np.var(test_features, axis=0)
            test_features = (test_features - mean) / np.sqrt(var)

        test_data = CustomDataset(test_features, test_labels)
        test_loader = torch.utils.data.DataLoader(test_data,
                                                  batch_size=batch_size)
        print("Dataset loaded")

        for data, labels in test_loader:
            X = Variable(data).cuda()
            out, _ = model(X)
            auc_out = np.reshape(out.data.cpu().numpy(), -1)
            auc_target = np.reshape(labels, -1)
            auc.add(auc_out, auc_target)

    auc_tuple = auc.value()
    print("AUC = ", auc_tuple[0])
    plt.plot(auc_tuple[2], auc_tuple[1])
    plt.plot([0, 1])
    plt.show()
def evaluate(model, eval_iter, opt):
    model.eval()
    accuracy = []
    threshold = 0.5
    AUC_list = [AUCMeter() for _ in range(opt.label_size)]

    for index, batch in enumerate(eval_iter):
        text = batch.comment_text.data
        label = torch.stack([
            batch.toxic, batch.severe_toxic, batch.obscene, batch.threat,
            batch.insult, batch.identity_hate
        ], dim=1)
        label = label.float()
        # label: (batch_size, classes_size), text: (batch_size, max_seq_len)
        pred = model(text)
        is_class = pred > threshold  # True where the score exceeds the threshold
        is_class = is_class.float()  # (batch_size, classes_size)

        # feed sigmoid probabilities to the per-class AUC meters
        pred = torch.nn.functional.sigmoid(pred)
        print(pred)
        print(label)
        for i in range(opt.label_size):
            if opt.use_cuda:
                AUC_list[i].add(output=pred.data.cpu().numpy()[:, i],
                                target=label.data.cpu().numpy()[:, i])
            else:
                AUC_list[i].add(output=pred.data.numpy()[:, i],
                                target=label.data.numpy()[:, i])

        precision = is_class == label  # (batch_size, classes_size)
        precision = precision.float()
        precision = precision.mean(dim=0)  # (classes_size,)
        if opt.use_cuda:
            accuracy.append(precision.data.cpu().numpy())
        else:
            accuracy.append(precision.data.numpy())

    model.train()
    # per-class AUC
    AUC_scores = [AUC_list[i].value()[0] for i in range(opt.label_size)]
    # return the mean per-class accuracy over all batches and the AUC for each of the six classes
    return np.mean(accuracy, axis=0), AUC_scores
def __init__(self,
             input_key: str = "targets",
             output_key: str = "logits",
             prefix: str = "auc",
             class_names: List[str] = None,
             num_classes: int = 1):
    self.prefix = prefix
    self.input_key = input_key
    self.output_key = output_key
    self.class_names = class_names
    self.num_classes = num_classes \
        if class_names is None \
        else len(class_names)
    assert self.num_classes is not None
    self.auc_meters = [AUCMeter() for _ in range(self.num_classes)]
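# A hedged sketch (not part of the original callback) showing one way the per-class
# meters created above could be fed and aggregated; `update_auc_meters` and
# `mean_auc` are hypothetical helpers introduced here for illustration.
import numpy as np
import torch
from torchnet.meter import AUCMeter


def update_auc_meters(auc_meters, logits, targets):
    # logits, targets: (batch_size, num_classes) tensors; one meter per class column
    probs = torch.sigmoid(logits)
    for i, meter in enumerate(auc_meters):
        meter.add(probs[:, i].detach().cpu(), targets[:, i].detach().cpu())


def mean_auc(auc_meters):
    # value()[0] is the ROC AUC accumulated by each meter
    return float(np.mean([meter.value()[0] for meter in auc_meters]))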
def __init__(self, r, transform, mode, pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode

    train_loader, val_loader = get_chexpert_loaders(r, batch_size=32)

    if self.mode == 'test':
        self.test_data = val_loader.get_all_samples()
        self.test_label = val_loader.get_all_real_ground_truth()
    else:
        train_label = train_loader.get_all_real_ground_truth()
        train_data = train_loader.get_all_samples()
        noise_label = train_loader.get_all_labels()

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        elif self.mode == 'labeled':
            pred_idx = pred.nonzero()[0]
            self.probability = [probability[i] for i in pred_idx]

            clean = (np.array(noise_label) == np.array(train_label))
            auc_meter = AUCMeter()
            auc_meter.reset()
            auc_meter.add(probability, clean)
            auc, _, _ = auc_meter.value()
            log.write('Number of labeled samples:%d AUC:%.3f\n' % (pred.sum(), auc))
            log.flush()

            self.train_data = train_data[pred_idx]
            self.noise_label = noise_label[pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
        elif self.mode == "unlabeled":
            pred_idx = (1 - pred).nonzero()[0]
            self.train_data = train_data[pred_idx]
            self.noise_label = noise_label[pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    # class transition for asymmetric noise
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}

    if self.mode == 'test' or self.mode == 'test_average':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        if self.mode == 'eval' or self.mode == 'eval_average':
            self.eval_data = train_data[45000:]
            self.eval_label = train_label[45000:]
        else:
            if os.path.exists(noise_file):
                noise_label = json.load(open(noise_file, "r"))
            else:  # inject noise
                noise_label = []
                if self.mode in ['all', 'benchmark_all', 'benchmark_all_average']:
                    size = 50000
                elif self.mode in ['train', 'benchmark', 'benchmark_average']:
                    size = 45000
                idx = list(range(size))
                random.shuffle(idx)
                num_noise = int(self.r * size)
                noise_idx = idx[:num_noise]
                for i in range(size):
                    if i in noise_idx:
                        if noise_mode == 'sym':
                            if dataset == 'cifar10':
                                noiselabel = random.randint(0, 9)
                            elif dataset == 'cifar100':
                                noiselabel = random.randint(0, 99)
                            noise_label.append(noiselabel)
                        elif noise_mode == 'asym':
                            noiselabel = self.transition[train_label[i]]
                            noise_label.append(noiselabel)
                    else:
                        noise_label.append(train_label[i])
                print("save noisy labels to %s ..." % noise_file)
                json.dump(noise_label, open(noise_file, "w"))

            if self.mode in ['all', 'benchmark_all', 'benchmark_all_average']:
                self.train_data = train_data
                self.noise_label = noise_label
                self.clean_label = train_label
            elif self.mode in ['train', 'benchmark', 'benchmark_average']:
                self.train_data = train_data[:45000]
                self.noise_label = noise_label[:45000]
                self.clean_label = train_label[:45000]
            else:
                if self.mode == "labeled":
                    pred_idx = pred.nonzero()[0]
                    self.probability = [probability[i] for i in pred_idx]

                    clean = (np.array(noise_label) == np.array(train_label))
                    auc_meter = AUCMeter()
                    auc_meter.reset()
                    auc_meter.add(probability, clean)
                    auc, _, _ = auc_meter.value()
                    log.write('Number of labeled samples:%d AUC:%.3f\n' %
                              (pred.sum(), auc))
                    log.flush()
                elif self.mode == "unlabeled":
                    pred_idx = (1 - pred).nonzero()[0]

                self.train_data = train_data[pred_idx]
                self.noise_label = [noise_label[i] for i in pred_idx]
                print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
# Tail of a heatmap-overlay helper (the function header is not part of this excerpt):
background_image = Image.fromarray(background)

foreground = np.uint8(cm.jet(heatmap) * 255)
heatmap_opacity = foreground[:, :, 3]
heatmap_opacity[:] = 64
threshold_prob = min(0.3, heatmap.max() - 0.05)
heatmap_opacity[heatmap < threshold_prob] = 0
foreground_image = Image.fromarray(foreground)

image = Image.alpha_composite(background_image, foreground_image)
image.load()  # needed for split()
background = Image.new('RGB', image.size, color)
background.paste(image, mask=image.split()[3])
image_array = np.array(background, dtype=np.uint8)
return image_array


# Validation pass: accumulate ROC AUC and a confusion matrix over the val iterator.
auc_meter = AUCMeter()
conf_meter = ConfusionMeter(options.n_classes)
iterator = get_train_val_iterators(options)
bar = progressbar.ProgressBar()
for batch_idx, data in bar(enumerate(iterator['val']())):
    output = model(Variable(data['input'].cuda(), volatile=True))
    target = data['target'].cpu().numpy()
    prob_tensor = F.softmax(output['classification']).data
    prob = prob_tensor.cpu().numpy()
    heatmap = F.softmax(output['segmentation']).data.cpu().numpy()

    auc_meter.add(prob[:, 1], target)
    conf_meter.add(prob_tensor, data['target'])

    input_images = data['input'].cpu().numpy()
    for i in range(input_images.shape[0]):
        image = np.repeat(input_images[i], 3, axis=0)
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log='',
             teacher_idx=None, truncate_mode=None, refinement=None):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    # class transition for asymmetric noise
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}

    # For distill test
    self.teacher_idx = teacher_idx
    self.truncate_mode = truncate_mode
    self.train_label = None
    self.refinement = refinement

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))
        self.train_label = train_label

        if os.path.exists(noise_file):
            noise_label = json.load(open(noise_file, "r"))
        else:  # inject noise
            fix_seed()
            noise_label = []
            idx = list(range(50000))
            random.shuffle(idx)
            num_noise = int(self.r * 50000)
            noise_idx = idx[:num_noise]
            for i in range(50000):
                if i in noise_idx:
                    if noise_mode == 'sym':
                        if dataset == 'cifar10':
                            noiselabel = random.randint(0, 9)
                        elif dataset == 'cifar100':
                            noiselabel = random.randint(0, 99)
                        noise_label.append(noiselabel)
                    elif noise_mode == 'asym':
                        noiselabel = self.transition[train_label[i]]
                        noise_label.append(noiselabel)
                else:
                    noise_label.append(train_label[i])
            print("save noisy labels to %s ..." % noise_file)
            json.dump(noise_label, open(noise_file, "w"))

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
            if self.truncate_mode == 'initial':
                self.train_data = self.train_data[teacher_idx]
                self.noise_label = [noise_label[i] for i in teacher_idx]
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]
                if self.truncate_mode == 'initial':
                    pred_idx = pred_idx.tolist()
                    teacher_idx = teacher_idx.tolist()
                    pred_idx = list(set(pred_idx) & set(teacher_idx))
                    pred_idx = torch.tensor(pred_idx)

                self.probability = [probability[i] for i in pred_idx]
                clean = (np.array(noise_label) == np.array(train_label))
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' %
                          (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]
                if self.truncate_mode == 'initial':
                    whole_idx = list(range(50000))
                    pred_idx = pred_idx.tolist()
                    teacher_idx = teacher_idx.tolist()
                    tmp_set = set(whole_idx) - set(teacher_idx)
                    tmp_set = tmp_set | set(pred_idx)
                    pred_idx = torch.tensor(list(tmp_set))
            elif self.mode == "labeled_svd":
                if self.refinement:
                    pred_idx = pred.nonzero()[0]
                    pred_idx_set = set(pred_idx.tolist())
                    teacher_idx_set = set(teacher_idx.tolist())
                    pred_idx = torch.tensor(list(pred_idx_set & teacher_idx_set))

                    self.probability = [probability[i] for i in pred_idx]
                    clean = (np.array(noise_label) == np.array(train_label))
                    auc_meter = AUCMeter()
                    auc_meter.reset()
                    auc_meter.add(probability, clean)
                    auc, _, _ = auc_meter.value()
                    log.write('Number of labeled samples:%d AUC:%.3f\n' %
                              (pred.sum(), auc))
                    log.flush()
                else:
                    pred_idx = teacher_idx
                    probability = torch.ones(50000,)
                    self.probability = [probability[i] for i in pred_idx]
                    log.write('Number of labeled samples (by svd) : %d' %
                              teacher_idx.shape[0])
            elif self.mode == "unlabeled_svd":
                if self.refinement:
                    clean_pred_idx = pred.nonzero()[0]
                    clean_pred_idx_set = set(clean_pred_idx.tolist())
                    teacher_idx_set = set(teacher_idx.tolist())
                    all_idx_set = set(range(50000))
                    pred_idx = torch.tensor(
                        list(all_idx_set - (clean_pred_idx_set & teacher_idx_set)))
                else:
                    pred_idx = torch.arange(0, 50000)
                    pred_idx_set = set(pred_idx.tolist()) - set(teacher_idx.tolist())
                    pred_idx = torch.tensor(list(pred_idx_set))

            self.train_data = train_data[pred_idx]
            self.noise_label = [noise_label[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
model = model_data["model"]
loss_fn = model_data["criterion"]
optim = model_data["optim"]

nb_data_test = 300
nb_canaux = 3
data_root = "./data/"

data = load_preprocess_data(data_root, 0, nb_data_test)
signals = data["signals"]
target = data["targets"]

res = np.zeros(target.shape)

auc_c0 = AUCMeter()
auc_c1 = AUCMeter()
auc_c2 = AUCMeter()

print("Testing model...")
model.eval()
for i in tqdm(range(nb_data_test)):
    out = model(th.Tensor(signals[None, i, :, :])).detach().numpy()

    auc_c0.add(out[None, 0, 0], target[None, i, 0])
    auc_c1.add(out[None, 0, 1], target[None, i, 1])
    auc_c2.add(out[None, 0, 2], target[None, i, 2])

    res[i, 0] = 1 if out[0, 0] > 0.5 else -1
    res[i, 1] = 1 if out[0, 1] > 0.5 else -1
    res[i, 2] = 1 if out[0, 2] > 0.5 else -1
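# A small follow-up sketch (an assumption, not part of the original script): reading
# out the per-channel ROC AUC accumulated above; AUCMeter.value() returns (area, tpr, fpr).
for name, channel_meter in [("c0", auc_c0), ("c1", auc_c1), ("c2", auc_c2)]:
    print("AUC %s = %.4f" % (name, channel_meter.value()[0]))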
def __init__(self, dataset, noisy_dataset, r, on, noise_mode, root_dir,
             noise_data_dir, transform, mode, noise_file='', pred=[],
             probability=[], log='', targets=None):
    self.r = r  # total noise ratio
    self.on = on  # proportion of open noise
    self.transform = transform
    self.mode = mode
    # class transition for asymmetric noise
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}
    self.open_noise = None
    self.closed_noise = None

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    elif self.mode == 'clean':
        if not os.path.exists(noise_file):
            print('Noise not defined')
            return

        if self.open_noise is None or self.closed_noise is not None:
            noise = json.load(open(noise_file, "r"))
            noise_labels = noise['noise_labels']
            self.open_noise = noise['open_noise']
            self.closed_noise = noise['closed_noise']

        train_data = []
        train_label = []
        noise_data = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        open_noise = [item[0] for item in self.open_noise]
        clean_indices = list(
            set(range(50000)) - set(open_noise) - set(self.closed_noise))
        self.clean_data = train_data[clean_indices]
        self.clean_label = np.asarray(train_label)[clean_indices]
    else:
        train_data = []
        train_label = []
        noise_data = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))
        if noisy_dataset == 'imagenet32':
            noise_data = None
        else:
            noise_data = unpickle('%s/train' % noise_data_dir)['data'].reshape(
                (50000, 3, 32, 32)).transpose((0, 2, 3, 1))

        if os.path.exists(noise_file):
            noise = json.load(open(noise_file, "r"))
            noise_labels = noise['noise_labels']
            self.open_noise = noise['open_noise']
            self.closed_noise = noise['closed_noise']
            for cleanIdx, noisyIdx in noise['open_noise']:
                if noisy_dataset == 'imagenet32':
                    train_data[cleanIdx] = np.asarray(
                        Image.open('{}/{}.png'.format(
                            noise_data_dir,
                            str(noisyIdx + 1).zfill(7)))).reshape((32, 32, 3))
                else:
                    train_data[cleanIdx] = noise_data[noisyIdx]
        else:  # inject noise
            noise_labels = []  # all labels (some noisy, some clean)
            idx = list(range(50000))  # indices of cifar dataset
            random.shuffle(idx)
            num_total_noise = int(self.r * 50000)  # total amount of noise
            num_open_noise = int(self.on * num_total_noise)  # total amount of noisy/openset images
            if noisy_dataset == 'imagenet32':
                # indices of openset source images
                target_noise_idx = list(range(1281149))
            else:
                target_noise_idx = list(range(50000))
            random.shuffle(target_noise_idx)
            # clean sample -> openset sample mapping
            self.open_noise = list(
                zip(idx[:num_open_noise], target_noise_idx[:num_open_noise]))
            # closed set noise indices
            self.closed_noise = idx[num_open_noise:num_total_noise]

            # populate noise_labels
            for i in range(50000):
                if i in self.closed_noise:
                    if noise_mode == 'sym':
                        if dataset == 'cifar10':
                            noiselabel = random.randint(0, 9)
                        elif dataset == 'cifar100':
                            noiselabel = random.randint(0, 99)
                        noise_labels.append(noiselabel)
                    elif noise_mode == 'asym':
                        noiselabel = self.transition[train_label[i]]
                        noise_labels.append(noiselabel)
                else:
                    noise_labels.append(train_label[i])

            # populate openset noise images
            for cleanIdx, noisyIdx in self.open_noise:
                if noisy_dataset == 'imagenet32':
                    train_data[cleanIdx] = np.asarray(
                        Image.open('{}/{}.png'.format(
                            noise_data_dir,
                            str(noisyIdx + 1).zfill(7)))).reshape((32, 32, 3))
                else:
                    train_data[cleanIdx] = noise_data[noisyIdx]

            # write noise to a file, to re-use
            noise = {
                'noise_labels': noise_labels,
                'open_noise': self.open_noise,
                'closed_noise': self.closed_noise
            }
            print("save noise to %s ..." % noise_file)
            json.dump(noise, open(noise_file, "w"))

        if self.mode == 'all':
            self.train_data = train_data
            if targets is None:
                self.noise_labels = noise_labels
            else:
                self.noise_labels = targets
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]
                self.probability = [probability[i] for i in pred_idx]

                clean = (np.array(noise_labels) == np.array(train_label))
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                # note: if all the labels are clean, the following will return NaN
                auc, _, _ = auc_meter.value()
            elif self.mode == "unlabeled":
                pred_idx = pred.nonzero()[0]

            self.train_data = train_data[pred_idx]
            self.noise_labels = [noise_labels[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_labels)))
def __init__(self, num_class):
    super().__init__()
    self.num_class = num_class
    self.meters: List[AUCMeter] = [
        AUCMeter() for k in range(self.num_class)
    ]
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log=''):
    # mode
    #   Test      : Test
    #   All       : All
    #   Labeled   : Labeled
    #   UnLabeled : UnLabeled
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    # class transition for asymmetric noise
    self.transition = {0: 0, 2: 0, 4: 4, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        # Generate the noisy labels
        if os.path.exists(noise_file):
            noise_label = json.load(open(noise_file, "r"))
        else:  # inject noise
            noise_label = []
            idx = list(range(50000))
            random.shuffle(idx)
            # num_noise = int(self.r*50000) -> this was misleading, so treat every index as a noise candidate
            # noise_idx = idx[:num_noise]
            noise_idx = idx[:]

            num_classes = 10 if dataset == 'cifar10' else 100
            if noise_mode == 'sym':
                C = uniform_mix_C(self.r, num_classes)
                # if dataset=='cifar10':
                #     noiselabel = random.randint(0,9)
                # elif dataset=='cifar100':
                #     noiselabel = random.randint(0,99)
                # noise_label.append(noiselabel)
            elif noise_mode == 'asym':
                C = flip_labels_C(self.r, num_classes)

            for i in range(50000):
                if i in noise_idx:
                    noiselabel = np.random.choice(num_classes, p=C[train_label[i]])
                    noise_label.append(noiselabel)
                else:
                    noise_label.append(train_label[i])
            print("save noisy labels to %s ..." % noise_file)
            json.dump(noise_label, open(noise_file, "w"))

        # Whole training set
        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]  # e.g. 4770 samples
                self.probability = [probability[i] for i in pred_idx]  # e.g. 4770

                clean = (np.array(noise_label) == np.array(train_label))  # e.g. 39981 clean
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' %
                          (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]  # e.g. 45230 samples

            self.train_data = train_data[pred_idx]
            self.noise_label = [noise_label[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
def test_vgg16():
    parser = argparse.ArgumentParser("Test VGG16 Main")

    parser.add_argument("-d", "--data-path", type=str, required=True,
                        dest="data_path")
    parser.add_argument("-l", "--label-path", type=str, required=True,
                        dest="label_path")
    parser.add_argument("-m", "--model-path", type=str, required=True,
                        dest="model_path")

    args = parser.parse_args()

    data_path = args.data_path
    label_path = args.label_path
    model_path = args.model_path

    # Check that the numpy data and label files exist
    if not exists(data_path):
        raise FileNotFoundError(
            "Numpy data file doesn't exist ({}) !".format(data_path))
    if not exists(label_path):
        raise FileNotFoundError(
            "Numpy label file doesn't exist ({}) !".format(label_path))

    # Check that the model save file exists
    if not exists(model_path):
        raise FileNotFoundError(
            "Model state dict file doesn't exist ({}) !".format(model_path))

    print("Load model...")

    # Load model
    vgg16 = get_vgg16_modified()
    vgg16.load_state_dict(th.load(model_path))
    vgg16.cuda()
    vgg16.eval()

    # Create AUC meter
    auc_meter = AUCMeter()

    print("Load data...")

    # Load data
    data = np.load(data_path)
    labels = np.load(label_path)

    # Keep the evaluation split
    nb_split = int(data.shape[0] * train_ratio)
    data = data[nb_split:]
    labels = labels[nb_split:]

    batch_size = 32
    nb_batch = ceil(data.shape[0] / batch_size)

    # Loop over the evaluation data
    for i_b in tqdm(range(nb_batch)):
        # Get batch indexes
        i_min = i_b * batch_size
        i_max = (i_b + 1) * batch_size
        i_max = i_max if i_max < data.shape[0] else data.shape[0]

        # Slice data to get batch
        batch = data[i_min:i_max, :, :, :]
        batch = batch.transpose(0, 3, 1, 2)
        batch = th.tensor(batch).cuda().float() / 255.

        # And labels
        batch_label = th.tensor(labels[i_min:i_max]).cuda().float()

        # Forward pass (inference)
        out = vgg16(batch).squeeze(1)

        # Update metric
        auc_meter.add(out.cpu().detach(), batch_label.cpu().detach())

    print("AUC value = {}".format(auc_meter.value()[0]))
simulator.train()
for epoch in range(4):
    print("---epoch {}---".format(epoch))
    for step, batch in enumerate(train_loader):
        feats, labels = batch
        logits = simulator(**feats)
        loss = criterion(logits, labels)
        opt.zero_grad()
        loss.backward()
        opt.step()
        if (step + 1) % 500 == 0:
            with torch.no_grad():
                simulator.eval()
                auc = AUCMeter()
                for feats, labels in val_loader:
                    outputs = torch.sigmoid(simulator(**feats))
                    auc.add(outputs, labels)
                print(step, auc.value()[0])
                if auc.value()[0] > 0.735:
                    break
                simulator.train()
simulator.to("cpu")
torch.save(simulator.state_dict(), simulator_path)

# create a torch dataset class that adopts the simulator and generates the synthetic dataset
synthetic_data_path = os.path.join(filepath, "full_impression_feats.pt")
syn = SyntheticMovieLensDataset(filepath, simulator_path,
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.mode = mode
    # self.transition = {0:0,2:0,4:7,7:7,1:1,9:1,3:5,5:3,6:6,8:8}  # class transition for asymmetric noise
    # self.transition = {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 9: 9, 9: 0}  # ten classes
    self.transition = {0: 1, 1: 0}  # two classes

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
            if noise_mode == 'asym_two_unbalanced_classes':
                for i in range(len(self.test_label)):
                    if self.test_label[i] != 1:
                        self.test_label[i] = 0
                # print("self.test_label=", self.test_label)
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            for n in range(1, 6):
                dpath = '%s/data_batch_%d' % (root_dir, n)
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
            if noise_mode == 'asym_two_unbalanced_classes':
                for i in range(len(train_label)):
                    if train_label[i] != 1:
                        train_label[i] = 0
                # print("train_label=", train_label)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))

        # Reload the noise generated at the first initialization on every run
        if os.path.exists(noise_file):
            noise_label = json.load(open(noise_file, "r"))
        else:  # inject noise
            noise_label = []
            idx = list(range(50000))
            random.shuffle(idx)
            # num_noise = int(self.r*50000)
            if noise_mode == 'sym':
                num_noise = int((self.r / 9) / (1 - self.r + self.r / 9) * 50000)
            else:
                num_noise = int(self.r * 50000)
            noise_idx = idx[:num_noise]
            for i in range(50000):
                if i in noise_idx:
                    if noise_mode == 'sym':
                        if dataset == 'cifar10':
                            noiselabel = random.randint(0, 9)
                            # print("noiselabel=", noiselabel)
                            # print("train_label[i]=", train_label[i])
                            while noiselabel == train_label[i]:
                                noiselabel = random.randint(0, 9)
                        elif dataset == 'cifar100':
                            noiselabel = random.randint(0, 99)
                        noise_label.append(noiselabel)
                    elif noise_mode == 'asym_two_unbalanced_classes':
                        noiselabel = self.transition[train_label[i]]
                        noise_label.append(noiselabel)
                else:
                    noise_label.append(train_label[i])
            print("save noisy labels to %s ..." % noise_file)
            json.dump(noise_label, open(noise_file, "w"))

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        else:
            if self.mode == "labeled":
                # nonzero() returns a 2-D index array; [0] selects the sample (row) indices
                pred_idx = pred.nonzero()[0]
                self.probability = [probability[i] for i in pred_idx]

                clean = (np.array(noise_label) == np.array(train_label))
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' %
                          (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]

            # At each initialization this holds either the labeled or the unlabeled split
            self.train_data = train_data[pred_idx]
            self.noise_label = [noise_label[i] for i in pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from asteroid_dataset import *
import torch.optim as optim
from classifier import *
import visdom
from torchnet.meter import ConfusionMeter
from torchnet.meter import AUCMeter
from sklearn.metrics import matthews_corrcoef

confusion_matrix = ConfusionMeter(2)
# temp_confusion_matrix = ConfusionMeter(2)
auc_meter = AUCMeter()
# confusion_matrix_validation = ConfusionMeter(2)

vis = visdom.Visdom()
draw_graph = None
draw_accuracy = None
draw_roc_curve = None

csv_file = "classifications.csv"
root_dir = "data/"

# hyperparameters
batch_size = 159
learning_rate = 0.001
epoch_num = 50

# experiment parameters
real_exp = True
class AllInOneMeter(object):
    """
    All-in-one meter: per-output AUC, running losses, and Jaccard index.
    """

    def __init__(self, device):
        self.device = device
        # super(AllInOneMeter, self).__init__()
        self.out1auc1 = AUCMeter()
        self.out1auc2 = AUCMeter()
        self.out1auc3 = AUCMeter()
        self.out1auc4 = AUCMeter()
        self.out1auc5 = AUCMeter()
        self.out2auc1 = AUCMeter()
        self.out2auc2 = AUCMeter()
        self.out2auc3 = AUCMeter()
        self.out2auc4 = AUCMeter()
        self.out2auc5 = AUCMeter()
        self.loss1 = []
        self.loss2 = []
        self.loss3 = []
        self.loss = []
        self.jaccard = []
        # self.nbatch = 0
        self.intersection = torch.zeros([5], dtype=torch.float, device=self.device)
        self.union = torch.zeros([5], dtype=torch.float, device=self.device)
        self.reset()

    def reset(self):
        # self.scores = torch.DoubleTensor(torch.DoubleStorage()).numpy()
        # self.targets = torch.LongTensor(torch.LongStorage()).numpy()
        self.out1auc1.reset()
        self.out1auc2.reset()
        self.out1auc3.reset()
        self.out1auc4.reset()
        self.out1auc5.reset()
        self.out2auc1.reset()
        self.out2auc2.reset()
        self.out2auc3.reset()
        self.out2auc4.reset()
        self.out2auc5.reset()
        self.loss1 = []
        self.loss2 = []
        self.loss3 = []
        self.loss = []
        self.jaccard = []
        self.intersection = torch.zeros([5], dtype=torch.float, device=self.device)
        self.union = torch.zeros([5], dtype=torch.float, device=self.device)
        # self.nbatch = 0

    def add(self, mask_prob, true_mask, mask_ind_prob1, mask_ind_prob2,
            true_mask_ind, loss1, loss2, loss3, loss):
        self.out1auc1.add(mask_ind_prob1[:, 0].data, true_mask_ind[:, 0].data)
        self.out1auc2.add(mask_ind_prob1[:, 1].data, true_mask_ind[:, 1].data)
        self.out1auc3.add(mask_ind_prob1[:, 2].data, true_mask_ind[:, 2].data)
        self.out1auc4.add(mask_ind_prob1[:, 3].data, true_mask_ind[:, 3].data)
        self.out1auc5.add(mask_ind_prob1[:, 4].data, true_mask_ind[:, 4].data)
        self.out2auc1.add(mask_ind_prob2[:, 0].data, true_mask_ind[:, 0].data)
        self.out2auc2.add(mask_ind_prob2[:, 1].data, true_mask_ind[:, 1].data)
        self.out2auc3.add(mask_ind_prob2[:, 2].data, true_mask_ind[:, 2].data)
        self.out2auc4.add(mask_ind_prob2[:, 3].data, true_mask_ind[:, 3].data)
        self.out2auc5.add(mask_ind_prob2[:, 4].data, true_mask_ind[:, 4].data)
        self.loss1.append(loss1)
        self.loss2.append(loss2)
        self.loss3.append(loss3)
        self.loss.append(loss)
        # self.nbatch += true_mask.shape[0]
        y_pred = (mask_prob > 0.3).type(true_mask.dtype)
        y_true = true_mask
        self.intersection += (y_pred * y_true).sum(dim=-2).sum(dim=-1).sum(dim=0)
        self.union += y_true.sum(dim=-2).sum(dim=-1).sum(dim=0) + \
            y_pred.sum(dim=-2).sum(dim=-1).sum(dim=0)

    def value(self):
        jaccard_array = self.intersection / (self.union - self.intersection)
        # jaccard_array = jaccard_array.data.cpu().numpy()
        jaccard = jaccard_array.mean()
        metrics = {
            'out1auc1': self.out1auc1.value()[0],
            'out1auc2': self.out1auc2.value()[0],
            'out1auc3': self.out1auc3.value()[0],
            'out1auc4': self.out1auc4.value()[0],
            'out1auc5': self.out1auc5.value()[0],
            'out2auc1': self.out2auc1.value()[0],
            'out2auc2': self.out2auc2.value()[0],
            'out2auc3': self.out2auc3.value()[0],
            'out2auc4': self.out2auc4.value()[0],
            'out2auc5': self.out2auc5.value()[0],
            'loss1': np.mean(self.loss1),
            'loss2': np.mean(self.loss2),
            'loss3': np.mean(self.loss3),
            'loss': np.mean(self.loss),
            'jaccard': jaccard.item(),
            'jaccard1': jaccard_array[0].item(),
            'jaccard2': jaccard_array[1].item(),
            'jaccard3': jaccard_array[2].item(),
            'jaccard4': jaccard_array[3].item(),
            'jaccard5': jaccard_array[4].item(),
        }
        return metrics
def test(segment_size_list):
    if test_dataset == 'MTAT':
        test_list_pub = pickle.load(
            open(os.path.join(MTAT_SPLIT_FOLDER, 'test_list_pub.cP'), 'rb'))
    if test_dataset == 'MSD':
        id7d_to_path = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, '7D_id_to_path.pkl'), 'rb'))
        idmsd_to_id7d = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'MSD_id_to_7D_id.pkl'), 'rb'))
        test_list_pub_id = pickle.load(
            open(os.path.join(MSD_SPLIT_FOLDER, 'filtered_list_test.cP'), 'rb'))
        test_list_pub = [id7d_to_path[idmsd_to_id7d[song]][:-9] + '.npy'
                         for song in test_list_pub_id]
        del id7d_to_path, idmsd_to_id7d
    total_test_size = len(test_list_pub)

    n_inputs = 0
    for segment_size in segment_size_list:
        if segment_size == 18:
            n_inputs += 512
        if segment_size == 27:
            n_inputs += 512
        if segment_size == 54:
            n_inputs += 768
        if segment_size == 108:
            n_inputs += 1024
        if segment_size == 216:
            n_inputs += 1280

    local_models = []
    for segment_size in segment_size_list:
        loc_model = local_model(segment_size).cuda()
        # load the per-segment encoder weights
        loc_model.load_state_dict(
            torch.load(
                os.path.join(ENCODER_FOLDER,
                             'local_model_' + str(segment_size) + '.pt')))
        loc_model.eval()
        local_models.append(loc_model)

    model = global_model(n_inputs, 512).cuda()
    model.load_state_dict(
        torch.load(os.path.join(ENCODER_FOLDER, 'global_model_18_27_54_9051_123.pt')))
    model.eval()

    auc = AUCMeter()
    for start in range(0, total_test_size, n_songs):
        print("Loading dataset...", start)
        if test_dataset == 'MTAT':
            test_features = np.concatenate(
                [np.load(os.path.join(MTAT_NPY_FOLDER, 'testing/' + test_list_pub[i]))
                 for i in range(start, min(start + n_songs, total_test_size))])
            test_labels = np.load(
                os.path.join(MTAT_SPLIT_FOLDER,
                             'y_test_pub.npy'))[start:min(start + n_songs, total_test_size)]
        if test_dataset == 'MSD':
            test_features = np.concatenate(
                [np.expand_dims(
                    np.load(os.path.join(MSD_NPY_FOLDER,
                                         'testing/' + test_list_pub[i]))[:, :1255],
                    axis=0)
                 for i in range(start, min(start + n_songs, total_test_size))])
            idmsd_to_tag = pickle.load(
                open(os.path.join(MSD_SPLIT_FOLDER, 'msd_id_to_tag_vector.cP'), 'rb'))
            test_labels = np.concatenate(
                [idmsd_to_tag[idmsd]
                 for idmsd in test_list_pub_id[start:min(start + n_songs, total_test_size)]],
                axis=1)

        if normalization:
            mean = np.mean(test_features, axis=0)
            var = np.var(test_features, axis=0)
            test_features = (test_features - mean) / np.sqrt(var)

        test_data = CustomDataset(test_features, test_labels)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)
        print("Dataset loaded")

        for data, labels in test_loader:
            X = Variable(data).cuda()
            # concatenate the per-segment embeddings before the global model
            X = torch.cat([loc_model(X)[1] for loc_model in local_models], dim=1)
            out, _ = model(X)
            auc_out = np.reshape(out.data.cpu().numpy(), -1)
            auc_target = np.reshape(labels, -1)
            auc.add(auc_out, auc_target)

        del test_features, test_labels, test_data, test_loader

    auc_tuple = auc.value()
    print("AUC = ", auc_tuple[0])
def __init__(self, dataset, r, noise_mode, root_dir, transform, mode,
             noise_file='', clean_file='', pred=[], probability=[], log=''):
    self.r = r  # noise ratio
    self.transform = transform
    self.noise_mode = noise_mode
    self.mode = mode
    # class transition for asymmetric noise
    self.transition = {0: 0, 2: 0, 4: 7, 7: 7, 1: 1, 9: 1, 3: 5, 5: 3, 6: 6, 8: 8}

    if self.mode == 'test':
        if dataset == 'cifar10':
            test_dic = unpickle('%s/data/cifar-10-batches-py/test_batch' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['labels']
        elif dataset == 'cifar100':
            test_dic = unpickle('%s/data/cifar-100-python/test' % root_dir)
            self.test_data = test_dic['data']
            self.test_data = self.test_data.reshape((10000, 3, 32, 32))
            self.test_data = self.test_data.transpose((0, 2, 3, 1))
            self.test_label = test_dic['fine_labels']
    else:
        train_data = []
        train_label = []
        if dataset == 'cifar10':
            # print("current path is {}".format(sys.path[0]))
            for n in range(1, 6):
                dpath = '%s/data/cifar-10-batches-py/data_batch_%d' % (root_dir, n)
                # print("path is {}".format(dpath))
                data_dic = unpickle(dpath)
                train_data.append(data_dic['data'])
                train_label = train_label + data_dic['labels']
            train_data = np.concatenate(train_data)
        elif dataset == 'cifar100':
            train_dic = unpickle('%s/data/cifar-100-python/train' % root_dir)
            train_data = train_dic['data']
            train_label = train_dic['fine_labels']
        train_data = train_data.reshape((50000, 3, 32, 32))
        train_data = train_data.transpose((0, 2, 3, 1))
        train_label = np.array(train_label)
        noise_label = train_label.copy()

        if dataset == 'cifar10':
            nb_classes = 10
        elif dataset == 'cifar100':
            nb_classes = 100
        clean_per_class = int(5000 / nb_classes)  # cifar10: 100, else: 10
        noise_per_class = int(50000 / nb_classes * r)

        # Select clean_per_class samples per class as clean data and leave the
        # rest as noise candidates. Class 0 is handled before the loop below.
        all_index = np.arange(50000).reshape(-1)
        clean_indices = all_index[np.where(train_label == 0)[0]][-clean_per_class:]
        noise_idx = [
            all_index[np.where(train_label == 0)[0]][:-clean_per_class]
        ]

        # classes 1 through nb_classes-1: collect the indices that may receive noise
        for i in range(nb_classes - 1):
            indices1 = all_index[np.where(train_label == i + 1)[0]][-clean_per_class:]
            noisy_indices1 = all_index[np.where(train_label == i + 1)[0]][:-clean_per_class]
            clean_indices = np.concatenate((clean_indices, indices1))
            noise_idx.append(noisy_indices1)

        # add noise
        for t, i in enumerate(noise_idx):
            # randomly selected one image as the center
            image_center = train_data[i[10]]
            norm_loss = np.zeros(len(i))
            for j, k in enumerate(i):
                images = train_data[k]
                norm_loss[j] = np.linalg.norm(image_center - images)
            noisy_indices = i[norm_loss.argsort()[:noise_per_class]]
            noise_label[noisy_indices] = (t + 1) % nb_classes

        if self.mode == 'all':
            self.train_data = train_data
            self.noise_label = noise_label
        elif self.mode == 'small':
            self.train_data = train_data[::100]
            self.noise_label = noise_label[::100]
        else:
            if self.mode == "labeled":
                pred_idx = pred.nonzero()[0]
                self.probability = [probability[i] for i in pred_idx]

                # clean = (np.array(noise_label)==np.array(train_label))
                clean = (noise_label == train_label)
                auc_meter = AUCMeter()
                auc_meter.reset()
                auc_meter.add(probability, clean)
                auc, _, _ = auc_meter.value()
                log.write('Number of labeled samples:%d AUC:%.3f\n' %
                          (pred.sum(), auc))
                log.flush()
            elif self.mode == "unlabeled":
                pred_idx = (1 - pred).nonzero()[0]

            self.train_data = train_data[pred_idx]
            self.noise_label = noise_label[pred_idx]
            print("%s data has a size of %d" % (self.mode, len(self.noise_label)))