def transformation_cifar10_vs_tinyimagenet():
    """Score CIFAR-10 (in-distribution) against resized TinyImageNet (OOD)
    using a pretrained double-head WRN, and save ROC/PR curve data."""
    _, (x_test, y_test) = load_cifar10()
    x_test_out = load_tinyimagenet('/home/izikgo/Imagenet_resize/Imagenet_resize/')
    transformer = Transformer(8, 8)

    # WRN-16-8 backbone plus a second softmax head over the transformation ids.
    n, k = 16, 8
    base_mdl = create_wide_residual_network(x_test.shape[1:], 10, n, k)
    trans_head = dense(transformer.n_transforms)(base_mdl.get_layer(index=-3).output)
    transformations_cls_out = Activation('softmax')(trans_head)
    mdl = Model(base_mdl.input, [base_mdl.output, transformations_cls_out])
    mdl.load_weights('cifar10_WRN_doublehead-transformations_{}-{}.h5'.format(n, k))

    # Only the transformation-classification head is needed for scoring.
    scores_mdl = Model(mdl.input, mdl.output[1])

    x_test_all = np.concatenate((x_test, x_test_out))
    preds = np.zeros((len(x_test_all), transformer.n_transforms))
    for t in range(transformer.n_transforms):
        transformed = transformer.transform_batch(x_test_all, [t] * len(x_test_all))
        # Probability assigned to the transformation that was actually applied.
        preds[:, t] = scores_mdl.predict(transformed, batch_size=128)[:, t]

    # 1 = in-distribution (CIFAR-10), 0 = out-of-distribution (TinyImageNet).
    labels = np.concatenate((np.ones(len(x_test)), np.zeros(len(x_test_out))))
    scores = preds.mean(axis=-1)
    save_roc_pr_curve_data(scores, labels, 'cifar10-vs-tinyimagenet_transformations.npz')
def osoc_test(hyper_para):
    """Open-set one-class test: a 10-way CIFAR classifier proposes a class for
    each sample, then a per-class transformation expert scores it; reports
    AUROC over in/out-of-class labels and closed-set accuracy.

    NOTE(review): reconstructed from collapsed source — the placement of the
    trailing `break` (evaluate only the first batch) should be confirmed.
    """
    transformer = Transformer(8, 8)
    C = wrn.WideResNet(28, num_classes=10, dropout_rate=0, widen_factor=10).cuda()
    C.load_state_dict(torch.load(hyper_para.experiment_name + '.pth'))
    single_class_ind = hyper_para.inclass[0]
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    C.eval()

    score = []
    lbl = []
    features = []
    correct = 0
    total = 0
    dset = CustomDataset(x_test, y_test)
    testLoader = DataLoader(dset, batch_size=128)
    for i, (inputs0, labels) in enumerate(testLoader):
        inputs = inputs0.permute(0, 3, 2, 1)
        if hyper_para.gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()
        act, f = C(inputs)
        features += f.detach().cpu().tolist()
        val, ind = torch.max(act, dim=1)
        #score += val.detach().cpu().tolist()
        CC = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10).cuda()
        val, ind, labels = (val.detach().cpu().tolist(),
                            ind.detach().cpu().tolist(),
                            labels.detach().cpu().tolist())
        for idx, (ii, gt) in enumerate(zip(ind, labels)):
            gt = gt[0]
            # NOTE(review): the expert checkpoint is selected by the ground-truth
            # class `gt`, not the predicted class `ii` — confirm this is intended.
            CC.load_state_dict(
                torch.load('saved/' + hyper_para.experiment_name + '_' + str(gt) + '.pth'))
            x_test0 = transformer.transform_batch(
                np.expand_dims(inputs0[idx, :, :, :].detach().cpu().numpy(), 0), [0])
            x_test0 = torch.tensor(x_test0).permute(0, 3, 2, 1).cuda()
            act, f = CC(x_test0)
            act = act.detach().cpu().tolist()
            # Logit of the identity transformation is the sample's score.
            score += [act[0][0]]
            if gt in hyper_para.inclass:
                total += 1
                if ii == gt:
                    correct += 1
                lbl.append(1)
            else:
                lbl.append(0)
        break  # debugging shortcut: evaluate the first batch only
    fpr, tpr, thresholds = metrics.roc_curve(lbl, score)
    AUC = metrics.auc(fpr, tpr)
    ACC = float(correct) / total
    print('AUROC: ' + str(AUC) + '\t Accuracy: ' + str(ACC))
def __init__(self, rules, values):
    """Grid table model pairing each original value with its transformed form."""
    wx.grid.PyGridTableBase.__init__(self)
    # Build the transformer from the supplied rules.
    transformer = Transformer(rules)
    # Load the data: one [original, transformed] row per input value.
    self.data = [[item, transformer.transform_value(item)] for item in values]
    self.column_labels = [u"Valor Original", u"Valor Transformado"]
def _build(self):
    """Assemble kernel, mean, latent values and MCMC samplers for the
    classifier GP."""
    self.params = {}
    self.latent_values = None

    # Input warping: a Beta CDF per input dimension.
    beta_warp = BetaWarp(self.num_dims)
    beta_alpha, beta_beta = beta_warp.hypers
    self.params['beta_alpha'] = beta_alpha
    self.params['beta_beta'] = beta_beta
    transformer = Transformer(self.num_dims)
    transformer.add_layer(beta_warp)

    # Matern 5/2 kernel applied to the warped inputs.
    input_kernel = Matern52(self.num_dims)
    ls = input_kernel.hypers
    self.params['ls'] = ls
    transform_kernel = TransformKernel(input_kernel, transformer)

    # Small noise term keeps the covariance numerically well-conditioned.
    stability_noise = Noise(self.num_dims)

    # In a classifier GP the notion of "noise" is really just the scale,
    # so the amplitude hyperparameter only exists in the non-noiseless case.
    if self.noiseless:
        self._kernel = SumKernel(transform_kernel, stability_noise)
    else:
        scaled_kernel = Scale(transform_kernel)
        self._kernel = SumKernel(scaled_kernel, stability_noise)
        amp2 = scaled_kernel.hypers
        self.params['amp2'] = amp2

    # Constant mean with a standard-normal prior.
    self.mean = Hyperparameter(
        initial_value=0.0,
        prior=priors.Gaussian(0.0, 1.0),
        name='mean'
    )
    self.params['mean'] = self.mean

    # Latent function values; empty until the GP receives data.
    self.latent_values = Hyperparameter(
        initial_value=np.array([]),
        name='latent values'
    )

    # Samplers: joint slice sampling for mean (+ amplitude when present),
    # whitened-prior slice sampling for lengthscales and warp parameters,
    # elliptical slice sampling for the latent values.
    to_sample = [self.mean] if self.noiseless else [self.mean, amp2]
    self._samplers.append(
        SliceSampler(*to_sample, compwise=False, thinning=self.thinning))
    self._samplers.append(
        WhitenedPriorSliceSampler(ls, beta_alpha, beta_beta,
                                  compwise=True, thinning=self.thinning))
    self.latent_values_sampler = EllipticalSliceSampler(
        self.latent_values, thinning=self.ess_thinning)
def _build(self):
    """Assemble the warped Matern kernel, constant mean and slice samplers
    for the regression GP."""
    # Input warping: a Beta CDF per input dimension.
    beta_warp = BetaWarp(self.num_dims)
    transformer = Transformer(self.num_dims)
    transformer.add_layer(beta_warp)

    # Scaled Matern 5/2 plus a small stability noise term.
    input_kernel = Matern52(self.num_dims)
    stability_noise_kernel = Noise(self.num_dims)  # kept even if noiseless, for stability
    scaled_input_kernel = Scale(input_kernel)
    sum_kernel = SumKernel(scaled_input_kernel, stability_noise_kernel)
    noise_kernel = Noise(self.num_dims)

    # The final kernel applies the input transformation.
    self._kernel = TransformKernel(sum_kernel, transformer)

    # A genuinely noisy variant only when observation noise is modeled.
    if not self.noiseless:
        self._kernel_with_noise = SumKernel(self._kernel, noise_kernel)

    # Constant mean with a standard-normal prior.
    self.mean = Hyperparameter(
        initial_value=0.0,
        prior=priors.Gaussian(0.0, 1.0),
        name='mean'
    )

    # Collect the hyperparameters to sample.
    ls = input_kernel.hypers
    amp2 = scaled_input_kernel.hypers
    beta_alpha, beta_beta = beta_warp.hypers
    self.params = {
        'mean': self.mean,
        'amp2': amp2,
        'ls': ls,
        'beta_alpha': beta_alpha,
        'beta_beta': beta_beta
    }

    # Samplers: mean/amplitude (and noise, when present) jointly;
    # lengthscales and warp parameters component-wise.
    if self.noiseless:
        self._samplers.append(
            SliceSampler(self.mean, amp2, compwise=False, thinning=self.thinning))
    else:
        noise = noise_kernel.hypers
        self.params.update({'noise': noise})
        self._samplers.append(
            SliceSampler(self.mean, amp2, noise, compwise=False, thinning=self.thinning))
    self._samplers.append(
        SliceSampler(ls, beta_alpha, beta_beta, compwise=True, thinning=self.thinning))
def os_test_ens(testLoader, hyper_para, C, isauc):
    """One-class test on CIFAR-10 using the transformation network's logit for
    the applied transformation as the anomaly score; prints AUROC.

    NOTE(review): the `testLoader`, `C` and `isauc` arguments are effectively
    ignored — both the loader and the network are rebuilt locally.
    """
    C = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10).cuda()
    C.load_state_dict(
        torch.load('saved/' + hyper_para.experiment_name + '_' +
                   str(hyper_para.inclass[0]) + '.pth'))
    single_class_ind = hyper_para.inclass[0]
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    transformer = Transformer(8, 8)
    glabels = y_test.flatten() == single_class_ind
    C.cuda().eval()

    scores = np.array([[]])
    features = []
    correct = 0
    total = 0
    preds = np.zeros((len(x_test), transformer.n_transforms))
    for t in [0]:  #range(72):
        score = []
        # Apply transformation t to the whole test set and score it.
        x_test0 = transformer.transform_batch(x_test, [t] * len(x_test))
        dset = CustomDataset(x_test0, [t] * len(x_test))
        testLoader = DataLoader(dset, batch_size=128, shuffle=False)
        for batch_idx, (inputs, labels) in enumerate(testLoader):
            inputs = inputs.permute(0, 3, 2, 1)
            if True:
                inputs = inputs.cuda()
                labels = labels.cuda()
            act, f = C(inputs)
            features += f.detach().cpu().tolist()
            #act = torch.nn.functional.softmax(act, dim=1)
            # Logit of the applied transformation is the per-sample score.
            score += act[:, t].detach().cpu().tolist()
        preds[:, t] = list(score)
        fpr, tpr, thresholds = metrics.roc_curve(glabels, score)
        AUC = metrics.auc(fpr, tpr)
        print('AUROC: ' + str(AUC))

    # Sum per-transformation scores and report the combined AUROC.
    scores = np.sum(((preds)), 1)
    fpr, tpr, thresholds = metrics.roc_curve(glabels, scores)
    AUC = metrics.auc(fpr, tpr)
    print('AUROC: ' + str(AUC))
    return ([0, 0])
class RotateDataset(Dataset):
    """Dataset pairing each sample with a randomly chosen geometric
    transformation; yields (transformed_sample, transformation_id)."""

    def __init__(self, x_tensor):
        self.x = x_tensor
        self.transformer = Transformer(8, 8)

    def __getitem__(self, index):
        # Draw one transformation id uniformly at random.
        chosen = np.random.randint(self.transformer.n_transforms, size=1)[0]
        #return(self.transformer.transform_sample(self.x[index], trans_id), trans_id)
        sample = np.expand_dims(self.x[index], 0)
        transformed = self.transformer.transform_batch(sample, [chosen])
        return (transformed[0], chosen)

    def __len__(self):
        return len(self.x)
class RotateDataset(Dataset):
    """Dataset yielding (transformed_sample, transformation_id, domain_label)
    with a fixed random permutation applied to the sample order."""

    def __init__(self, x_tensor, lbl):
        self.x = x_tensor
        self.y = lbl
        self.perm = np.random.permutation(len(x_tensor))
        self.transformer = Transformer(8, 8)

    def __getitem__(self, index):
        # Indirect through the fixed permutation, then transform randomly.
        index = self.perm[index]
        chosen = np.random.randint(self.transformer.n_transforms, size=1)[0]
        #return(self.transformer.transform_sample(self.x[index], trans_id), trans_id)
        sample = np.expand_dims(self.x[index], 0)
        transformed = self.transformer.transform_batch(sample, [chosen])
        return (transformed[0], chosen, self.y[index])

    def __len__(self):
        return len(self.x)
def train_cifar10_transformations():
    """Train a double-head WRN-16-8 on CIFAR-10: one softmax head for the 10
    object classes and one for predicting which transformation was applied."""
    (x_train, y_train), _ = load_cifar10()
    transformer = Transformer(8, 8)

    def data_gen(x, y, batch_size):
        # Endless generator of (images, [class_targets, transformation_targets]).
        while True:
            ind_permutation = np.random.permutation(len(x))
            for b_start_ind in range(0, len(x), batch_size):
                batch_inds = ind_permutation[b_start_ind:b_start_ind + batch_size]
                x_batch = x[batch_inds]
                y_batch = y[batch_inds].flatten()
                # The transformer operates on channels-last images.
                if K.image_data_format() == 'channels_first':
                    x_batch = np.transpose(x_batch, (0, 2, 3, 1))
                y_t_batch = np.random.randint(0, transformer.n_transforms, size=len(x_batch))
                x_batch = transformer.transform_batch(x_batch, y_t_batch)
                if K.image_data_format() == 'channels_first':
                    x_batch = np.transpose(x_batch, (0, 3, 1, 2))
                yield (x_batch,
                       [to_categorical(y_batch, num_classes=10),
                        to_categorical(y_t_batch, num_classes=transformer.n_transforms)])

    n, k = 16, 8
    base_mdl = create_wide_residual_network(x_train.shape[1:], 10, n, k)
    trans_head = dense(transformer.n_transforms)(base_mdl.get_layer(index=-3).output)
    transformations_cls_out = Activation('softmax')(trans_head)
    mdl = Model(base_mdl.input, [base_mdl.output, transformations_cls_out])
    mdl.compile(SGDTorch(lr=.1, momentum=0.9, nesterov=True),
                'categorical_crossentropy', ['acc'])

    # Step schedule: multiply by 0.2 at epochs 60, 120 and 160.
    lr_cb = LearningRateScheduler(
        lambda e: 0.1 * (0.2 ** (3 if e >= 160 else 2 if e >= 120 else 1 if e >= 60 else 0)))

    batch_size = 128
    mdl.fit_generator(
        generator=data_gen(x_train, y_train, batch_size=batch_size),
        steps_per_epoch=len(x_train) // batch_size,
        epochs=200,
        callbacks=[lr_cb]
    )
    mdl.save_weights('cifar10_WRN_doublehead-transformations_{}-{}.h5'.format(n, k))
def _transformations_experiment(dataset_load_fn, dataset_name, single_class_ind, gpu_q):
    """Train a WRN on the transformation-prediction task for one inlier class,
    then compute Dirichlet / simple / entropy / xH anomaly scores on the test
    set and save the score files plus the model weights."""
    # gpu_to_use = gpu_q.get()
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use
    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    # Larger images (cats-vs-dogs) get larger translations and a bigger WRN.
    if dataset_name in ['cats-vs-dogs']:
        transformer = Transformer(16, 16)
        n, k = (16, 8)
    else:
        transformer = Transformer(8, 8)
        n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:], transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # get inliers of specific class
    x_train_task = x_train[y_train.flatten() == single_class_ind]

    # [0_i, ..., (N_transforms-1)_i, ..., ..., 0_N_samples, ...,
    # (N_transforms-1)_N_samples] shape: (N_transforms*N_samples,)
    transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(
        np.repeat(x_train_task, transformer.n_transforms, axis=0),
        transformations_inds)

    batch_size = 128
    mdl.fit(x=x_train_task_transformed,
            y=to_categorical(transformations_inds),
            batch_size=batch_size,
            epochs=int(np.ceil(200 / transformer.n_transforms)))

    scores = np.zeros((len(x_test),))
    matrix_evals = np.zeros(
        (len(x_test), transformer.n_transforms, transformer.n_transforms))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        # Fit a Dirichlet to the softmax outputs on transformed training data...
        observed_dirichlet = mdl.predict(
            transformer.transform_batch(observed_data, [t_ind] * len(observed_data)),
            batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)
        # ...then evaluate the test set under that Dirichlet.
        x_test_p = mdl.predict(
            transformer.transform_batch(x_test, [t_ind] * len(x_test)),
            batch_size=1024)
        matrix_evals[:, :, t_ind] += x_test_p
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)

    scores /= transformer.n_transforms
    matrix_evals /= transformer.n_transforms
    scores_simple = np.trace(matrix_evals, axis1=1, axis2=2)
    scores_entropy = get_entropy(matrix_evals)
    scores_xH = get_xH(transformer, matrix_evals)
    labels = y_test.flatten() == single_class_ind

    save_results_file(dataset_name, single_class_ind, scores=scores,
                      labels=labels, experiment_name='transformations')
    save_results_file(dataset_name, single_class_ind, scores=scores_simple,
                      labels=labels, experiment_name='transformations-simple')
    save_results_file(dataset_name, single_class_ind, scores=scores_entropy,
                      labels=labels, experiment_name='transformations-entropy')
    save_results_file(dataset_name, single_class_ind, scores=scores_xH,
                      labels=labels, experiment_name='transformations-xH')

    mdl_weights_name = '{}_transformations_{}_{}_weights.h5'.format(
        dataset_name,
        get_class_name_from_index(single_class_ind, dataset_name),
        datetime.datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name, mdl_weights_name)
    mdl.save_weights(mdl_weights_path)
def __init__(self, x_tensor, lbl):
    """Store samples and labels, fix a random sample order, and create the
    geometric transformer used when items are fetched."""
    self.x = x_tensor
    self.y = lbl
    # Fixed shuffling of the sample order.
    self.perm = np.random.permutation(len(x_tensor))
    self.transformer = Transformer(8, 8)
def octest0(hyper_para):
    """Evaluate the one-class transformation network on the source domain and
    on the pooled target domain (train + test), appending both AUROCs to
    AUC.txt.

    Fixes vs. original:
    - `sx_test` typo in the target 'amazon' branch left `x_test` stale (or
      undefined), corrupting the target-domain evaluation — now `x_test`.
    - AUC.txt handles are managed with `with` so they close even on error.
    - removed a redundant second `Transformer(8, 8)` construction inside the
      evaluation loops (identical constructor arguments).
    """
    C = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10).cuda()
    C.load_state_dict(
        torch.load(hyper_para.experiment_name + '_' + str(hyper_para.inclass[0]) + '.pth'))
    single_class_ind = hyper_para.inclass[0]

    # ---- source domain ----
    if hyper_para.source == 'mnist':
        (x_train, y_train), (x_test, y_test) = load_mnist()
    elif hyper_para.source == 'svhn':
        (x_train, y_train), (x_test, y_test) = load_svhn()
    elif hyper_para.source == 'amazon':
        (x_train, y_train), (x_test, y_test) = load_amazon()
    elif hyper_para.source == 'dslr':
        (x_train, y_train), (x_test, y_test) = load_dslr()
    transformer = Transformer(8, 8)
    glabels = y_test.flatten() == single_class_ind
    print(hyper_para.source)
    print(len(x_test))
    print(np.sum(glabels))
    C.cuda().eval()
    features = []
    preds = np.zeros((len(x_test), transformer.n_transforms))
    for t in range(1):
        score = []
        dset = CustomDataset(x_test, y_test)
        testLoader = DataLoader(dset, batch_size=128, shuffle=False)
        for i, (inputs, labels) in enumerate(testLoader):
            # Apply transformation t on CPU, then move to channels-first CUDA.
            inputs = torch.tensor(
                transformer.transform_batch(inputs.detach().cpu().numpy(),
                                            [t] * len(inputs)))
            inputs = inputs.permute(0, 3, 2, 1)
            inputs = inputs.cuda()
            labels = labels.cuda()
            act, f = C(inputs)
            features += f.detach().cpu().tolist()
            #act = torch.nn.functional.softmax(act, dim=1)
            # Logit of the applied transformation is the per-sample score.
            score += act[:, t].detach().cpu().tolist()
        preds[:, t] = list(score)
    scores = np.sum(preds, 1)
    fpr, tpr, thresholds = metrics.roc_curve(glabels, scores)
    AUC = metrics.auc(fpr, tpr)
    print('AUROC: ' + str(AUC))
    with open("AUC.txt", "a") as file1:  #append mode
        file1.write(hyper_para.source + "\t" + hyper_para.method + "\t" +
                    str(hyper_para.inclass[0]) + "\t" +
                    hyper_para.experiment_name + "\t" + str(AUC) + "\n")

    # ---- target domain (train and test pooled) ----
    C = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10).cuda()
    C.load_state_dict(
        torch.load(hyper_para.experiment_name + '_' + str(hyper_para.inclass[0]) + '.pth'))
    single_class_ind = hyper_para.inclass[0]
    if hyper_para.target == 'mnist':
        (x_train, y_train), (x_test, y_test) = load_mnist()
    elif hyper_para.target == 'svhn':
        (x_train, y_train), (x_test, y_test) = load_svhn()
    elif hyper_para.target == 'amazon':
        (x_train, y_train), (x_test, y_test) = load_amazon()  # fixed: was `sx_test`
    elif hyper_para.target == 'dslr':
        (x_train, y_train), (x_test, y_test) = load_dslr()
    transformer = Transformer(8, 8)
    C.cuda().eval()
    features = []
    preds = np.zeros((len(x_test) + len(x_train), transformer.n_transforms))
    for t in range(1):
        score = []
        dset = CustomDataset(np.concatenate((x_test, x_train), 0),
                             np.concatenate((y_test, y_train), 0))
        testLoader = DataLoader(dset, batch_size=128, shuffle=False)
        for i, (inputs, labels) in enumerate(testLoader):
            inputs = torch.tensor(
                transformer.transform_batch(inputs.detach().cpu().numpy(),
                                            [t] * len(inputs)))
            inputs = inputs.permute(0, 3, 2, 1)
            inputs = inputs.cuda()
            labels = labels.cuda()
            act, f = C(inputs)
            features += f.detach().cpu().tolist()
            #act = torch.nn.functional.softmax(act, dim=1)
            score += act[:, t].detach().cpu().tolist()
        preds[:, t] = list(score)
    glabels = (np.concatenate((y_test, y_train))).flatten() == single_class_ind
    scores = np.sum(preds, 1)
    fpr, tpr, thresholds = metrics.roc_curve(glabels, scores)
    AUC = metrics.auc(fpr, tpr)
    print('AUROC: ' + str(AUC))
    with open("AUC.txt", "a") as file1:  #append mode
        file1.write(hyper_para.target + "\t" + hyper_para.method + "\t" +
                    str(hyper_para.inclass[0]) + "\t" +
                    hyper_para.experiment_name + "\t" + str(AUC) + "\n")
def oceval(hyper_para):
    """Evaluate the rotation classifier and return a normalized vector of
    per-transformation accuracies (diagonal of the row-normalized confusion
    matrix, rescaled to sum to 72).

    NOTE(review): the `return` sits inside the class loop, so only the first
    entry of hyper_para.inclass is ever evaluated — confirm this is intended.
    The model is also evaluated without calling C.eval().
    """
    import sklearn
    for single_class_ind in hyper_para.inclass:
        C = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10)
        C.load_state_dict(
            torch.load(hyper_para.experiment_name + '_' +
                       str(hyper_para.inclass[0]) + '.pth'))
        C.cuda()

        # Source dataset plus its transfer counterpart.
        if hyper_para.source == 'mnist':
            (x_train, y_train), (x_test, y_test) = load_mnist()
            (x_train2, y_train2), (x_test2, y_test2) = load_svhn()
        elif hyper_para.source == 'svhn':
            (x_train, y_train), (x_test, y_test) = load_svhn()
            (x_train2, y_train2), (x_test2, y_test2) = load_mnist()
        elif hyper_para.source == 'amazon':
            (x_train, y_train), (x_test, y_test) = load_amazon()
            (x_train2, y_train2), (x_test2, y_test2) = load_dslr()
        elif hyper_para.source == 'dslr':
            (x_train, y_train), (x_test, y_test) = load_dslr()
            (x_train2, y_train2), (x_test2, y_test2) = load_amazon()

        if hyper_para.method == 'justsource':
            # 80/20 split of the in-class source samples.
            x_train_task = x_train[y_train.flatten() == single_class_ind]
            x_test = x_train_task[int(len(x_train_task) * 0.8):]
            x_train_task = x_train_task[0:int(len(x_train_task) * 0.8)]
            domain_lbl = [0] * len(x_train_task)
            tst_lbl = [0] * len(x_test)
            dset2 = RotateDataset(x_test, tst_lbl)
            testLoader = DataLoader(dset2, batch_size=512, shuffle=True)
        else:  #if hyper_para.method == 'balancedsourcetarget':
            x_train_task = x_train[y_train.flatten() == single_class_ind]
            x_train_task2 = x_train2[y_train2.flatten() == single_class_ind]
            x_train_task2 = x_train_task2[0:hyper_para.target_n]
            x_test = x_train_task[int(len(x_train_task) * 0.8):]
            x_train_task = x_train_task[0:int(len(x_train_task) * 0.8)]
            x_test2 = x_train_task2[int(len(x_train_task2) * 0.8):]
            x_train_task2 = x_train_task2[0:int(len(x_train_task2) * 0.8)]
            # Oversample the small target split up to the source split's size.
            domain_lbl = [0] * len(x_train_task) + [1] * (
                len(x_train_task2) * int(len(x_train_task) / len(x_train_task2)))
            x_train_task2 = np.tile(
                x_train_task2,
                (int(len(x_train_task) / len(x_train_task2)), 1, 1, 1))
            x_train_task = np.concatenate((x_train_task, x_train_task2))
            tst_lbl = [0] * len(x_test) + [1] * (
                len(x_test2) * int(len(x_test) / len(x_test2)))
            x_test2 = np.tile(x_test2, (int(len(x_test) / len(x_test2)), 1, 1, 1))
            x_test = np.concatenate((x_test, x_test2))
            dset2 = RotateDataset(x_test, tst_lbl)
            testLoader = DataLoader(dset2, batch_size=512, shuffle=True)

        transformer = Transformer(8, 8)
        dset = RotateDataset(x_train_task, domain_lbl)
        trainLoader = DataLoader(dset, batch_size=512, shuffle=True)
        np.set_printoptions(threshold=sys.maxsize)

        correct = 0
        total = 0
        preds = []
        target = []
        # Ten passes over the test loader (each pass draws fresh random
        # transformations via RotateDataset).
        for t in range(10):
            for i, (inputs, labels, dlbls) in enumerate(testLoader):
                inputs = inputs.permute(0, 3, 2, 1)
                if True:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                act, f = C(inputs)
                _, ind = torch.max(act, 1)
                target += labels.detach().cpu().tolist()
                preds += ind.detach().cpu().tolist()

        import matplotlib
        import matplotlib.pyplot as plt
        cm = sklearn.metrics.confusion_matrix(target, preds, normalize='true')
        #plt.imshow(cm)
        #plt.show()
        cm = np.diag(cm)
        cm = cm / np.sum(cm) * 72
        return cm
def octrain(hyper_para):
    """Two-stage training of the rotation-prediction model.

    Stage 1 trains on the source split with plain cross-entropy, saves a
    checkpoint and calls oceval() to obtain per-class weights `w`; stage 2
    rebuilds the loaders (now mixing the target domain in), retrains with a
    class-weighted loss (plus an optional domain discriminator for method
    'dd'), and checkpoints the best validation accuracy.

    NOTE(review): reconstructed from collapsed source — the nesting of the
    stage-1 checkpoint/eval relative to the epoch loop should be confirmed.
    """
    for single_class_ind in hyper_para.inclass:
        hyper_para.C = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10)

        # Source dataset plus its transfer counterpart.
        if hyper_para.source == 'mnist':
            (x_train, y_train), (x_test, y_test) = load_mnist()
            (x_train2, y_train2), (x_test2, y_test2) = load_svhn()
        elif hyper_para.source == 'svhn':
            (x_train, y_train), (x_test, y_test) = load_svhn()
            (x_train2, y_train2), (x_test2, y_test2) = load_mnist()
        elif hyper_para.source == 'amazon':
            (x_train, y_train), (x_test, y_test) = load_amazon()
            (x_train2, y_train2), (x_test2, y_test2) = load_dslr()
        elif hyper_para.source == 'dslr':
            (x_train, y_train), (x_test, y_test) = load_dslr()
            (x_train2, y_train2), (x_test2, y_test2) = load_amazon()

        if hyper_para.method == 'justsource':
            # 80/20 split of the in-class source samples.
            x_train_task = x_train[y_train.flatten() == single_class_ind]
            x_test = x_train_task[int(len(x_train_task) * 0.8):]
            x_train_task = x_train_task[0:int(len(x_train_task) * 0.8)]
            domain_lbl = [0] * len(x_train_task)
            tst_lbl = [0] * len(x_test)
            dset2 = RotateDataset(x_test, tst_lbl)
            testLoader = DataLoader(dset2, batch_size=16, shuffle=True)
        else:
            x_train_task = x_train[y_train.flatten() == single_class_ind]
            x_train_task2 = x_train2[y_train2.flatten() == single_class_ind]
            x_train_task2 = x_train_task2[0:hyper_para.target_n]
            x_test = x_train_task[int(len(x_train_task) * 0.8):]
            x_train_task = x_train_task[0:int(len(x_train_task) * 0.8)]
            x_test2 = x_train_task2[int(len(x_train_task2) * 0.8):]
            x_train_task2 = x_train_task2[0:int(len(x_train_task2) * 0.8)]
            domain_lbl = [0] * len(x_train_task)
            # NOTE(review): in this stage the tiled target split is never
            # concatenated into x_train_task (stage 2 does concatenate it).
            x_train_task2 = np.tile(
                x_train_task2,
                (int(len(x_train_task) / len(x_train_task2)), 1, 1, 1))
            tst_lbl = [0] * len(x_test) + [1] * (
                len(x_test2) * int(len(x_test) / len(x_test2)))
            x_test2 = np.tile(x_test2, (int(len(x_test) / len(x_test2)), 1, 1, 1))
            x_test = np.concatenate((x_test, x_test2))
            dset2 = RotateDataset(x_test, tst_lbl)
            testLoader = DataLoader(dset2, batch_size=16, shuffle=True)

        transformer = Transformer(8, 8)
        dset = RotateDataset(x_train_task, domain_lbl)
        trainLoader = DataLoader(dset, batch_size=512, shuffle=True)
        '''transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
        x_train_task_transformed = transformer.transform_batch(np.repeat(x_train_task, transformer.n_transforms, axis=0), transformations_inds)
        dset = CustomDataset(x_train_task_transformed,transformations_inds )
        trainLoader = DataLoader(dset, batch_size=32, shuffle=True)'''

        # define networks
        C = hyper_para.C
        C = torch.nn.DataParallel(C)
        # define loss functions
        ce_criterion = nn.CrossEntropyLoss()
        # define optimizer
        optimizer_c = optim.Adam(C.parameters(), lr=hyper_para.lr, betas=(0.9, 0.999))
        # turn on the train mode
        C.train(mode=True)
        # initialization of auxilary variables
        running_tl = 0.0
        running_cc = 0.0
        running_rc = 0.0
        running_ri = 0.0
        best_acc = 0

        # ---- stage 1: plain cross-entropy warm-up ----
        for i in range(100):  #int(100*hyper_para.source_n/len(x_train_task))):
            acct = 0
            nelt = 0
            acc0t = 0
            nel0t = 0
            acc1t = 0
            nel1t = 0
            if hyper_para.gpu:
                C.cuda()
            for idx, (inputs, labels, dlbls) in enumerate(trainLoader):
                inputs = inputs.permute(0, 3, 2, 1)
                t1 = time.time()
                if hyper_para.gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    dlbls = dlbls.cuda()
                act, fea = C(inputs)
                _, ind = torch.max(act, 1)
                if True:
                    loss_cc = ce_criterion(act, labels)
                    optimizer_c.zero_grad()
                    loss = loss_cc
                    loss.backward()
                    optimizer_c.step()
                    running_cc += loss_cc.data
                t2 = time.time()
        torch.save(
            C.module.state_dict(),
            hyper_para.experiment_name + '_' + str(single_class_ind) + '.pth')
        w = oceval(hyper_para)

        # ---- stage 2: finetune — reload the data, now mixing the target in ----
        if hyper_para.source == 'mnist':
            (x_train, y_train), (x_test, y_test) = load_mnist()
            (x_train2, y_train2), (x_test2, y_test2) = load_svhn()
        elif hyper_para.source == 'svhn':
            (x_train, y_train), (x_test, y_test) = load_svhn()
            (x_train2, y_train2), (x_test2, y_test2) = load_mnist()
        elif hyper_para.source == 'amazon':
            (x_train, y_train), (x_test, y_test) = load_amazon()
            (x_train2, y_train2), (x_test2, y_test2) = load_dslr()
        elif hyper_para.source == 'dslr':
            (x_train, y_train), (x_test, y_test) = load_dslr()
            (x_train2, y_train2), (x_test2, y_test2) = load_amazon()

        if hyper_para.method == 'justsource':
            x_train_task = x_train[y_train.flatten() == single_class_ind]
            x_test = x_train_task[int(len(x_train_task) * 0.8):]
            x_train_task = x_train_task[0:int(len(x_train_task) * 0.8)]
            domain_lbl = [0] * len(x_train_task)
            tst_lbl = [0] * len(x_test)
            dset2 = RotateDataset(x_test, tst_lbl)
            testLoader = DataLoader(dset2, batch_size=16, shuffle=True)
        else:  #if hyper_para.method == 'balancedsourcetarget':
            x_train_task = x_train[y_train.flatten() == single_class_ind]
            x_train_task2 = x_train2[y_train2.flatten() == single_class_ind]
            x_train_task2 = x_train_task2[0:hyper_para.target_n]
            x_test = x_train_task[int(len(x_train_task) * 0.8):]
            x_train_task = x_train_task[0:int(len(x_train_task) * 0.8)]
            x_test2 = x_train_task2[int(len(x_train_task2) * 0.8):]
            x_train_task2 = x_train_task2[0:int(len(x_train_task2) * 0.8)]
            domain_lbl = [0] * len(x_train_task) + [1] * (
                len(x_train_task2) * int(len(x_train_task) / len(x_train_task2)))
            x_train_task2 = np.tile(
                x_train_task2,
                (int(len(x_train_task) / len(x_train_task2)), 1, 1, 1))
            x_train_task = np.concatenate((x_train_task, x_train_task2))
            tst_lbl = [0] * len(x_test) + [1] * (
                len(x_test2) * int(len(x_test) / len(x_test2)))
            x_test2 = np.tile(x_test2, (int(len(x_test) / len(x_test2)), 1, 1, 1))
            x_test = np.concatenate((x_test, x_test2))
            dset2 = RotateDataset(x_test, tst_lbl)
            testLoader = DataLoader(dset2, batch_size=16, shuffle=True)

        transformer = Transformer(8, 8)
        dset = RotateDataset(x_train_task, domain_lbl)
        trainLoader = DataLoader(dset, batch_size=512, shuffle=True)
        '''transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
        x_train_task_transformed = transformer.transform_batch(np.repeat(x_train_task, transformer.n_transforms, axis=0), transformations_inds)
        dset = CustomDataset(x_train_task_transformed,transformations_inds )
        trainLoader = DataLoader(dset, batch_size=32, shuffle=True)'''

        # Optional domain discriminator for adversarial alignment.
        if hyper_para.method == 'dd':
            Dnet = FCDiscriminator().cuda()
            optimizer_D = optim.Adam(Dnet.parameters(), lr=hyper_para.lr,
                                     betas=(0.9, 0.999))
            Dnet.train()

        # define networks
        C = hyper_para.C
        C = torch.nn.DataParallel(C)
        # define loss functions
        print(w)
        ce_criterion = nn.CrossEntropyLoss(weight=torch.Tensor(w).cuda())
        # define optimizer
        optimizer_c = optim.Adam(C.parameters(), lr=hyper_para.lr, betas=(0.9, 0.999))
        # turn on the train mode
        C.train(mode=True)
        # initialization of auxilary variables
        running_tl = 0.0
        running_cc = 0.0
        running_rc = 0.0
        running_ri = 0.0
        best_acc = 0

        for i in range(500):  #int(100*hyper_para.source_n/len(x_train_task))):
            acct = 0
            nelt = 0
            acc0t = 0
            nel0t = 0
            acc1t = 0
            nel1t = 0
            if hyper_para.gpu:
                C.cuda()
            for idx, (inputs, labels, dlbls) in enumerate(trainLoader):
                inputs = inputs.permute(0, 3, 2, 1)
                t1 = time.time()
                if hyper_para.gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    dlbls = dlbls.cuda()
                act, fea = C(inputs)
                _, ind = torch.max(act, 1)
                if hyper_para.method == 'dd':
                    # Train Discriminator jointly with the classifier.
                    Dnet.train()
                    d = Dnet(fea)
                    _, mind = torch.max(d, 1)
                    acc = torch.mean(torch.eq(mind, dlbls).cpu().float())
                    optimizer_D.zero_grad()
                    optimizer_c.zero_grad()
                    loss_d = nnfunc.binary_cross_entropy_with_logits(
                        d.squeeze(), dlbls.float())
                    loss_cc = ce_criterion(act, labels)
                    loss = loss_cc + loss_d
                    loss.backward()
                    optimizer_c.step()
                    optimizer_D.step()
                else:
                    loss_cc = ce_criterion(act, labels)
                    optimizer_c.zero_grad()
                    loss = loss_cc
                    loss.backward()
                    optimizer_c.step()
                running_cc += loss_cc.data
                t2 = time.time()
                acct += torch.sum(torch.eq(ind, labels)).cpu().numpy()
                nelt += ind.shape[0]
                acc0t += torch.sum(
                    torch.eq(ind[dlbls == 0], labels[dlbls == 0])).cpu().numpy()
                # NOTE(review): (dlbls == 0).shape[0] is the batch size, not the
                # count of domain-0 samples — per-domain accuracies look biased.
                nel0t += (dlbls == 0).shape[0]
                acc1t += torch.sum(
                    torch.eq(ind[dlbls == 1], labels[dlbls == 1])).cpu().numpy()
                nel1t += (dlbls == 1).shape[0]
                if idx % 10 == 0:
                    line = hyper_para.BLUE + '[' + str(format(i + 1, '8d')) + '/' + str(
                        format(int(hyper_para.iterations), '8d')) + ']' + hyper_para.ENDC + \
                        hyper_para.GREEN + ' loss_cc: ' + hyper_para.ENDC + str(
                        format(running_cc / hyper_para.stats_frequency, '1.8f')) + \
                        hyper_para.YELLOW + ' time (min): ' + hyper_para.ENDC + \
                        str(int((t2 - t1) * 20.0))
                    print(line)

            # ---- validation pass; checkpoint on best overall accuracy ----
            acc = 0
            nel = 0
            acc0 = 0
            nel0 = 0
            acc1 = 0
            nel1 = 0
            for idx, (inputs, labels, dlbls) in enumerate(testLoader):
                inputs = inputs.permute(0, 3, 2, 1)
                t1 = time.time()
                if hyper_para.gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    dlbls = dlbls.cuda()
                act, _ = C(inputs)
                _, ind = torch.max(act, 1)
                acc += torch.sum(torch.eq(ind, labels)).cpu().numpy()
                nel += ind.shape[0]
                acc0 += torch.sum(
                    torch.eq(ind[dlbls == 0], labels[dlbls == 0])).cpu().numpy()
                nel0 += (dlbls == 0).shape[0]
                acc1 += torch.sum(
                    torch.eq(ind[dlbls == 1], labels[dlbls == 1])).cpu().numpy()
                nel1 += (dlbls == 1).shape[0]
            print(['Val', acc / nel, acc0 / nel0, acc1 / nel1, nel0, nel1])
            if acc / nel >= best_acc:
                torch.save(
                    C.module.state_dict(),
                    hyper_para.experiment_name + '_' + str(single_class_ind) + '.pth')
                best_acc = acc / nel
            running_cc = 0.0
            print([
                'Train', acct / (nel0t + nel1t), acc0t / nel0t, acc1t / nel1t,
                nel0t, nel1t
            ])
def _transformations_experiment(dataset_load_fn, dataset_name, single_class_ind, gpu_q): gpu_to_use = gpu_q.get() os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use (x_train, y_train), (x_test, y_test) = dataset_load_fn() if dataset_name in ['cats-vs-dogs']: transformer = Transformer(16, 16) n, k = (16, 8) else: transformer = Transformer(8, 8) n, k = (10, 4) #mdl = create_wide_residual_network(x_train.shape[1:], transformer.n_transforms, n, k) '''mdl.compile('adam', 'categorical_crossentropy', ['acc'])''' x_train_task = x_train[y_train.flatten() == single_class_ind] #x_train_task = x_train_task[0:15][:][:][:] transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task)) x_train_task_transformed = transformer.transform_batch( np.repeat(x_train_task, transformer.n_transforms, axis=0), transformations_inds) batch_size = 32 '''mdl.fit(x=x_train_task_transformed, y=to_categorical(transformations_inds), batch_size=batch_size, epochs=int(np.ceil(200/transformer.n_transforms))) mdl_weights_name = '{}_transformations_{}_weights.h5'.format(dataset_name, get_class_name_from_index(single_class_ind, dataset_name)) #mdl.load_weights('cifar10/'+mdl_weights_name) ################################################################################################# # simplified normality score ################################################################################################# preds = np.zeros((len(x_test), transformer.n_transforms)) for t in [1]:#range(1): preds[:, t] = mdl.predict(transformer.transform_batch(x_test, [t] * len(x_test)), batch_size=batch_size)[:, t] labels = y_test.flatten() == single_class_ind scores = preds.mean(axis=-1) #print("Accuracy : "+ str(scores)) #################################################################################################''' dset = CustomDataset(x_train_task_transformed, transformations_inds) trainLoader = DataLoader(dset, batch_size=32, shuffle=True) ce_criterion = nn.CrossEntropyLoss() C = 
wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10).cuda() C = nn.DataParallel(C) optimizer_c = optim.Adam(C.parameters(), lr=0.001, betas=(0.9, 0.999)) C.train(mode=True) '''for epoch in range(int(1)): for (inputs, labels) in trainLoader: inputs = inputs.permute(0,3,2,1) if True: inputs = inputs.cuda() labels = labels.cuda() act,_ = C(inputs) loss_cc = ce_criterion(act, labels) optimizer_c.zero_grad() loss_cc.backward() optimizer_c.step() #running_cc += loss_cc.data running_cc = 0.0 torch.save(C.cpu().state_dict(), 'out.pth')''' C = C.cpu() C.load_state_dict(torch.load('out.pth')) (x_train, y_train), (x_test, y_test) = load_cifar10() transformer = Transformer(8, 8) glabels = y_test.flatten() == single_class_ind C.cuda().eval() scores = np.array([[]]) features = [] correct = 0 total = 0 preds = np.zeros((len(x_test), transformer.n_transforms)) for t in [6, 6]: #range(72): score = [] x_test0 = transformer.transform_batch(x_test, [t] * len(x_test)) dset = CustomDataset(x_test0, [t] * len(x_test)) testLoader = DataLoader(dset, batch_size=128, shuffle=False) for i, (inputs, labels) in enumerate(testLoader): inputs = inputs.permute(0, 3, 2, 1) if True: inputs = inputs.cuda() labels = labels.cuda() act, f = C(inputs) features += f.detach().cpu().tolist() #act = torch.nn.functional.softmax(act, dim=1) score += act[:, t].detach().cpu().tolist() preds[:, t] = list(score) fpr, tpr, thresholds = metrics.roc_curve(glabels, score) AUC = metrics.auc(fpr, tpr) print('AUROC: ' + str(AUC)) scores = np.sum(((preds)), 1) fpr, tpr, thresholds = metrics.roc_curve(glabels, scores) AUC = metrics.auc(fpr, tpr) print('AUROC: ' + str(AUC)) def calc_approx_alpha_sum(observations): N = len(observations) f = np.mean(observations, axis=0) return (N * (len(f) - 1) * (-psi(1))) / (N * np.sum(f * np.log(f)) - np.sum(f * np.sum(np.log(observations), axis=0))) def inv_psi(y, iters=5): # initial estimate cond = y >= -2.22 x = cond * (np.exp(y) + 0.5) + (1 - cond) * -1 / (y - psi(1)) 
for _ in range(iters): x = x - (psi(x) - y) / polygamma(1, x) return x def fixed_point_dirichlet_mle(alpha_init, log_p_hat, max_iter=1000): alpha_new = alpha_old = alpha_init for _ in range(max_iter): alpha_new = inv_psi(psi(np.sum(alpha_old)) + log_p_hat) if np.sqrt(np.sum((alpha_old - alpha_new)**2)) < 1e-9: break alpha_old = alpha_new return alpha_new def dirichlet_normality_score(alpha, p): return np.sum((alpha - 1) * np.log(p), axis=-1) '''scores = np.zeros((len(x_test),)) observed_data = x_train_task for t_ind in range(transformer.n_transforms): observed_dirichlet = mdl.predict(transformer.transform_batch(observed_data, [t_ind] * len(observed_data)), batch_size=64) log_p_hat_train = np.log(observed_dirichlet).mean(axis=0) alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet) alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train) x_test_p = mdl.predict(transformer.transform_batch(x_test, [t_ind] * len(x_test)), batch_size=64) scores += dirichlet_normality_score(mle_alpha_t, x_test_p)''' scores /= transformer.n_transforms labels = y_test.flatten() == single_class_ind r = (roc_auc_score(labels, scores)) f = open("guru99.txt", "a") f.write(str(r) + "\n") f.close() '''res_file_name = '{}_transformations_{}_{}.npz'.format(dataset_name,
def octest(hyper_para):
    """Open-set test: score each test sample by combining its per-class
    transformation-classifier response with the closed-set classifier
    confidence, each z-normalized by statistics estimated on the training set.

    BUGFIX: inside the per-sample transformation loop the activation list has
    exactly one element (batch of 1), so the original ``score_temp += act[t]``
    raised IndexError for every t >= 1; it must accumulate ``act[0]``.

    Parameters:
        hyper_para: experiment config; reads .experiment_name, .inclass.
    Side effects: loads weights from disk, prints AUROC, returns [0, 0].
    """
    # Closed-set 10-way classifier (CC) and the 72-transformation head (C).
    CC = wrn.WideResNet(28, num_classes=10, dropout_rate=0, widen_factor=10).cuda()
    CC.load_state_dict(torch.load(hyper_para.experiment_name + '.pth'))
    C = wrn.WideResNet(28, num_classes=72, dropout_rate=0, widen_factor=10).cuda()
    C.cuda().eval()
    CC.cuda().eval()

    # Per-class normalization statistics: mu/std for the transformation scores,
    # muc/stdc for the closed-set classifier confidences.
    muc = {}
    stdc = {}
    mu = {}
    std = {}

    # --- Pass 1: transformation-score statistics per in-distribution class ---
    for cname in hyper_para.inclass:
        single_class_ind = cname
        C.load_state_dict(
            torch.load('saved/' + hyper_para.experiment_name + '_' + str(cname) + '.pth'))
        (x_train, y_train), (x_test, y_test) = load_cifar10()
        transformer = Transformer(8, 8)
        x_train_task0 = x_train[[
            i in hyper_para.inclass for i in y_train.flatten()
        ]]
        y_train_task = y_train[[
            i in hyper_para.inclass for i in y_train.flatten()
        ]]
        for t in range(72):
            # Apply transformation t to the whole training subset and collect
            # the logit of the matching transformation class.
            x_train_task = transformer.transform_batch(x_train_task0,
                                                       [t] * len(y_train_task))
            dset = CustomDataset(x_train_task, y_train_task)
            trainLoader = DataLoader(dset, batch_size=128, shuffle=True)
            features = []
            for inputs0, labels in trainLoader:
                inputs = inputs0.permute(0, 3, 2, 1).cuda()
                act, f = C(inputs)
                features += act[:, t].detach().cpu().tolist()
            if t == 0:
                totfeatures = features
            else:
                totfeatures += features
        mu[str(cname)] = np.mean(totfeatures)
        std[str(cname)] = np.sqrt(np.var(totfeatures))

    # --- Pass 2: closed-set confidence statistics per class (correct preds only) ---
    features = {}
    if True:
        (x_train, y_train), (x_test, y_test) = load_cifar10()
        x_train_task = x_train[[
            i in hyper_para.inclass for i in y_train.flatten()
        ]]
        y_train_task = y_train[[
            i in hyper_para.inclass for i in y_train.flatten()
        ]]
        dset = CustomDataset(x_train_task, y_train_task)
        trainLoader = DataLoader(dset, batch_size=128, shuffle=True)
        for inputs0, labels0 in trainLoader:
            inputs = inputs0.permute(0, 3, 2, 1).cuda()
            act, f = CC(inputs)
            val, ind = torch.max(act, dim=1)
            val, ind, labels0 = (val.detach().cpu().tolist(),
                                 ind.detach().cpu().tolist(),
                                 labels0.detach().cpu().tolist())
            for idx, (ii, gt) in enumerate(zip(ind, labels0)):
                gt = gt[0]
                if ii == gt and gt in hyper_para.inclass:
                    if str(ii) not in features.keys():
                        features[str(ii)] = [act[idx, ii].detach().cpu().tolist()]
                    else:
                        features[str(ii)] += [act[idx, ii].detach().cpu().tolist()]
                    # debug output; guarded so the key is known to exist
                    print(np.shape(features[str(ii)]))
        for k in features.keys():
            muc[str(k)] = np.mean(features[str(k)])
            stdc[str(k)] = np.sqrt(np.var(features[str(k)]))

    # --- Scoring loop over the test set ---
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    transformer = Transformer(8, 8)
    scores = np.array([[]])
    features = []
    lbl = []
    correct = 0
    total = 0
    preds = np.zeros((len(x_test), transformer.n_transforms))
    dset = CustomDataset(x_test, y_test)
    testLoader = DataLoader(dset, batch_size=128)
    score = []
    for i, (inputs0, labels) in enumerate(testLoader):
        inputs = inputs0.permute(0, 3, 2, 1)
        if True:
            inputs = inputs.cuda()
            labels = labels.cuda()
        act0, f = CC(inputs)
        features += f.detach().cpu().tolist()
        val, ind = torch.max(act0, dim=1)
        val, ind, labels = (val.detach().cpu().tolist(),
                            ind.detach().cpu().tolist(),
                            labels.detach().cpu().tolist())
        for idx, (ii, gt) in enumerate(zip(ind, labels)):
            # Load the transformation head for the predicted class ii.
            C.load_state_dict(
                torch.load('saved/' + hyper_para.experiment_name + '_' + str(ii) + '.pth'))
            gt = gt[0]
            score_temp = []
            for t in range(72):
                x_test0 = transformer.transform_batch(
                    torch.unsqueeze(inputs0[idx, :, :, :], 0).detach().cpu().numpy(), [t])
                inputs = torch.tensor(x_test0).permute(0, 3, 2, 1).cuda()
                act, _ = C(inputs)
                act = act[:, t]
                act = act.detach().cpu().tolist()
                # act is a one-element list (single sample); accumulate act[0].
                # Original used act[t] here, which raised IndexError for t >= 1.
                if t == 0:
                    score_temp = act[0]
                else:
                    score_temp += act[0]
            # Final score: normalized transformation evidence + normalized
            # closed-set confidence for the predicted class.
            score += [(score_temp - mu[str(ii)]) / (std[str(ii)]) +
                      (val[idx] - muc[str(ii)]) / (stdc[str(ii)])]
            if gt in hyper_para.inclass:
                total += 1
                if ii == gt:
                    correct += 1
                lbl.append(1)
            else:
                lbl.append(0)
    fpr, tpr, thresholds = metrics.roc_curve(lbl, score)
    AUC = metrics.auc(fpr, tpr)
    print('AUROC: ' + str(AUC))
    return ([0, 0])
from tqdm import tqdm from scripts.detached_transformer_od_hits import calc_approx_alpha_sum, fixed_point_dirichlet_mle, dirichlet_normality_score, plot_histogram_disc_loss_acc_thr if __name__ == "__main__": config = tf.ConfigProto() config.gpu_options.allow_growth = True # dynamically grow the memory used on the GPU sess = tf.Session(config=config) set_session(sess) single_class_ind = 1 (x_train, y_train), (x_test, y_test) = load_fashion_mnist() print(x_train.shape) print(x_test.shape) transformer = Transformer(8, 8) n, k = (10, 4) mdl = create_wide_residual_network(input_shape=x_train.shape[1:], num_classes=transformer.n_transforms, depth=n, widen_factor=k) mdl.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc']) print(mdl.summary()) # get inliers of specific class x_train_task = x_train[y_train.flatten() == single_class_ind] print(x_train_task.shape)
def octrain(hyper_para):
    """Train one 72-way transformation classifier per (inlier) class.

    For each class index, builds a WideResNet-28-10 with 72 outputs, trains it
    with cross-entropy on rotated/transformed samples of that class only, and
    saves the weights to '<experiment_name>_<class>.pth'.

    Parameters:
        hyper_para: experiment config; reads .lr, .gpu, .iterations,
            .stats_frequency, .experiment_name and the color constants
            BLUE/GREEN/YELLOW/ENDC. Also has .C assigned as a side effect.
    """
    for single_class_ind in [0]:  #range(10):
        hyper_para.C = wrn.WideResNet(28, num_classes=72, dropout_rate=0,
                                      widen_factor=10)
        (x_train, y_train), (x_test, y_test) = load_cifar10()
        # keep only samples of the current class
        x_train_task = x_train[y_train.flatten() == single_class_ind]
        dset = RotateDataset(x_train_task)
        trainLoader = DataLoader(dset, batch_size=512, shuffle=True)
        transformer = Transformer(8, 8)
        '''transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
        x_train_task_transformed = transformer.transform_batch(np.repeat(x_train_task, transformer.n_transforms, axis=0), transformations_inds)
        dset = CustomDataset(x_train_task_transformed,transformations_inds )
        trainLoader = DataLoader(dset, batch_size=32, shuffle=True)'''
        # define networks
        C = hyper_para.C
        C = torch.nn.DataParallel(C)
        # define loss functions
        ce_criterion = nn.CrossEntropyLoss()
        # define optimizer
        optimizer_c = optim.Adam(C.parameters(), lr=hyper_para.lr,
                                 betas=(0.9, 0.999))
        # turn on the train mode
        C.train(mode=True)
        # initialization of auxilary variables
        running_tl = 0.0
        running_cc = 0.0
        running_rc = 0.0
        running_ri = 0.0
        # if gpu use cuda
        for i in range(100):  # 100 epochs
            if hyper_para.gpu:
                C.cuda()
            for idx, (inputs, labels) in enumerate(trainLoader):
                # NHWC -> NCHW-like permutation (note: swaps H and W too)
                inputs = inputs.permute(0, 3, 2, 1)
                t1 = time.time()
                if hyper_para.gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                act, _ = C(inputs)
                loss_cc = ce_criterion(act, labels)
                optimizer_c.zero_grad()
                loss_cc.backward()
                optimizer_c.step()
                running_cc += loss_cc.data
                t2 = time.time()
                if idx % 10 == 0:
                    # NOTE(review): running_cc is reset every 10 batches but the
                    # printed average divides by hyper_para.stats_frequency —
                    # confirm stats_frequency == 10 or the displayed loss is off.
                    line = hyper_para.BLUE + '[' + str(format(i + 1, '8d')) + '/' + str(
                        format(int(hyper_para.iterations), '8d')) + ']' + hyper_para.ENDC + \
                        hyper_para.GREEN + ' loss_cc: ' + hyper_para.ENDC + str(
                        format(running_cc / hyper_para.stats_frequency, '1.8f')) + \
                        hyper_para.YELLOW + ' time (min): ' + hyper_para.ENDC + str(int((t2 - t1) * 20.0))
                    print(line)
                    running_cc = 0.0
        # save the unwrapped (non-DataParallel) weights for this class
        torch.save(
            C.module.state_dict(), hyper_para.experiment_name + '_' +
            str(single_class_ind) + '.pth')
def __init__(self, x_tensor): self.x = x_tensor self.transformer = Transformer(8, 8)
def _transformations_experiment(dataset_load_fn, dataset_name, single_class_ind, gpu_q):
    """Run one geometric-transformation anomaly-detection experiment.

    Trains a WRN to classify which transformation was applied to inlier
    samples, then scores test samples with a Dirichlet normality score and
    saves ROC/PR data plus model weights under RESULTS_DIR.

    Parameters:
        dataset_load_fn: callable returning ((x_train, y_train), (x_test, y_test)).
        dataset_name: dataset identifier; selects transformer size and WRN config.
        single_class_ind: class index treated as inlier.
        gpu_q: queue of GPU-id strings; one is checked out for the run and
            returned at the end.
    """
    gpu_to_use = gpu_q.get()
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_to_use
    (x_train, y_train), (x_test, y_test) = dataset_load_fn()

    # Larger translation grid / deeper net for the higher-resolution dataset.
    if dataset_name in ['cats-vs-dogs']:
        transformer = Transformer(16, 16)
        n, k = (16, 8)
    else:
        transformer = Transformer(8, 8)
        n, k = (10, 4)
    mdl = create_wide_residual_network(x_train.shape[1:], transformer.n_transforms, n, k)
    mdl.compile('adam', 'categorical_crossentropy', ['acc'])

    # Build the self-supervised training set: every inlier sample under every
    # transformation, labeled by transformation index.
    x_train_task = x_train[y_train.flatten() == single_class_ind]
    transformations_inds = np.tile(np.arange(transformer.n_transforms), len(x_train_task))
    x_train_task_transformed = transformer.transform_batch(np.repeat(x_train_task, transformer.n_transforms, axis=0),
                                                           transformations_inds)
    batch_size = 128
    # epochs scaled so total updates are roughly constant across transform counts
    mdl.fit(x=x_train_task_transformed, y=to_categorical(transformations_inds),
            batch_size=batch_size, epochs=int(np.ceil(200/transformer.n_transforms))
            )
    #################################################################################################
    # simplified normality score
    #################################################################################################
    # preds = np.zeros((len(x_test), transformer.n_transforms))
    # for t in range(transformer.n_transforms):
    #     preds[:, t] = mdl.predict(transformer.transform_batch(x_test, [t] * len(x_test)),
    #                               batch_size=batch_size)[:, t]
    #
    # labels = y_test.flatten() == single_class_ind
    # scores = preds.mean(axis=-1)
    #################################################################################################

    def calc_approx_alpha_sum(observations):
        # Closed-form approximation of the Dirichlet concentration sum
        # from observed softmax vectors.
        N = len(observations)
        f = np.mean(observations, axis=0)
        return (N * (len(f) - 1) * (-psi(1))) / (
                N * np.sum(f * np.log(f)) - np.sum(f * np.sum(np.log(observations), axis=0)))

    def inv_psi(y, iters=5):
        # Numerical inverse of the digamma function via Newton iterations.
        # initial estimate
        cond = y >= -2.22
        x = cond * (np.exp(y) + 0.5) + (1 - cond) * -1 / (y - psi(1))
        for _ in range(iters):
            x = x - (psi(x) - y) / polygamma(1, x)
        return x

    def fixed_point_dirichlet_mle(alpha_init, log_p_hat, max_iter=1000):
        # Fixed-point iteration for the Dirichlet MLE (Minka-style update),
        # stopped early once the parameter change is below 1e-9.
        alpha_new = alpha_old = alpha_init
        for _ in range(max_iter):
            alpha_new = inv_psi(psi(np.sum(alpha_old)) + log_p_hat)
            if np.sqrt(np.sum((alpha_old - alpha_new) ** 2)) < 1e-9:
                break
            alpha_old = alpha_new
        return alpha_new

    def dirichlet_normality_score(alpha, p):
        # Log-likelihood of softmax vector p under Dirichlet(alpha), up to a
        # constant that does not depend on p.
        return np.sum((alpha - 1) * np.log(p), axis=-1)

    # Fit one Dirichlet per transformation on the training softmax outputs,
    # then accumulate each test sample's normality score across transformations.
    scores = np.zeros((len(x_test),))
    observed_data = x_train_task
    for t_ind in range(transformer.n_transforms):
        observed_dirichlet = mdl.predict(transformer.transform_batch(observed_data, [t_ind] * len(observed_data)),
                                         batch_size=1024)
        log_p_hat_train = np.log(observed_dirichlet).mean(axis=0)
        alpha_sum_approx = calc_approx_alpha_sum(observed_dirichlet)
        alpha_0 = observed_dirichlet.mean(axis=0) * alpha_sum_approx
        mle_alpha_t = fixed_point_dirichlet_mle(alpha_0, log_p_hat_train)
        x_test_p = mdl.predict(transformer.transform_batch(x_test, [t_ind] * len(x_test)),
                               batch_size=1024)
        scores += dirichlet_normality_score(mle_alpha_t, x_test_p)
    scores /= transformer.n_transforms
    labels = y_test.flatten() == single_class_ind

    # Persist ROC/PR data and the trained weights, timestamped.
    res_file_name = '{}_transformations_{}_{}.npz'.format(dataset_name,
                                                          get_class_name_from_index(single_class_ind, dataset_name),
                                                          datetime.now().strftime('%Y-%m-%d-%H%M'))
    res_file_path = os.path.join(RESULTS_DIR, dataset_name, res_file_name)
    save_roc_pr_curve_data(scores, labels, res_file_path)
    mdl_weights_name = '{}_transformations_{}_{}_weights.h5'.format(dataset_name,
                                                                    get_class_name_from_index(single_class_ind, dataset_name),
                                                                    datetime.now().strftime('%Y-%m-%d-%H%M'))
    mdl_weights_path = os.path.join(RESULTS_DIR, dataset_name, mdl_weights_name)
    mdl.save_weights(mdl_weights_path)

    # Return the GPU to the pool for the next experiment.
    gpu_q.put(gpu_to_use)
def os_train(hyper_para): C = wrn.WideResNet(28, num_classes=10, dropout_rate=0, widen_factor=10).cuda() single_class_ind = hyper_para.inclass[0] (x_train, y_train), (x_test, y_test) = load_cifar10() transformer = Transformer(8, 8) x_train_task = x_train[[ i in hyper_para.inclass for i in y_train.flatten() ]] y_train_task = y_train[[ i in hyper_para.inclass for i in y_train.flatten() ]] print(np.shape(y_train_task)) print(np.shape(y_train)) y_train_task = y_train_task.astype(int) #x_train_task = x_train_task[0:15][:][:][:] dset = CustomDataset(x_train_task, y_train_task) trainLoader = DataLoader(dset, batch_size=128, shuffle=True) # define networks # define loss functions ce_criterion = nn.CrossEntropyLoss() # define optimizer optimizer_c = optim.Adam(C.parameters(), lr=hyper_para.lr, betas=(0.9, 0.999)) # turn on the train mode C.train(mode=True) # initialization of auxilary variables running_tl = 0.0 running_cc = 0.0 running_rc = 0.0 running_ri = 0.0 # if gpu use cuda for i in range(int(20)): if hyper_para.gpu: C.cuda() for iii, (inputs, labels) in enumerate(trainLoader): inputs = inputs.permute(0, 3, 2, 1) t1 = time.time() if hyper_para.gpu: inputs = inputs.cuda() labels = labels[:, 0].cuda() act, _ = C(inputs) loss_cc = ce_criterion(act, labels) optimizer_c.zero_grad() loss_cc.backward() optimizer_c.step() running_cc += loss_cc.data t2 = time.time() if iii % 50 == 0: line = hyper_para.BLUE + '[' + str(format(i + 1, '8d')) + '/' + str( format(int(hyper_para.iterations), '8d')) + ']' + hyper_para.ENDC + \ hyper_para.GREEN + ' loss_cc: ' + hyper_para.ENDC + str( format(running_cc / hyper_para.stats_frequency, '1.8f')) + \ hyper_para.YELLOW + ' time (min): ' + hyper_para.ENDC + str(int((t2 - t1) * 20.0)) print(line) running_cc = 0.0 torch.save(C.state_dict(), hyper_para.experiment_name + '.pth')