def read(fp=None, mode=0, key='salinas', seed=-1, opendic=1, cls1=-1, num=20):
    if mode == 0:  # closed classification
        pre = np.load(fp + key + '_close_' + str(seed) + '.npy')
    elif mode == 1:  # MDL4OW
        pre = np.load(fp + key + '_pre_o1_' + str(seed) + '.npy')
    elif mode == 2:  # MDL4OW/C
        pre = np.load(fp + key + '_pre_o2_' + str(seed) + '.npy')
    elif mode == 3:  # closed classification, same as mode==1, except input is a probability map: predicted image, imx*imy*c
        pre = np.load(r'G:\open-set-standard\keras\saved\hresnet_200\paviaU_' + str(seed) + '.npy')
        pre = np.argmax(pre, axis=-1) + 1
    elif mode == 4:  # softmax threshold
        pre = np.load(fp + key + '_pre_' + str(seed) + '.npy')
        pre1 = np.argmax(pre, axis=-1) + 1
        mask = pre.max(axis=-1)
        pre1[mask < opendic] = cls1
        pre = pre1
    elif mode == 5:  # openmax
        pre = np.load(fp + key + '_close_' + str(seed) + '.npy')
        im1x, im1y = pre.shape
        tmp3 = np.load(fp + key + '_trainloss_' + str(seed) + '.npy')
        evm = np.load(fp + key + '_evm_' + str(seed) + '.npy')
        numofevm_all = int(num * 4 * 0.5)
        numofevm = int(num * 4 * 0.05)
        if numofevm < 3:
            numofevm = 3
        if numofevm_all < 20:
            numofevm_all = 20
        # fit the upper tail of the training-sample losses
        mr = libmr.MR()
        mr.fit_high(tmp3, numofevm_all)  # tmp3: loss of training samples
        wscore = mr.w_score_vector(evm)
        mask = wscore > 1 - opendic
        mask = mask.reshape(im1x, im1y)
        pre[mask] = cls1
    return pre
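# Illustrative sketch (not from the original repo): the mode==4 softmax-threshold
# rule from read() applied to synthetic data; the shapes and the 0.5 threshold
# are assumptions chosen for the example.
import numpy as np

prob = np.random.rand(4, 4, 5)                # stand-in imx*imy*c probability map
prob /= prob.sum(axis=-1, keepdims=True)      # normalize to softmax-like scores
labels = np.argmax(prob, axis=-1) + 1         # closed-set labels, 1-based
labels[prob.max(axis=-1) < 0.5] = -1          # reject low-confidence pixels as unknown (cls1=-1)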
def verify_wscore():
    import libmr
    import torch  # needed for the dummy tensors below

    numInstances = 10
    dataSize = 40000
    tailSize = 2500
    dummy_training_data = torch.rand((numInstances, dataSize)).type(torch.DoubleTensor)
    dummy_test_data = torch.rand((numInstances, dataSize)).type(torch.DoubleTensor)
    models = {}
    probs = []
    for i in range(numInstances):
        models[i] = libmr.MR()
        models[i].fit_low(dummy_training_data[i, :].numpy(), tailSize)
        k_l = []
        for l in dummy_test_data[i, :].numpy().tolist():
            k_l.append(models[i].w_score(l))
        probs.append(k_l)
    rt = torch.tensor(probs)
    print(rt)
    # compare against the batched GPU implementation in the local `weibull` module
    weibullObj = weibull.weibull()
    weibullObj.FitLow(dummy_training_data.cuda(), tailSize, isSorted=False)
    print(weibullObj.wscore(dummy_test_data))
    print(weibullObj.wscore(dummy_test_data).shape)
def weibull_tailfitting(meanfiles_path, distancefiles_path, labellist,
                        tailsize=20, distance_type='eucos'):
    """Read through distance files, mean vector and fit weibull model for each category

    Input:
    --------------------------------
    meanfiles_path : contains path to files with pre-computed mean-activation vector
    distancefiles_path : contains path to files with pre-computed distances for images from MAV
    labellist : ImageNet 2012 labellist

    Output:
    --------------------------------
    weibull_model : Perform EVT based analysis using tails of distances and save
                    weibull model parameters for re-adjusting softmax scores
    """
    weibull_model = {}
    # for each category, read meanfile, distance file, and perform weibull fitting
    for category in labellist:
        weibull_model[category] = {}
        distance_scores = loadmat('%s/%s_distances.mat' % (distancefiles_path, category))[distance_type]
        meantrain_vec = loadmat('%s/%s.mat' % (meanfiles_path, category))
        weibull_model[category]['distances_%s' % distance_type] = distance_scores
        weibull_model[category]['mean_vec'] = meantrain_vec
        weibull_model[category]['weibull_model'] = []
        for channel in range(NCHANNELS):
            mr = libmr.MR()
            tailtofit = sorted(distance_scores[channel, :])[-tailsize:]
            mr.fit_high(tailtofit, len(tailtofit))
            weibull_model[category]['weibull_model'] += [mr]
    return weibull_model
def test_load_from_string(self):
    # We should be able to deserialize a string to an MR
    mr = libmr.MR()
    mr.fit_high(np.random.randn(100), 100)
    saved = str(mr)
    mr2 = libmr.load_from_string(saved)
    assert str(mr) == str(mr2)
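# Companion sketch: the same round-trip through a file on disk, reusing the
# string serialization exercised by the test above (the filename is an example).
import numpy as np
import libmr

mr = libmr.MR()
mr.fit_high(np.random.randn(100), 100)
with open('mr_params.txt', 'w') as f:
    f.write(str(mr))
with open('mr_params.txt') as f:
    mr2 = libmr.load_from_string(f.read())
assert str(mr) == str(mr2)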
def weibull_fitting(path, tailsize=20, distance_type='eucos_dist'):
    # fit the distance distributions
    mav_path = path + "features_fc8_mav.npz"
    topk_mav_path = path + "features_fc8_topk_mav.npz"
    dist_path = path + "mean_distance_files/class_"
    labels = [0, 1, 2, 3, 4, 5, 6, 7]
    # data = np.load(path + "mean_distance_files/class_0.npz")
    # print(data['eu_dist'][0])
    weibull_model = {}
    # for each class, read meanfile, distance file, and perform weibull fitting
    for category in labels:
        weibull_model[category] = {}
        distance_scores = np.load(dist_path + str(category) + ".npz")[distance_type]
        if distance_type == 'mahal_dist':
            meantrain_vec = np.load(topk_mav_path)['topk'][category]
        else:
            meantrain_vec = np.load(mav_path)['features'][category]
        NCHANNELS = 1
        weibull_model[category]['distances_%s' % distance_type] = distance_scores
        weibull_model[category]['mean_vec'] = meantrain_vec
        weibull_model[category]['weibull_model'] = []
        for channel in range(NCHANNELS):
            mr = libmr.MR()
            tailtofit = sorted(distance_scores[channel, :])[-tailsize:]
            mr.fit_high(tailtofit, len(tailtofit))
            weibull_model[category]['weibull_model'] += [mr]
    return weibull_model
def main():
    posscores = sp.asarray([0.245, 0.2632, 0.3233, 0.3573, 0.4014, 0.4055, 0.4212, 0.5677])
    test_distances = sp.asarray([0.05, 0.1, 0.25, 0.4, 0.75, 1., 1.5, 2.])
    mr = libmr.MR()
    # since higher is worse and we want to fit the higher tail, use fit_high()
    mr.fit_high(posscores, posscores.shape[0])
    wscores = mr.w_score_vector(test_distances)
    for i in range(wscores.shape[0]):
        print("%.2f %.2f %.2f" % (test_distances[i], wscores[i], mr.inv(wscores[i])))
    # wscores are the ones to be used in the equation s_i * (1 - rho_i)
    print("Low wscore --> low probability that the score is an outlier, i.e. the sample IS NOT an outlier")
    print("High wscore --> high probability that the score is an outlier, i.e. the sample IS an outlier")
    print("posscores: ", posscores)
    print("test_distances: ", test_distances)
    print("wscores: ", wscores)
def weibull_fit_tails(av_map, tail_size=200, metric_type='cosine'):
    weibull_model = {}
    labels = av_map.keys()
    for label in labels:
        print(f'EVT fitting for label {label}')
        weibull_model[label] = {}
        class_av = av_map[label]
        class_mav = np.mean(class_av, axis=0, keepdims=True)
        av_distance = np.zeros((1, class_av.shape[0]))
        for i in range(class_av.shape[0]):
            av_distance[0, i] = compute_distance(class_av[i, :].reshape(1, -1),
                                                 class_mav, metric_type=metric_type)
        weibull_model[label]['mean_vec'] = class_mav
        weibull_model[label]['distances'] = av_distance
        mr = libmr.MR()
        tail_size_fix = min(tail_size, av_distance.shape[1])
        tails_to_fit = sorted(av_distance[0, :])[-tail_size_fix:]
        mr.fit_high(tails_to_fit, tail_size_fix)
        weibull_model[label]['weibull_model'] = mr
    return weibull_model
def weibull_fit_parallel(args):
    """Parallelized for efficiency"""
    global tailsize
    dists, row, labels = args
    # distances from sample `row` to all samples with a different label
    nearest = np.partition(dists[np.where(labels != labels[row])], tailsize)
    mr = libmr.MR()
    mr.fit_low(nearest, tailsize)
    return str(mr)
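# Hedged sketch of how weibull_fit_parallel() might be driven with a process
# pool; the distance matrix, labels, and the global tailsize are assumptions
# about the surrounding EVM training code (on fork-based platforms the workers
# inherit the global).
from multiprocessing import Pool

def fit_all_rows(dist_matrix, labels):
    # one task per training sample: its distance row plus the shared labels
    tasks = [(dist_matrix[row], row, labels) for row in range(len(labels))]
    with Pool() as pool:
        return pool.map(weibull_fit_parallel, tasks)  # serialized MR strings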
def add_EV(self, x0, y0, step):
    self.mr_x.append(libmr.MR())
    self.mr_y.append(libmr.MR())
    self.x0.append(x0)
    self.y0.append(y0)
    self.X.append(x0)
    self.y.append(y0)
    self.step.append(step)
    self.last_update.append(np.max(step))
    self.theta.append(np.zeros_like(x0))
    self.c = self.c + 1
    if self.rho is None:
        # coefficients of the consequent part
        self.theta[-1] = np.insert(self.theta[-1], 0, y0, axis=1).T
    else:
        self.init_theta = 2
        # coefficients of the consequent part
        self.theta[-1] = np.insert(self.theta[-1], 0, y0, axis=1)
def EVT_params(self, activations, get_dist=False):
    mr = libmr.MR()  # create a meta-recognition object
    mu = activations.mean(0, keepdims=True)
    distancia = np.sort(np.squeeze(cdist(activations, mu, self.distance)))
    if len(distancia) < self.tail_size:
        mr.fit_high(distancia, len(distancia) - 1)
    else:
        mr.fit_high(distancia, self.tail_size)
    if get_dist:
        return mu, mr, distancia
    return mu, mr
def weibull_tailfit(tailsize):
    dist = np.load("preprocessing/dist_c10_train.npy", allow_pickle=1)
    weibull_model = []
    for category in range(10):
        mr = libmr.MR()
        tailtofit = sorted(dist[category])[-tailsize:]
        mr.fit_high(tailtofit, len(tailtofit))
        weibull_model.append(mr)
    weibull_model = np.asarray(weibull_model)
    return weibull_model
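# Runnable sketch of what weibull_tailfit() does for a single class, on
# synthetic distances (the real function reads them from preprocessing/...).
import numpy as np
import libmr

dist_c = np.abs(np.random.randn(5000))   # stand-in for one class's training distances
mr = libmr.MR()
tail = sorted(dist_c)[-20:]              # the 20 largest distances
mr.fit_high(tail, len(tail))
print(mr.w_score(3.5))                   # probability that a distance of 3.5 is an outlier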
def weibull_fit_parallel(args):
    """Parallelized for efficiency"""
    dists, row, labels, tailsize = args
    nearest = dists[np.where(labels != labels[row])].copy()
    nearest.sort()
    tailsize = min(tailsize, nearest.shape[0])
    # grow the tail until it contains at least quant_min_diff_tailsize distinct values
    settail = set(nearest[:tailsize])
    while len(settail) < quant_min_diff_tailsize:
        settail.add(nearest[tailsize])
        tailsize += 1
    mr = libmr.MR()
    mr.fit_low(nearest, tailsize)
    return str(mr)
def main():
    mr = libmr.MR()
    datasize = len(svm_data["scores"])
    mr.fit_svm(svm_data, datasize, 1, 1, 1, 10)
    print(fit_data)
    print(mr.w_score_vector(fit_data))
    mr.mr_save("meta_rec.model")
    datadump = {"data": fit_data}
    # pickle requires binary mode
    f = open("data.dump", "wb")
    pickle.dump(datadump, f)
    f.close()
    print(dir(mr))
def fit_weibull(means, dists, categories, tailsize=20, distance_type='eucos'):
    weibull_model = {}
    for mean, dist, category_name in zip(means, dists, categories):
        weibull_model[category_name] = {}
        weibull_model[category_name]['distances_{}'.format(distance_type)] = dist[distance_type]
        weibull_model[category_name]['mean_vec'] = mean
        weibull_model[category_name]['weibull_model'] = []
        for channel in range(mean.shape[0]):
            mr = libmr.MR()
            tailtofit = np.sort(dist[distance_type][channel, :])[-tailsize:]
            mr.fit_high(tailtofit, len(tailtofit))
            weibull_model[category_name]['weibull_model'].append(mr)
    return weibull_model
def fit_weibull_models(distribution_values, tailsizes, num_max_fits=50):
    """
    Function to fit weibull models on distribution values per class. In our case the
    distribution values are the distances of an input's approximate posterior value to the
    per-class mean latent z, i.e. the Weibull model fits regions of high density and gives
    credible intervals. The tailsize specifies how many outliers are expected in the dataset
    the model has been trained on. We use libmr https://github.com/Vastlab/libMR (installable
    through e.g. pip) for the Weibull model fitting.

    Parameters:
        distribution_values (list): Values on which the fit is conducted. In our case latent space distances.
        tailsizes (list): List of integers, specifying tailsizes per class. For a balanced dataset typically the same.
        num_max_fits (int): Number of attempts to fit the Weibull models before timing out and returning unsuccessfully.

    Returns:
        list: List of Weibull models with their respective parameters (stored in libmr class instances).
    """
    weibull_models = []

    # loop through the list containing distance values per class
    for i in range(len(distribution_values)):
        # for each class set the initial success to False and number of attempts to 0
        is_valid = False
        count = 0

        # If the list contains distance values, conduct a fit. If it is empty, e.g. because there
        # is not a single prediction for the corresponding class, continue with the next class.
        # Note that the latter isn't expected for a model that has been trained for even just a short while.
        if isinstance(distribution_values[i], torch.Tensor):
            distribution_values[i] = distribution_values[i].cpu().numpy()
            # weibull model per class
            weibull_models.append(libmr.MR())
            # attempt num_max_fits many fits before aborting
            while is_valid is False and count < num_max_fits:
                # conduct the fit with libmr
                weibull_models[i].fit_high(distribution_values[i], tailsizes[i])
                is_valid = weibull_models[i].is_valid
                count += 1
            if not is_valid:
                print("Weibull fit for class " + str(i) + " not successful after "
                      + str(num_max_fits) + " attempts")
                return weibull_models, False
        else:
            weibull_models.append([])
    return weibull_models, True
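# Minimal usage sketch for fit_weibull_models() above, on synthetic per-class
# latent distances (two classes; the tensor sizes and tailsizes are assumptions).
import numpy as np
import torch

dists = [torch.rand(500), torch.rand(500)]          # per-class distance samples
models, ok = fit_weibull_models(dists, tailsizes=[50, 50])
if ok:
    # w_score_vector returns per-query outlier probabilities in [0, 1]
    print(models[0].w_score_vector(np.array([0.1, 3.0])))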
def weibull_tailfitting(meantrain_vec, distance_scores, tailsize=20, distance_type='eucos'):
    import libmr
    weibull_model = []
    for i in range(len(meantrain_vec)):
        model = {}
        model['distances_%s' % distance_type] = distance_scores[i]
        model['mean_vec'] = meantrain_vec[i]
        mr = libmr.MR()
        tailtofit = sorted(distance_scores[i])[-tailsize:]
        mr.fit_high(np.array(tailtofit), len(tailtofit))
        model['weibull_model'] = mr
        weibull_model.append(model)
    return weibull_model
def weibull_tailfitting(mean, distances, num_classes, tailsize=20):
    weibull_model = {}
    for i in range(num_classes):
        weibull_model[i] = {}
        weibull_model[i]['distances'] = distances[i]
        weibull_model[i]['mean_vec'] = mean[i]
        weibull_model[i]['weibull_model'] = []
        mr = libmr.MR()
        tail_to_fit = sorted(distances[i])[-tailsize:]
        mr.fit_high(tail_to_fit, len(tail_to_fit))
        weibull_model[i]['weibull_model'] += [mr]
        sys.stdout.flush()
    return weibull_model
def weibull_tailfitting(prototypes, distances, tailsize=3, distance_type='l2'):
    weibull_model = {}
    for class_nr in range(len(prototypes)):
        weibull_model[class_nr] = {}
        weibull_model[class_nr]['distances_' + str(distance_type)] = distances[class_nr]
        weibull_model[class_nr]['prototype'] = prototypes[class_nr]
        mr = libmr.MR()
        tailtofit = sorted(distances[class_nr])[-tailsize:]
        mr.fit_high(tailtofit, len(tailtofit))
        weibull_model[class_nr]['weibull_model'] = mr
    return weibull_model
def weibull_tailfitting(mean, distance, labellist, tailsize=20, distance_type='eucos'):
    weibull_model = {}
    for category in labellist:
        weibull_model[category] = {}
        distance_scores = np.array(distance[category][distance_type])
        meantrain_vec = np.array(mean[category])
        weibull_model[category]['distances_%s' % distance_type] = distance_scores
        weibull_model[category]['mean_vec'] = meantrain_vec
        weibull_model[category]['weibull_model'] = []
        mr = libmr.MR()
        tailtofit = sorted(distance_scores)[-tailsize:]
        mr.fit_high(tailtofit, len(tailtofit))
        weibull_model[category]['weibull_model'] += [mr]
    return weibull_model
def fit_weibull(mavs, dists, categories, tailsize=20, distance_type='eucos'):
    # weibull_model is a dictionary with one k-v element per class
    # mav's shape: (1, class_num); dist's shape: (3, 1, N_c)
    weibull_model = {}
    for mav, dist, category_name in zip(mavs, dists, categories):
        weibull_model[category_name] = {}  # one weibull model per category
        weibull_model[category_name]['distances_{}'.format(distance_type)] = dist[distance_type]
        weibull_model[category_name]['mean_vec'] = mav  # shape=(1, class_num)
        weibull_model[category_name]['weibull_model'] = []
        for channel in range(mav.shape[0]):
            mr = libmr.MR()
            # np.sort's default axis is -1, the last axis
            tailtofit = np.sort(dist[distance_type][channel, :])[-tailsize:]
            mr.fit_high(tailtofit, len(tailtofit))
            # one mr model per channel per category
            weibull_model[category_name]['weibull_model'].append(mr)
    return weibull_model
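# Hedged sketch of how the fitted per-channel models are typically consumed in
# OpenMax-style recalibration; the helper name and arguments are illustrative,
# not part of the original code.
def unknown_mass(weibull_model, category_name, query_distance,
                 channel=0, distance_type='eucos'):
    # w_score is the Weibull CDF at the query distance: high values mean the
    # activation is far into the tail, i.e. likely not from this category
    mr = weibull_model[category_name]['weibull_model'][channel]
    return mr.w_score(query_distance)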
def weibull_tailfitting(eucos_dist, mean_activations, taillength=8):
    """
    Fits a Weibull model on the logit vectors farthest from the MAV.

    :param eucos_dist: the euclidean-cosine distances from the MAV, one array per class.
    :param mean_activations: mean activation vectors (MAV), one per class.
    :param taillength: number of extreme distances used for the fit.
    :return: weibull model.
    """
    weibull_model = {}
    for cl in range(10):
        weibull_model[str(cl)] = {}
        weibull_model[str(cl)]['eucos_distances'] = eucos_dist[cl]
        weibull_model[str(cl)]['mean_vec'] = mean_activations[cl]
        mr = libmr.MR(verbose=True)
        # fit the tail of this class's distance distribution
        tailtofit = sorted(eucos_dist[cl])[-taillength:]
        mr.fit_high(tailtofit, len(tailtofit))
        weibull_model[str(cl)]['weibull_model'] = mr
    return weibull_model
def fit_weibull(means, dists, categories, tailsize=20, distance_type='eucos'):
    """
    Input:
        means (C, channel, C)
        dists (N_c, channel, C) * C
    Output:
        weibull_model : Perform EVT based analysis using tails of distances and save
                        weibull model parameters for re-adjusting softmax scores
    """
    weibull_model = {}
    for mean, dist, category_name in zip(means, dists, categories):
        weibull_model[category_name] = {}
        weibull_model[category_name]['distances_{}'.format(distance_type)] = dist[distance_type]
        weibull_model[category_name]['mean_vec'] = mean
        weibull_model[category_name]['weibull_model'] = []
        for channel in range(mean.shape[0]):
            mr = libmr.MR()
            tailtofit = np.sort(dist[distance_type][channel, :])[-tailsize:]
            mr.fit_high(tailtofit, len(tailtofit))
            weibull_model[category_name]['weibull_model'].append(mr)
    return weibull_model
def update_class_stats(self, X, y):
    z = self.latent(X)
    pred_y = self.predict(X)
    # keep only correctly classified samples
    correct = (pred_y == np.argmax(y, axis=1))
    z = z[correct]
    pred_y = pred_y[correct]
    # fit weibull model for each class
    self.mr_model = {}
    self.c_means = np.zeros((self.y_dim, z.shape[1]))
    for c in range(self.y_dim):
        # calculate class mean
        z_c = z[pred_y == c]
        mu_c = z_c.mean(axis=0)
        # fit Weibull on the largest distances from the class mean
        mr = libmr.MR()
        tailtofit = sorted(self.dist_from_mav(z_c, mu_c[None, :]).ravel())[-self.tailsize:]
        mr.fit_high(tailtofit, len(tailtofit))
        self.mr_model[c] = mr
        self.c_means[c, :] = mu_c
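# Hedged companion sketch: once update_class_stats() has fit the per-class
# models, a test point can be rejected as unknown when the w-score of its
# distance to the predicted class mean is too high. self.latent, self.predict,
# self.dist_from_mav and the 0.9 threshold are assumed from the surrounding class.
def predict_open(self, X, threshold=0.9):
    z = self.latent(X)
    pred = self.predict(X)
    out = pred.copy()
    for i, c in enumerate(pred):
        d = self.dist_from_mav(z[i:i + 1], self.c_means[c][None, :]).ravel()[0]
        if self.mr_model[c].w_score(d) > threshold:
            out[i] = -1  # reject as unknown
    return out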
def _gather_weibull_distribution(self, training_features, div_eu, weibull_tail_size=20):
    weibull = {
        seen_class_index: {
            'mav': None,
            'eucos_distances': None,
            'weibull_model': None
        }
        for seen_class_index in training_features.keys()
    }
    for index in training_features.keys():
        if not len(training_features[index]) > 0:
            print(f"Error: No training examples for category {index}")
            import pdb
            pdb.set_trace()  # drop into the debugger; this case is not expected
        else:
            features_tensor = torch.cat(training_features[index], dim=0)
            mav = torch.mean(features_tensor, 0)
            mav_matrix = mav.unsqueeze(0).expand(features_tensor.size(0), -1)
            # EU distance divided by div_eu
            eu_distances = torch.sqrt(torch.sum((mav_matrix - features_tensor) ** 2, dim=1)) / div_eu
            cos_distances = 1 - torch.nn.CosineSimilarity(dim=1)(mav_matrix, features_tensor)
            eucos_distances = eu_distances + cos_distances
            weibull[index]['mav'] = mav
            # weibull[index]['eucos_distances'] = eucos_distances
            distance_scores = list(eucos_distances)
            mr = libmr.MR()
            tailtofit = sorted(distance_scores)[-weibull_tail_size:]
            mr.fit_high(tailtofit, len(tailtofit))
            weibull[index]['weibull_model'] = mr
    return weibull
def cls_mav_dist(cls):
    data = {}
    same_cls = feature_vector_file.iloc[results_file.index[results_file[1] == cls], 1:]
    print("same_cls", same_cls.shape)
    MAV = same_cls.mean()
    if args.use_euclidean:
        distances = same_cls.apply(
            lambda row: scipy.spatial.distance.euclidean(MAV, row), axis=1)
    else:
        distances = same_cls.apply(
            lambda row: scipy.spatial.distance.cosine(MAV, row), axis=1)
    distances = distances.values.tolist()
    print("len(distances)", len(distances))
    mr = libmr.MR()
    # fit an EVT model on the distances
    mr.fit_high(distances, args.tail_size)
    data['model'] = mr
    data['MAV'] = MAV
    data['distances'] = distances
    del distances, same_cls
    return data
def weibull_tailfitting(meanfiles_path, distancefiles_path, labellist,
                        tailsize=20, distance_type='eucos'):
    """Read through distance files, mean vector and fit weibull model for each category

    Input:
    --------------------------------
    meanfiles_path : contains path to files with pre-computed mean-activation vector
    distancefiles_path : contains path to files with pre-computed distances for images from MAV
    labellist : list of category labels (criteria list)

    Output:
    --------------------------------
    weibull_model : Perform EVT based analysis using tails of distances and save
                    weibull model parameters for re-adjusting softmax scores
    """
    weibull_model = {}
    # for each category, read meanfile, distance file, and perform weibull fitting
    for category in labellist:
        weibull_model[category] = {}
        distance_mat_path = pathlib.Path(distancefiles_path).joinpath('{}_distance.mat'.format(category))
        mean_train_vec_path = pathlib.Path(meanfiles_path).joinpath('{}.mat'.format(category))
        if not (distance_mat_path.exists() and mean_train_vec_path.exists()):
            continue
        distance_scores = loadmat(distance_mat_path)[distance_type]
        meantrain_vec = loadmat(mean_train_vec_path)
        weibull_model[category]['distances_%s' % distance_type] = distance_scores
        weibull_model[category]['mean_vec'] = meantrain_vec
        weibull_model[category]['weibull_model'] = []
        for channel in range(1):
            mr = libmr.MR()
            tailtofit = sorted(distance_scores[channel, :])[-tailsize:]
            mr.fit_high(tailtofit, len(tailtofit))
            weibull_model[category]['weibull_model'] += [mr]
    return weibull_model
def fit(X, y, tailsize, Cl, distance):
    """
    Returns the Weibull parameters of each instance of class Cl, i.e. the parameters
    that model the distribution of the shortest 'tailsize' margins of that class with
    respect to all other classes.

    :param X: List containing matrices of (Nl x dimension_of_feature_vector) of all training classes
    :param y: Labels of the classes
    :param tailsize: Number of margins to be fitted by the Weibull distribution
    :param Cl: Class identifier from list known_classes
    :param distance: 0 for euclidean distance, 1 for cosine similarity
    :return: PSI_l --> (Nl x 2) matrix containing the scale (lambda) and shape (k) of the
             fitted margins for each instance of the class l
    """
    Xl, Xnotl = select_class(Cl, X, y)
    # distance computation
    if distance == 0:
        D = pairwise_euclidean_distance(Xl, Xnotl)
    elif distance == 1:
        D = ppp_cosine_similarity(Xl, Xnotl)
        D = D.numpy()
    Nl = len(Xl[:, 0])
    # PSI_l is formed by (lambda, k)
    PSI_l = np.zeros((Nl, 2))
    mr = libmr.MR()
    for i in range(0, Nl):
        # We want to know the distribution of the MARGINS (we divide by 2 because the
        # margin is the point half-way to the negative sample). We sort the vector of
        # distances because we are interested in the closest instances: they matter most
        # in defining the margins, since they can create confusion.
        d_sorted = 0.5 * np.sort(D[i, :])[:tailsize]
        if distance == 0:
            mr.fit_low(d_sorted, tailsize)
            PSI_li = mr.get_params()[:2]
        elif distance == 1:
            k_i, lambda_i = fit_(d_sorted, iters=100, eps=1e-6)
            PSI_li = (lambda_i, k_i)
        PSI_l[i, :] = PSI_li
    return PSI_l
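# Hedged follow-up: given the (lambda, k) rows of PSI_l returned above, the
# standard EVM probability that a query at distance d is "included" by instance i
# is the Weibull form exp(-(d / lambda_i) ** k_i); sign and parameter conventions
# may differ slightly between implementations.
import numpy as np

def psi_probability(PSI_l, d):
    lam, k = PSI_l[:, 0], PSI_l[:, 1]
    return np.exp(-(d / lam) ** k)  # one inclusion probability per instance of Cl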
def fit_psi(X, y, tau, Cl):
    # samples of the current class
    l = np.argwhere(y == Cl).reshape(-1)
    X_l = X[l]
    # samples of the other classes
    m = np.argwhere(y != Cl).reshape(-1)
    X_m = X[m]
    # pairwise distances between samples of the current class, X_l,
    # and samples of the other classes, X_m
    D = sklearn.metrics.pairwise.pairwise_distances(X_l, X_m)
    psi = []
    # for each sample belonging to class Cl, estimate the shape and scale
    # parameters based on half of the distance of the tau nearest samples
    # not belonging to Cl
    for i in range(X_l.shape[0]):
        mr = libmr.MR()
        mr.fit_low(1 / 2 * D[i], tau)
        psi.append(mr)
    return np.array(psi)
def fit_psi(X, y, tau, Cl):
    # get the samples of the current class
    l = np.argwhere(y == Cl).reshape(-1)
    X_l = X[l]
    # get the samples of the other classes
    m = np.argwhere(y != Cl).reshape(-1)
    X_m = X[m]
    # compute the pairwise distances between samples of the current class,
    # X_l, and samples of the other classes, X_m
    D = sklearn.metrics.pairwise.pairwise_distances(X_l, X_m)
    psi = []
    # for each sample belonging to class Cl, estimate the shape and scale
    # parameters based on half of the distance of the tau nearest samples
    # not belonging to Cl
    for i in range(X_l.shape[0]):
        mr = libmr.MR()
        mr.fit_low(1 / 2 * D[i], tau)
        psi.append(mr)
    return np.array(psi)
def test_str(self):
    # We should be able to convert an MR to a string
    mr = libmr.MR()
    mr.fit_high(np.random.randn(100), 100)
    assert len(str(mr)) > 0