Ejemplo n.º 1
0
def read(fp=None, mode=0, key='salinas', seed=-1, opendic=1, cls1=-1, num=20):
    """Load a saved prediction array and, for some modes, flag rejected entries.

    Args:
        fp: String prefix prepended to every file name (all modes except 3).
        mode: Selects the file to load and the post-processing:
            0 - closed classification, ``<key>_close_<seed>.npy`` as is.
            1 - MDL4OW, ``<key>_pre_o1_<seed>.npy`` as is.
            2 - MDL4OW/C, ``<key>_pre_o2_<seed>.npy`` as is.
            3 - probability array loaded from a hard-coded path; returns
                ``argmax`` over the last axis plus one.
            4 - softmax threshold on ``<key>_pre_<seed>.npy``: ``argmax + 1``,
                with entries whose maximum is below ``opendic`` set to ``cls1``.
            5 - openmax-style rejection: a Weibull is fitted on the high tail
                of ``<key>_trainloss_<seed>.npy`` and entries of the closed
                prediction whose w-score on ``<key>_evm_<seed>.npy`` exceeds
                ``1 - opendic`` are set to ``cls1``.
        key: Dataset name used inside the file names.
        seed: Run identifier used inside the file names.
        opendic: Threshold used by modes 4 and 5.
        cls1: Label written into rejected entries.
        num: Used by mode 5 to size the Weibull tail (``num * 4 * 0.5``,
            but never fewer than 20).

    Returns:
        The (possibly modified) prediction array.

    Raises:
        ValueError: If ``mode`` is not one of 0-5.
    """
    if mode == 0:  # closed classification
        pre = np.load(fp + key + '_close_' + str(seed) + '.npy')
    elif mode == 1:  # MDL4OW
        pre = np.load(fp + key + '_pre_o1_' + str(seed) + '.npy')
    elif mode == 2:  # MDL4OW/C
        pre = np.load(fp + key + '_pre_o2_' + str(seed) + '.npy')
    elif mode == 3:
        # Closed classification from a probability image (imx*imy*c).
        # NOTE(review): this path is hard-coded and ignores fp/key.
        pre = np.load(r'G:\open-set-standard\keras\saved\hresnet_200\paviaU_'+str(seed)+'.npy')
        pre = np.argmax(pre, axis=-1) + 1
    elif mode == 4:  # softmax threshold
        prob = np.load(fp + key + '_pre_' + str(seed) + '.npy')
        pre = np.argmax(prob, axis=-1) + 1
        pre[prob.max(axis=-1) < opendic] = cls1
    elif mode == 5:  # openmax
        pre = np.load(fp + key + '_close_' + str(seed) + '.npy')
        im1x, im1y = pre.shape
        # Use the `seed` argument here as every other mode does; the previous
        # code referred to an undefined name.
        train_loss = np.load(fp + key + '_trainloss_' + str(seed) + '.npy')
        evm = np.load(fp + key + '_evm_' + str(seed) + '.npy')
        tail_size = max(int(num * 4 * 0.5), 20)
        mr = libmr.MR()
        mr.fit_high(train_loss, tail_size)  # losses of the training samples
        wscore = mr.w_score_vector(evm)
        mask = (wscore > 1 - opendic).reshape(im1x, im1y)
        pre[mask] = cls1
    else:
        raise ValueError('unsupported mode: %r (expected 0-5)' % (mode,))
    return pre
Ejemplo n.º 2
0
def verify_wscore():
    """Print libmr w-scores next to those produced by ``weibull.weibull``.

    Random training and test matrices are generated; one ``libmr.MR`` per row
    is fitted on the low tail of the training row and evaluated on every
    value of the matching test row. The same data is then passed to
    ``weibull.weibull`` (fitted on the CUDA copy of the training data) and its
    w-scores and their shape are printed for visual comparison.
    """
    import libmr

    num_instances = 10
    data_size = 40000
    tail_size = 2500
    shape = (num_instances, data_size)
    dummy_training_data = torch.rand(shape).type(torch.DoubleTensor)
    dummy_test_data = torch.rand(shape).type(torch.DoubleTensor)

    models = {}
    probs = []
    for idx in range(num_instances):
        fitted = libmr.MR()
        models[idx] = fitted
        fitted.fit_low(dummy_training_data[idx, :].numpy(), tail_size)
        test_row = dummy_test_data[idx, :].numpy().tolist()
        probs.append([fitted.w_score(value) for value in test_row])

    print(torch.tensor(probs))

    weibull_obj = weibull.weibull()
    weibull_obj.FitLow(dummy_training_data.cuda(), tail_size, isSorted=False)
    print(weibull_obj.wscore(dummy_test_data))
    print(weibull_obj.wscore(dummy_test_data).shape)
Ejemplo n.º 3
0
def weibull_tailfitting(meanfiles_path, distancefiles_path, labellist,
                        tailsize=20,
                        distance_type='eucos'):
    """Fit one Weibull model per channel for every category in ``labellist``.

    For each category, ``<distancefiles_path>/<category>_distances.mat`` and
    ``<meanfiles_path>/<category>.mat`` are loaded with ``loadmat``. For each
    of the ``NCHANNELS`` channels the ``tailsize`` largest distances of that
    channel are fitted with ``libmr.MR.fit_high``.

    Args:
        meanfiles_path: Directory holding the per-category mean-vector files.
        distancefiles_path: Directory holding the per-category distance files.
        labellist: Iterable of category names.
        tailsize: Number of largest distances used for each fit.
        distance_type: Key read from the distance file; also used in the
            ``'distances_<distance_type>'`` result key.

    Returns:
        dict: ``category -> {'distances_<distance_type>', 'mean_vec',
        'weibull_model'}`` where ``'weibull_model'`` is a list of fitted
        ``libmr.MR`` objects, one per channel.
    """
    distance_key = 'distances_%s' % distance_type
    fitted = {}
    for category in labellist:
        distance_scores = loadmat(
            '%s/%s_distances.mat' % (distancefiles_path, category))[distance_type]
        meantrain_vec = loadmat('%s/%s.mat' % (meanfiles_path, category))

        channel_models = []
        for channel in range(NCHANNELS):
            mr = libmr.MR()
            tail = sorted(distance_scores[channel, :])[-tailsize:]
            mr.fit_high(tail, len(tail))
            channel_models.append(mr)

        fitted[category] = {
            distance_key: distance_scores,
            'mean_vec': meantrain_vec,
            'weibull_model': channel_models,
        }

    return fitted
Ejemplo n.º 4
0
 def test_load_from_string(self):
     """A fitted MR restored from its string form has the same string form."""
     original = libmr.MR()
     original.fit_high(np.random.randn(100), 100)
     restored = libmr.load_from_string(str(original))
     assert str(original) == str(restored)
Ejemplo n.º 5
0
def weibull_fitting(path, tailsize=20, distance_type='eucos_dist'):
    """Fit a Weibull model on the distance tail of each of the classes 0-7.

    Distances are read from ``<path>mean_distance_files/class_<k>.npz`` under
    the key ``distance_type``. The mean vector comes from the ``'topk'``
    entry of ``features_fc8_topk_mav.npz`` when ``distance_type`` is
    ``'mahal_dist'`` and from the ``'features'`` entry of
    ``features_fc8_mav.npz`` otherwise.

    Args:
        path: String prefix of the data directory (concatenated directly, so
            it must already end with a path separator).
        tailsize: Number of largest distances used for the fit.
        distance_type: Key of the distance array inside each class file.

    Returns:
        dict: ``class -> {'distances_<distance_type>', 'mean_vec',
        'weibull_model'}``; ``'weibull_model'`` is a one-element list holding
        the ``libmr.MR`` fitted on channel 0.
    """
    mav_path = path + "features_fc8_mav.npz"
    topk_mav_path = path + "features_fc8_topk_mav.npz"
    dist_prefix = path + "mean_distance_files/class_"
    n_channels = 1  # only the first row of the distance array is fitted
    distance_key = 'distances_%s' % distance_type

    weibull_model = {}
    for category in range(8):
        distance_scores = np.load(dist_prefix + str(category) + ".npz")[distance_type]
        if distance_type == 'mahal_dist':
            meantrain_vec = np.load(topk_mav_path)['topk'][category]
        else:
            meantrain_vec = np.load(mav_path)['features'][category]

        channel_models = []
        for channel in range(n_channels):
            mr = libmr.MR()
            tail = sorted(distance_scores[channel, :])[-tailsize:]
            mr.fit_high(tail, len(tail))
            channel_models.append(mr)

        weibull_model[category] = {
            distance_key: distance_scores,
            'mean_vec': meantrain_vec,
            'weibull_model': channel_models,
        }

    return weibull_model
Ejemplo n.º 6
0
def main():
    """Fit a high-tail Weibull on sample scores and print test w-scores.

    For every test distance the distance, its w-score and ``mr.inv`` of that
    w-score are printed, followed by a short legend and the raw arrays.
    """
    posscores = sp.asarray(
        [0.245, 0.2632, 0.3233, 0.3573, 0.4014, 0.4055, 0.4212, 0.5677])
    test_distances = sp.asarray([0.05, 0.1, 0.25, 0.4, 0.75, 1., 1.5, 2.])

    # Higher scores are worse here, so the upper tail is the one to model.
    mr = libmr.MR()
    mr.fit_high(posscores, posscores.shape[0])
    wscores = mr.w_score_vector(test_distances)

    for i, wscore in enumerate(wscores):
        print("%.2f %.2f %.2f" % (test_distances[i], wscore, mr.inv(wscore)))

    # The w-scores are the rho_i used in s_i * (1 - rho_i).
    print(
        "Low wscore --> Low probability that the score is outlier i.e. sample IS NOT outlier"
    )
    print(
        "High wscore --> High probability that the score is outlier i.e. sample IS an outlier"
    )
    print("posscores: ", posscores)
    print("test_distances: ", test_distances)
    print("wscores: ", wscores)
Ejemplo n.º 7
0
def weibull_fit_tails(av_map, tail_size=200, metric_type='cosine'):
    """Fit a Weibull model on each label's distances to its mean vector.

    Args:
        av_map: Mapping ``label -> 2-D array`` with one activation vector per
            row.
        tail_size: Maximum number of largest distances used for the fit; it
            is capped at the number of rows available for the label.
        metric_type: Passed through to ``compute_distance``.

    Returns:
        dict: ``label -> {'mean_vec', 'distances', 'weibull_model'}`` where
        ``'mean_vec'`` is the row mean (kept 2-D), ``'distances'`` has shape
        ``(1, n_rows)`` and ``'weibull_model'`` is the fitted ``libmr.MR``.
    """
    fitted = {}

    for label in av_map.keys():
        print(f'EVT fitting for label {label}')

        class_av = av_map[label]
        class_mav = np.mean(class_av, axis=0, keepdims=True)
        n_samples = class_av.shape[0]

        av_distance = np.zeros((1, n_samples))
        for i in range(n_samples):
            av_distance[0, i] = compute_distance(class_av[i, :].reshape(1, -1),
                                                 class_mav,
                                                 metric_type=metric_type)

        mr = libmr.MR()
        effective_tail = min(tail_size, av_distance.shape[1])
        tail = sorted(av_distance[0, :])[-effective_tail:]
        mr.fit_high(tail, effective_tail)

        fitted[label] = {
            'mean_vec': class_mav,
            'distances': av_distance,
            'weibull_model': mr,
        }

    return fitted
Ejemplo n.º 8
0
def weibull_fit_parallel(args):
    """Fit a low-tail Weibull for one sample and return it as a string.

    Args:
        args: Tuple ``(dists, row, labels)``. Only the entries of ``dists``
            whose label differs from ``labels[row]`` are used.

    Returns:
        str: ``str()`` of the fitted ``libmr.MR``.

    The tail size is read from the module-level ``tailsize``.
    """
    global tailsize
    dists, row, labels = args
    other_class = np.where(labels != labels[row])
    # Partition so that the `tailsize` smallest distances come first.
    candidates = np.partition(dists[other_class], tailsize)
    model = libmr.MR()
    model.fit_low(candidates, tailsize)
    return str(model)
Ejemplo n.º 9
0
    def add_EV(self, x0, y0, step):
        """Append a new entry built from ``(x0, y0)`` to every per-entry list.

        Two fresh ``libmr.MR`` objects are appended to ``mr_x`` / ``mr_y``,
        ``x0`` / ``y0`` are stored both as the origin (``self.x0``,
        ``self.y0``) and as the first sample (``self.X``, ``self.y``), and the
        counter ``self.c`` is incremented. The consequent coefficients start
        as zeros shaped like ``x0`` with ``y0`` inserted at index 0 along
        axis 1; they are stored transposed when ``self.rho`` is None.
        """
        self.mr_x.append(libmr.MR())
        self.mr_y.append(libmr.MR())
        for store, value in ((self.x0, x0), (self.y0, y0),
                             (self.X, x0), (self.y, y0)):
            store.append(value)
        self.step.append(step)
        self.last_update.append(np.max(step))
        self.theta.append(np.zeros_like(x0))
        self.c = self.c + 1

        store_transposed = self.rho is None
        if not store_transposed:
            self.init_theta = 2
        # coefficients of the consequent part
        theta = np.insert(self.theta[-1], 0, y0, axis=1)
        self.theta[-1] = theta.T if store_transposed else theta
Ejemplo n.º 10
0
 def EVT_params(self, activations, get_dist=False):
     """Fit a high-tail Weibull on the distances of activations to their mean.

     Args:
         activations: 2-D array, one activation vector per row.
         get_dist: When true, the sorted distances are returned as well.

     Returns:
         ``(mu, mr)`` or ``(mu, mr, distances)``: the mean activation (kept
         2-D), the fitted ``libmr.MR`` and, optionally, the sorted distances.
     """
     mr = libmr.MR()  # meta-recognition object that will hold the fit
     mu = activations.mean(0, keepdims=True)
     distances = np.sort(np.squeeze(cdist(activations, mu, self.distance)))
     n_distances = len(distances)
     # With fewer samples than the configured tail, fit on all but one.
     if n_distances < self.tail_size:
         fit_size = n_distances - 1
     else:
         fit_size = self.tail_size
     mr.fit_high(distances, fit_size)
     return (mu, mr, distances) if get_dist else (mu, mr)
Ejemplo n.º 11
0
def weibull_tailfit(tailsize):
    """Fit one high-tail Weibull per class on pre-computed training distances.

    Distances are loaded from ``preprocessing/dist_c10_train.npy`` and the
    first ten entries are used, one per class.

    Args:
        tailsize: Number of largest distances used for each fit.

    Returns:
        numpy.ndarray: The ten fitted ``libmr.MR`` objects, converted with
        ``np.asarray``.
    """
    # NOTE(review): allow_pickle is enabled, so only load trusted files here.
    dist = np.load("preprocessing/dist_c10_train.npy", allow_pickle=1)

    models = []
    for category in range(10):
        tail = sorted(dist[category])[-tailsize:]
        fitted = libmr.MR()
        fitted.fit_high(tail, len(tail))
        models.append(fitted)

    return np.asarray(models)
Ejemplo n.º 12
0
def weibull_fit_parallel(args):
    """Fit a low-tail Weibull for one sample and return it as a string.

    Args:
        args: Tuple ``(dists, row, labels, tailsize)``. Only the entries of
            ``dists`` whose label differs from ``labels[row]`` are used.

    Returns:
        str: ``str()`` of the fitted ``libmr.MR``.

    The tail starts at ``tailsize`` (capped at the number of available
    distances) and is extended until it contains at least
    ``quant_min_diff_tailsize`` distinct values or the data is exhausted.
    """
    dists, row, labels, tailsize = args
    nearest = dists[np.where(labels != labels[row])].copy()
    nearest.sort()
    n_available = nearest.shape[0]
    tailsize = min(tailsize, n_available)
    distinct = set(nearest[:tailsize])
    # Stop at the end of the data: without this bound, too few distinct
    # distances would index past the array and raise IndexError.
    while len(distinct) < quant_min_diff_tailsize and tailsize < n_available:
        distinct.add(nearest[tailsize])
        tailsize += 1
    mr = libmr.MR()
    mr.fit_low(nearest, tailsize)
    return str(mr)
Ejemplo n.º 13
0
def main():
    """Fit an SVM-score model with libmr, print w-scores and save artefacts.

    Reads the module-level ``svm_data`` and ``fit_data``; writes the fitted
    model to ``meta_rec.model`` and a pickle of ``{"data": fit_data}`` to
    ``data.dump``.
    """
    mr = libmr.MR()
    datasize = len(svm_data["scores"])
    mr.fit_svm(svm_data, datasize, 1, 1, 1, 10)
    print(fit_data)
    print(mr.w_score_vector(fit_data))
    mr.mr_save("meta_rec.model")

    datadump = {"data": fit_data}
    # pickle emits bytes, so the file has to be opened in binary mode; the
    # context manager also closes it if dump() raises.
    with open("data.dump", "wb") as f:
        pickle.dump(datadump, f)
    print(dir(mr))
Ejemplo n.º 14
0
def fit_weibull(means, dists, categories, tailsize=20, distance_type='eucos'):
    """Fit per-channel Weibull models on the largest distances of each category.

    Args:
        means: Per-category mean arrays; ``mean.shape[0]`` is the number of
            channels fitted.
        dists: Per-category mappings from distance type to a 2-D array
            indexed as ``[channel, :]``.
        categories: Category names, aligned with ``means`` and ``dists``.
        tailsize: Number of largest distances used for each fit.
        distance_type: Key selecting the distance array in each ``dists`` item.

    Returns:
        dict: ``category -> {'distances_<distance_type>', 'mean_vec',
        'weibull_model'}`` with ``'weibull_model'`` a list of fitted
        ``libmr.MR`` objects, one per channel.
    """
    distance_key = 'distances_{}'.format(distance_type)
    result = {}
    for mean, dist, category_name in zip(means, dists, categories):
        entry = {
            distance_key: dist[distance_type],
            'mean_vec': mean,
            'weibull_model': [],
        }
        result[category_name] = entry
        for channel in range(mean.shape[0]):
            tail = np.sort(dist[distance_type][channel, :])[-tailsize:]
            model = libmr.MR()
            model.fit_high(tail, len(tail))
            entry['weibull_model'].append(model)

    return result
Ejemplo n.º 15
0
def fit_weibull_models(distribution_values, tailsizes, num_max_fits=50):
    """
    Fit one Weibull model per class on that class' distribution values using
    libmr (https://github.com/Vastlab/libMR).

    Entries of ``distribution_values`` that are torch tensors are converted to
    numpy arrays (the input list is updated in place) and fitted with
    ``fit_high``. Any other entry, e.g. an empty placeholder for a class
    without predictions, yields an empty list in the result.

    Parameters:
        distribution_values (list): Per-class values on which the fit is conducted.
        tailsizes (list): Per-class tail sizes passed to ``fit_high``.
        num_max_fits (int): Maximum number of fit attempts per class.

    Returns:
        tuple: ``(weibull_models, success)``. ``weibull_models`` is the list
        of libmr instances (or empty lists) built so far; ``success`` is False
        as soon as one class could not be fitted within ``num_max_fits``
        attempts, in which case the remaining classes are not processed.
    """

    weibull_models = []

    for i in range(len(distribution_values)):
        values = distribution_values[i]

        # Nothing to fit for this class: keep a placeholder and move on.
        if not isinstance(values, torch.Tensor):
            weibull_models.append([])
            continue

        values = values.cpu().numpy()
        distribution_values[i] = values
        model = libmr.MR()
        weibull_models.append(model)

        # Retry until libmr reports a valid fit or the attempts run out.
        is_valid = False
        for _ in range(num_max_fits):
            model.fit_high(values, tailsizes[i])
            is_valid = model.is_valid
            if is_valid is not False:
                break

        if not is_valid:
            print("Weibull fit for class " + str(i) +
                  " not successful after " + str(num_max_fits) +
                  " attempts")
            return weibull_models, False

    return weibull_models, True
def weibull_tailfitting(meantrain_vec,
                        distance_scores,
                        tailsize=20,
                        distance_type='eucos'):
    """Fit one high-tail Weibull per class and bundle it with its inputs.

    Args:
        meantrain_vec: Sequence of per-class mean vectors; its length sets
            the number of classes.
        distance_scores: Sequence of per-class distance collections.
        tailsize: Number of largest distances used for each fit.
        distance_type: Suffix of the ``'distances_<distance_type>'`` key.

    Returns:
        list: One dict per class with the keys
        ``'distances_<distance_type>'``, ``'mean_vec'`` and
        ``'weibull_model'`` (the fitted ``libmr.MR``).
    """
    import libmr

    distance_key = 'distances_%s' % distance_type
    fitted = []
    for i in range(len(meantrain_vec)):
        class_distances = distance_scores[i]
        tail = sorted(class_distances)[-tailsize:]
        mr = libmr.MR()
        mr.fit_high(np.array(tail), len(tail))
        fitted.append({
            distance_key: class_distances,
            'mean_vec': meantrain_vec[i],
            'weibull_model': mr,
        })
    return fitted
Ejemplo n.º 17
0
def weibull_tailfitting(mean, distances, num_classes, tailsize=20):
    """Fit a high-tail Weibull on the largest distances of each class.

    Args:
        mean: Indexable of per-class mean vectors.
        distances: Indexable of per-class distance collections.
        num_classes: Number of classes; indices ``0 .. num_classes - 1`` are used.
        tailsize: Number of largest distances used for each fit.

    Returns:
        dict: ``class index -> {'distances', 'mean_vec', 'weibull_model'}``
        where ``'weibull_model'`` is a one-element list holding the fitted
        ``libmr.MR``.
    """
    fitted = {}
    for class_idx in range(num_classes):
        entry = {
            'distances': distances[class_idx],
            'mean_vec': mean[class_idx],
            'weibull_model': [],
        }
        fitted[class_idx] = entry

        model = libmr.MR()
        tail = sorted(distances[class_idx])[-tailsize:]
        model.fit_high(tail, len(tail))
        entry['weibull_model'].append(model)

        sys.stdout.flush()

    return fitted
Ejemplo n.º 18
0
def weibull_tailfitting(prototypes, distances, tailsize=3, distance_type='l2'):
    """Fit a high-tail Weibull on the largest distances of each class.

    Args:
        prototypes: Sequence of per-class prototypes; its length sets the
            number of classes.
        distances: Indexable of per-class distance collections.
        tailsize: Number of largest distances used for each fit.
        distance_type: Suffix of the ``'distances_<distance_type>'`` key.

    Returns:
        dict: ``class index -> {'distances_<distance_type>', 'prototype',
        'weibull_model'}`` with ``'weibull_model'`` the fitted ``libmr.MR``.
    """
    distance_key = 'distances_' + str(distance_type)
    fitted = {}
    for class_nr, prototype in enumerate(prototypes):
        entry = {
            distance_key: distances[class_nr],
            'prototype': prototype,
        }
        fitted[class_nr] = entry

        model = libmr.MR()
        tail = sorted(distances[class_nr])[-tailsize:]
        model.fit_high(tail, len(tail))
        entry['weibull_model'] = model

    return fitted
Ejemplo n.º 19
0
def weibull_tailfitting(mean,
                        distance,
                        labellist,
                        tailsize=20,
                        distance_type='eucos'):
    """Fit a high-tail Weibull on the largest distances of each category.

    Args:
        mean: Mapping ``category -> mean vector``.
        distance: Mapping ``category -> {distance_type: distances}``.
        labellist: Iterable of categories to process.
        tailsize: Number of largest distances used for each fit.
        distance_type: Key selecting the distances; also used in the
            ``'distances_<distance_type>'`` result key.

    Returns:
        dict: ``category -> {'distances_<distance_type>', 'mean_vec',
        'weibull_model'}``; distances and mean are stored as numpy arrays and
        ``'weibull_model'`` is a one-element list holding the fitted
        ``libmr.MR``.
    """
    distance_key = 'distances_%s' % distance_type
    fitted = {}
    for category in labellist:
        entry = {}
        fitted[category] = entry
        distance_scores = np.array(distance[category][distance_type])
        entry[distance_key] = distance_scores
        entry['mean_vec'] = np.array(mean[category])

        model = libmr.MR()
        tail = sorted(distance_scores)[-tailsize:]
        model.fit_high(tail, len(tail))
        entry['weibull_model'] = [model]
    return fitted
Ejemplo n.º 20
0
def fit_weibull(mavs, dists, categories, tailsize=20, distance_type='eucos'):
    """Build one Weibull model per channel for every category.

    Args:
        mavs: Per-category mean activation vectors; ``mav.shape[0]`` gives the
            number of channels (the original notes describe the shape as
            ``(1, class_num)`` - confirm against the caller).
        dists: Per-category mappings from distance type to an array indexed
            as ``[channel, :]``.
        categories: Category names, aligned with ``mavs`` and ``dists``.
        tailsize: Number of largest distances used for each fit.
        distance_type: Key selecting the distance array.

    Returns:
        dict: One entry per category with the keys
        ``'distances_<distance_type>'``, ``'mean_vec'`` and ``'weibull_model'``
        (list of fitted ``libmr.MR``, one per channel).
    """
    distance_key = 'distances_{}'.format(distance_type)
    models_by_category = {}

    for mav, dist, category_name in zip(mavs, dists, categories):
        per_channel = []
        models_by_category[category_name] = {
            distance_key: dist[distance_type],
            'mean_vec': mav,
            'weibull_model': per_channel,
        }

        n_channels = mav.shape[0]
        for channel in range(n_channels):
            # np.sort orders ascending, so the tail is the last `tailsize` values.
            ordered = np.sort(dist[distance_type][channel, :])
            tail = ordered[-tailsize:]
            fitted = libmr.MR()
            fitted.fit_high(tail, len(tail))
            per_channel.append(fitted)

    return models_by_category
Ejemplo n.º 21
0
def weibull_tailfitting(eucos_dist, mean_activations, taillength=8,
                        num_classes=10):
    """
    Fits a Weibull model on the largest distances from the MAV of each class.

    :param eucos_dist: per-class euclidean-cosine distances from the MAV,
        indexed by class number (assumed to be a flat sequence per class -
        TODO confirm against the caller).
    :param mean_activations: per-class mean activation vector (MAV), indexed
        by class number.
    :param taillength: number of largest distances used for each fit.
    :param num_classes: number of classes to fit; defaults to the previously
        hard-coded value of 10.
    :return: dict mapping ``str(class)`` to a dict with the keys
        ``'eucos_distances'``, ``'mean_vec'`` and ``'weibull_model'`` (the
        fitted ``libmr.MR``).
    """

    weibull_model = {}
    for cl in range(num_classes):
        mr = libmr.MR(verbose=True)
        # Fit on this class' own distances. The tail was previously taken from
        # mean_activations[0], which gave every class the same model built
        # from activation values instead of distances.
        tailtofit = sorted(eucos_dist[cl])[-taillength:]
        mr.fit_high(tailtofit, len(tailtofit))
        weibull_model[str(cl)] = {
            'eucos_distances': eucos_dist[cl],
            'mean_vec': mean_activations[cl],
            'weibull_model': mr,
        }

    return weibull_model
Ejemplo n.º 22
0
def fit_weibull(means, dists, categories, tailsize=20, distance_type='eucos'):
    """
    Perform EVT-based analysis on the tails of the distances and keep the
    Weibull models for re-adjusting softmax scores.

    Input:
        means: per-category mean arrays; ``mean.shape[0]`` channels are fitted
        dists: per-category mappings ``distance_type -> array[channel, :]``
        categories: category names aligned with ``means`` and ``dists``
        tailsize: number of largest distances used for each fit
        distance_type: key selecting the distance array
    Output:
        weibull_model: ``category -> {'distances_<distance_type>', 'mean_vec',
                       'weibull_model'}``, the last being a list of fitted
                       ``libmr.MR`` objects, one per channel
    """
    def _fit_channel(scores, channel):
        # Fit the `tailsize` largest distances of one channel.
        tail = np.sort(scores[channel, :])[-tailsize:]
        fitted = libmr.MR()
        fitted.fit_high(tail, len(tail))
        return fitted

    distance_key = 'distances_{}'.format(distance_type)
    weibull_model = {}
    for mean, dist, category_name in zip(means, dists, categories):
        record = {distance_key: dist[distance_type], 'mean_vec': mean}
        weibull_model[category_name] = record
        record['weibull_model'] = []
        for channel in range(mean.shape[0]):
            record['weibull_model'].append(
                _fit_channel(dist[distance_type], channel))

    return weibull_model
Ejemplo n.º 23
0
    def update_class_stats(self, X, y):
        """Recompute per-class latent means and Weibull models.

        Only samples whose prediction equals ``argmax(y, axis=1)`` are used.
        For every class ``c`` in ``range(self.y_dim)`` the mean latent vector
        is stored in ``self.c_means[c]`` and a ``libmr.MR`` fitted on the
        ``self.tailsize`` largest values of ``self.dist_from_mav`` is stored
        in ``self.mr_model[c]``.
        """
        latent = self.latent(X)
        predicted = self.predict(X)
        hits = (predicted == np.argmax(y, axis=1))
        latent = latent[hits]
        predicted = predicted[hits]

        # one Weibull model and one mean vector per class
        self.mr_model = {}
        self.c_means = np.zeros((self.y_dim, latent.shape[1]))

        for c in range(self.y_dim):
            class_latent = latent[predicted == c]
            class_mean = class_latent.mean(axis=0)

            model = libmr.MR()
            distances = self.dist_from_mav(class_latent, class_mean[None, :])
            tail = sorted(distances.ravel())[-self.tailsize:]
            model.fit_high(tail, len(tail))

            self.mr_model[c] = model
            self.c_means[c, :] = class_mean
Ejemplo n.º 24
0
    def _gather_weibull_distribution(self,
                                     training_features,
                                     div_eu,
                                     weibull_tail_size=20):
        weibull = {
            seen_class_index: {
                'mav': None,
                'eucos_distances': None,
                'weibull_model': None
            }
            for seen_class_index in training_features.keys()
        }
        for index in training_features.keys():
            if not len(training_features[index]) > 0:
                print(f"Error: No training examples for category {index}")
                import pdb
                pdb.set_trace()  # breakpoint 18e1e416 //
            else:
                features_tensor = torch.cat(training_features[index], dim=0)
                mav = torch.mean(features_tensor, 0)
                mav_matrix = mav.unsqueeze(0).expand(features_tensor.size(0),
                                                     -1)
                eu_distances = torch.sqrt(
                    torch.sum(
                        (mav_matrix - features_tensor)**2,
                        dim=1)) / div_eu  # EU distance divided by div_eu.
                cos_distances = 1 - torch.nn.CosineSimilarity(dim=1)(
                    mav_matrix, features_tensor)
                eucos_distances = eu_distances + cos_distances

                weibull[index]['mav'] = mav
                # weibull[index]['eucos_distances'] = eucos_distances

                distance_scores = list(eucos_distances)
                mr = libmr.MR()
                tailtofit = sorted(distance_scores)[-weibull_tail_size:]
                mr.fit_high(tailtofit, len(tailtofit))
                weibull[index]['weibull_model'] = mr
        return weibull
def cls_mav_dist(cls):
    """Fit an EVT model on the distances of one class to its mean vector.

    Rows of the module-level ``feature_vector_file`` are selected where
    column 1 of ``results_file`` equals ``cls`` (the first column of the
    feature frame is dropped). The distance of every row to the column-wise
    mean is Euclidean when ``args.use_euclidean`` is set and cosine
    otherwise. ``args.tail_size`` is passed to ``fit_high``.

    Args:
        cls: Class label to select.

    Returns:
        dict: ``{'model': fitted libmr.MR, 'MAV': mean vector,
        'distances': list of distances}``.
    """
    selected_rows = results_file.index[results_file[1] == cls]
    same_cls = feature_vector_file.iloc[selected_rows, 1:]
    # A single formatted string prints the same text on Python 2 and 3.
    print("same_cls %s" % (same_cls.shape,))
    MAV = same_cls.mean()

    if args.use_euclidean:
        metric = scipy.spatial.distance.euclidean
    else:
        metric = scipy.spatial.distance.cosine
    distances = same_cls.apply(lambda row: metric(MAV, row), axis=1)
    distances = distances.values.tolist()

    print("len(distances) %d" % len(distances))
    mr = libmr.MR()
    # Fitting an EVT on the distances
    mr.fit_high(distances, args.tail_size)
    return {'model': mr, 'MAV': MAV, 'distances': distances}
Ejemplo n.º 26
0
def weibull_tailfitting(meanfiles_path, distancefiles_path, labellist,
                        tailsize=20,
                        distance_type='eucos'):
    """Read distance and mean files and fit a Weibull model for each category.

    For each category, ``<distancefiles_path>/<category>_distance.mat`` and
    ``<meanfiles_path>/<category>.mat`` are loaded with ``loadmat``. A
    category for which either file is missing is kept in the result as an
    empty dict.

    Input:
    --------------------------------
    meanfiles_path : directory with the pre-computed mean-activation files
    distancefiles_path : directory with the pre-computed distance files
    labellist : iterable of category names
    tailsize : number of largest distances used for the fit
    distance_type : key read from the distance file

    Output:
    --------------------------------
    weibull_model : ``category -> {'distances_<distance_type>', 'mean_vec',
                    'weibull_model'}`` where ``'weibull_model'`` is a
                    one-element list with the ``libmr.MR`` fitted on row 0
    """
    distance_dir = pathlib.Path(distancefiles_path)
    mean_dir = pathlib.Path(meanfiles_path)
    distance_key = 'distances_%s' % distance_type

    weibull_model = {}
    for category in labellist:
        entry = {}
        weibull_model[category] = entry

        distance_mat_path = distance_dir / '{}_distance.mat'.format(category)
        mean_train_vec_path = mean_dir / '{}.mat'.format(category)
        if not distance_mat_path.exists() or not mean_train_vec_path.exists():
            continue

        distance_scores = loadmat(distance_mat_path)[distance_type]
        entry[distance_key] = distance_scores
        entry['mean_vec'] = loadmat(mean_train_vec_path)
        entry['weibull_model'] = []
        for channel in range(1):
            fitted = libmr.MR()
            tail = sorted(distance_scores[channel, :])[-tailsize:]
            fitted.fit_high(tail, len(tail))
            entry['weibull_model'].append(fitted)
    return weibull_model
Ejemplo n.º 27
0
def fit(X, y, tailsize, Cl, distance):
    """
    Estimate, for every instance of class Cl, the parameters describing the
    distribution of its ``tailsize`` shortest margins to the other classes.

    :param X: training data, passed to ``select_class``
    :param y: labels, passed to ``select_class``
    :param tailsize: number of margins used for each fit
    :param Cl: identifier of the class being modelled
    :param distance: 0 for ``pairwise_euclidean_distance`` with a libmr fit,
                     1 for ``ppp_cosine_similarity`` with ``fit_``
    :return: PSI_l --> (Nl x 2) matrix with one parameter pair per instance of
             the class: the first two values of ``mr.get_params()`` for
             distance 0, ``(lambda_i, k_i)`` from ``fit_`` for distance 1
    """
    Xl, Xnotl = select_class(Cl, X, y)

    # distance of every class instance to every instance of the other classes
    if distance == 0:
        D = pairwise_euclidean_distance(Xl, Xnotl)
    elif distance == 1:
        D = ppp_cosine_similarity(Xl, Xnotl)
    D = D.numpy()

    n_instances = len(Xl[:, 0])
    PSI_l = np.zeros((n_instances, 2))
    mr = libmr.MR()
    for i in range(n_instances):
        # The margin lies half-way to the negative sample, hence the 0.5.
        # Only the closest negatives matter, so sort and keep the first ones.
        margins = 0.5 * np.sort(D[i, :])[:tailsize]

        if distance == 0:
            mr.fit_low(margins, tailsize)
            params = mr.get_params()[:2]
        elif distance == 1:
            k_i, lambda_i = fit_(margins, iters=100, eps=1e-6)
            params = (lambda_i, k_i)
        PSI_l[i, :] = params
    return PSI_l
def fit_psi(X, y, tau, Cl):
    """Fit one low-tail Weibull per sample of class ``Cl``.

    Args:
        X: Sample array, indexable by integer index arrays.
        y: Label array aligned with ``X``.
        tau: Tail size passed to ``fit_low``.
        Cl: Label of the class being modelled.

    Returns:
        numpy.ndarray: The fitted ``libmr.MR`` objects, one per sample of
        ``Cl``, each fitted on half the distances from that sample to all
        samples of the other classes.
    """
    own_idx = np.argwhere(y == Cl).reshape(-1)
    other_idx = np.argwhere(y != Cl).reshape(-1)
    X_own = X[own_idx]
    X_other = X[other_idx]

    # rows: samples of Cl, columns: samples of every other class
    D = sklearn.metrics.pairwise.pairwise_distances(X_own, X_other)

    models = []
    for i in range(X_own.shape[0]):
        model = libmr.MR()
        model.fit_low(0.5 * D[i], tau)
        models.append(model)

    return np.array(models)
Ejemplo n.º 29
0
def fit_psi(X, y, tau, Cl):
    """Estimate a Weibull model for every sample of the class ``Cl``.

    Args:
        X: Sample array, indexable by integer index arrays.
        y: Label array aligned with ``X``.
        tau: Tail size passed to ``fit_low``.
        Cl: Label of the current class.

    Returns:
        numpy.ndarray: One fitted ``libmr.MR`` per sample of ``Cl``.
    """
    # samples of the current class and samples of all other classes
    X_l = X[np.argwhere(y == Cl).reshape(-1)]
    X_m = X[np.argwhere(y != Cl).reshape(-1)]

    # pairwise distances between the samples of the current class, X_l,
    # and the samples of the other classes, X_m
    D = sklearn.metrics.pairwise.pairwise_distances(X_l, X_m)

    def _fit_row(row_index):
        # Fit on half the distance from one sample to the other-class samples.
        mr = libmr.MR()
        mr.fit_low(0.5 * D[row_index], tau)
        return mr

    psi = [_fit_row(i) for i in range(X_l.shape[0])]

    return np.array(psi)
Ejemplo n.º 30
0
 def test_str(self):
     """A fitted MR converts to a non-empty string."""
     fitted = libmr.MR()
     fitted.fit_high(np.random.randn(100), 100)
     text = str(fitted)
     assert len(text) > 0