Example #1
def test_emd_samples_validate_distance_matrix_size():
    dist = lambda x: [[0, 1],
                      [1, 0]]
    first_array = [1, 2, 3, 4]
    second_array = [1, 2, 3, 4]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array, distance=dist)
Example #2
def wasserstein_randomization(d1_large, d2_large, iters, downsample_size=100):
    """
    Combine synthetic and real data into two sets and randomly 
    divide the data into two new random sets. Check the wasserstein
    distance (earth movers distance) between these two new muddled sets.
    Use the measured wasserstein distance to compute the ratio between
    it and the median of the null distribution (earth movers distance on
    original set). A ratio of 0 would indicate that the two marginal 
    distributions are identical.

    From "REALLY USEFUL SYNTHETIC DATA
    A FRAMEWORK TO EVALUATE THE QUALITY OF
    DIFFERENTIALLY PRIVATE SYNTHETIC DATA"
    https://arxiv.org/pdf/2004.07740.pdf

    NOTE: We return the mean here. However, its best
    probably to analyze the distribution of the wasserstein score

    :param d1_large: real data
    :type d1_large: pandas DataFrame
    :param d2_large: fake data
    :type d2_large: pandas DataFrame
    :param iters: how many iterations to run the randomization
    :type iters: int
    :param downsample_size: we downsample the original datasets due
    to memory constraints
    :type downsample_size: int
    :return: wasserstein randomization mean
    :rtype: float
    """
    # pip install pyemd
    # https://github.com/wmayner/pyemd
    from pyemd import emd_samples

    assert (len(d1_large) == len(d2_large))
    d1 = d1_large.sample(n=downsample_size)
    d2 = d2_large.sample(n=downsample_size)
    l_1 = len(d1)
    d3 = np.concatenate((d1, d2))
    distances = []
    for i in range(iters):
        np.random.shuffle(d3)
        n_1, n_2 = d3[:l_1], d3[l_1:]
        try:
            # PyEMD is sometimes memory intensive
            # Let's reduce bins if so
            dist = emd_samples(n_1, n_2, bins='auto')
        except MemoryError:
            dist = emd_samples(n_1, n_2, bins=10)
        distances.append(dist)

    # Safety check, to see if there are any valid
    # measurements
    if len(distances) == 0:
        return -1

    d_pd = pd.DataFrame(distances)
    print(d_pd.describe())

    return np.mean(np.array(distances))
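A minimal usage sketch for the function above, assuming numpy/pandas are available and pyemd is installed; the DataFrame names and contents are illustrative, not from the original source:

import numpy as np
import pandas as pd

# two equally sized tables standing in for real and synthetic data
real_df = pd.DataFrame(np.random.normal(0.0, 1.0, size=(500, 3)))
synthetic_df = pd.DataFrame(np.random.normal(0.1, 1.0, size=(500, 3)))

# mean EMD over 20 random re-splits of the pooled, downsampled data
score = wasserstein_randomization(real_df, synthetic_df, iters=20, downsample_size=100)
print(score)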
Example #3
def empirical_distribution_distance(model_a,
                                    model_b,
                                    nb_samples,
                                    discrete=True):
    samples_a = model_a.sample(nb_samples)
    samples_b = model_b.sample(nb_samples)
    if discrete:
        return emd_samples(samples_a, samples_b, distance=discrete_distance)
    # return emd_samples(samples_a, samples_b, bins=2 * len(samples_a))
    return emd_samples(samples_a, samples_b)
Example #4
def emd(data, cluster_col):
    clusters = data[cluster_col]
    max_clust = np.max(list(clusters))

    cluster_emd_vecs = []
    # iterate through clusters
    for cluster in range(max_clust + 1):
        cluster_data = data[data[cluster_col] == cluster]
        cluster_size = len(cluster_data)
        if cluster_size == 0:
            #print("Cluster Size 0 error")
            continue

        crime_list = list(cluster_data['All Crime'])
        neighborhood_mean = np.mean(crime_list)
        sq_mu = math.sqrt(neighborhood_mean)

        sample_emd = []
        # average to eliminate impact of randomness on calculation
        # make result more consistent
        for j in range(50):
            comparison_dist = []
            for i in range(len(crime_list)):
                comparison_dist.append(neighborhood_mean +
                                       np.random.uniform(-1.0 * sq_mu, sq_mu))
            sample_emd.append(emd_samples(crime_list, comparison_dist))
        cluster_emd_vecs.append(np.mean(sample_emd))

    return cluster_emd_vecs
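A hedged usage sketch for the per-cluster EMD helper above. It assumes the imports the snippet relies on (math, numpy, and pyemd's emd_samples) are in scope; the 'All Crime' column comes from the snippet itself, while the cluster column name and data are illustrative:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'All Crime': np.random.poisson(20, size=200),
    'cluster': np.random.randint(0, 4, size=200),
})

# one averaged EMD score per non-empty cluster, comparing observed counts
# against a mean +/- sqrt(mean) uniform reference distribution
print(emd(df, 'cluster'))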
Example #5
def calculate_emd_avg(y_sim, alpha, r0):
    """Calculates average Earth Movers distance between two distributions
    
    For each simulations, takes wait-time distribution and calculates the EMD against the theoretical weibull distribution 
    given parameters at the start of the simulation. Normalised by r0.
    
    Args:
        y_sim (list): (n_sim, n_wait_times) dimension list of wait-times for each simulation, 
            for given reaction channel (esc, epi). Obtained from Channels.wait_times attribute
    
    Returns:
        EMD averaged over all simulations normalised by r0
    """

    n_sim = len(y_sim)

    # params for generating theoretical samples (ground truth)
    k = alpha + 1  # shape
    beta = (alpha + 1) * (r0 * gamma((alpha + 2) / (alpha + 1)))**(alpha + 1)
    lam = np.power((alpha + 1) / beta, 1 / (alpha + 1))  # scale
    n_samples = 100000

    emd_list = []
    for i in range(n_sim):
        y_hat = np.array(y_sim[i])
        y_true = weibull_min.rvs(k, loc=0, scale=lam, size=n_samples)
        emd_list.append(emd_samples(y_hat, y_true))

    return np.mean(emd_list) * r0
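A hedged usage sketch: it assumes the same imports as the snippet above (numpy, scipy.stats.weibull_min, a gamma function such as scipy.special.gamma, and pyemd's emd_samples), and feeds in wait times drawn from the matching theoretical Weibull, so the averaged, r0-scaled EMD should come out small; all values are illustrative:

import numpy as np
from scipy.special import gamma
from scipy.stats import weibull_min

alpha, r0 = 0.5, 1.0
k = alpha + 1
beta = (alpha + 1) * (r0 * gamma((alpha + 2) / (alpha + 1)))**(alpha + 1)
lam = np.power((alpha + 1) / beta, 1 / (alpha + 1))

# five fake "simulations", each a list of wait times from the theoretical Weibull
y_sim = [weibull_min.rvs(k, loc=0, scale=lam, size=2000) for _ in range(5)]

print(calculate_emd_avg(y_sim, alpha, r0))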
Example #6
def cal_emd_resamp(A, B, n_samp, times):
    emds = []
    for t in range(times):
        idx_a = np.random.choice(len(A), n_samp)
        idx_b = np.random.choice(len(B), n_samp)
        emds.append(emd_samples(A[idx_a], B[idx_b]))
    return np.mean(emds)
Example #7
def calculate_emd(y_sim, alpha, r0):
    """Calculates Earth Movers distance between two distributions
    
    Takes wait-time distribution and calculates the EMD against the theoretical weibull distribution 
    given parameters at the start of the simulation
    
    Args:
        y_sim (list): (n_sim, n_wait_times) list of wait-times for each simulation, 
            for given reaction channel (esc, epi). Obtained from Channels.wait_times attribute
    
    Returns:
        EMD between simulation and true weibull distribution
    """
    y_sim = [item for sublist in y_sim for item in sublist]  # flatten list
    y_sim = np.array(y_sim)
    n = 10000

    # params for generating theoretical samples (ground truth)
    k = alpha + 1  # shape
    beta = (alpha + 1) * (r0 * gamma((alpha + 2) / (alpha + 1)))**(alpha + 1)
    lam = np.power((alpha + 1) / beta, 1 / (alpha + 1))  # scale
    # generate true samples
    y_true = weibull_min.rvs(k, loc=0, scale=lam, size=n)

    d = emd_samples(y_sim, y_true)

    return d
Example #8
 def earth_mover(self, decisions):
     # decisions.size() == (batch_size, sample_size, attr_vocab_size)
     length = decisions.size(-1)
     indexes = (decisions.float().numpy() >= 0.5)
     emd = [[
         emd_samples(np.arange(length)[index].tolist(), self.samples[0])
         if index.sum() > 0 else 1.0 for index in indexes[bid]
     ] for bid in range(decisions.size(0))]
     return torch.tensor(emd, dtype=torch.float, device=decisions.device)
Example #9
def compare_pitchers(one, two):
    x = pitcher_array(one)

    z = pitcher_array(two)
    print(x)
    print(z)
    min_len = min(len(x), len(z))

    return (emd_samples(x[:min_len], z[:min_len]))
Example #10
def cal_emd_resamp(A, B, n_samp, times, bins=2):
    emds = []
    if bins is None:
        bins = 2
    for t in range(times):
        idx_a = np.random.choice(len(A), n_samp)
        idx_b = np.random.choice(len(B), n_samp)
        emds.append(emd_samples(A[idx_a], B[idx_b], bins=bins))
    return np.mean(emds)
Example #11
def compute_emd_split_samples(scores,
                              train_scores,
                              scores_sample_size=scores_sample_size):
    scores_split = np.array_split(scores, len(scores) / scores_sample_size)
    emd_scores_samples = [
        pyemd.emd_samples(split, train_scores, bins=50)
        for split in scores_split
    ]
    return emd_scores_samples
Example #12
def test_emd_samples_all_kwargs():
    # Regression only; not checked by hand
    dist = lambda x: [[(i - j)**3 for i in range(len(x))]
                      for j in range(len(x))]
    first_array = [1, 2, 3, 4, 5]
    second_array = [2, 3, 4, 5]
    emd_assert(
        emd_samples(first_array,
                    second_array,
                    bins=30,
                    normalized=False,
                    range=(-5, 15),
                    distance=dist), 24389.0)
Example #13
def get_value(img, frame):
    contour1, cnt = get_cnt(img.copy()), get_cnt(frame.copy())
    M = cv2.moments(contour1)
    cx = int(M['m10']/M['m00'])
    cy = int(M['m01']/M['m00'])
    # build as a list comprehension: np.array(map(...)) yields a 0-d object array in Python 3
    dist1 = np.array([distant((cx, cy), (pt[0][0], pt[0][1])) for pt in contour1])

    M = cv2.moments(cnt)
    cx = int(M['m10']/M['m00'])
    cy = int(M['m01']/M['m00'])
    dist2 = np.array([distant((cx, cy), (pt[0][0], pt[0][1])) for pt in cnt])

    v = emd_samples(dist1, dist2, bins=2)
    return v
Example #14
def test_emd_samples_all_kwargs():
    # Regression only; not checked by hand
    dist = lambda x: [
        [(i - j)**3 for i in range(len(x))] for j in range(len(x))
    ]
    first_array = [1, 2, 3, 4, 5]
    second_array = [2, 3, 4, 5]
    emd_assert(
        emd_samples(first_array, second_array,
                    bins=30,
                    normalized=False,
                    range=(-5, 15),
                    distance=dist),
        24389.0
    )
Example #15
def evaluation(file_name):
    gt_path = os.path.join(args.gt_path, file_name)
    pre_path = os.path.join(args.pre_path, file_name)
    assert os.path.exists(gt_path)
    assert os.path.exists(pre_path)

    gt_points = np.loadtxt(gt_path)
    pre_points = np.loadtxt(pre_path)

    gt2pre, _ = NearestNeighbors(
        n_neighbors=1, algorithm='auto').fit(pre_points).kneighbors(gt_points)
    pre2gt, _ = NearestNeighbors(
        n_neighbors=1, algorithm='auto').fit(gt_points).kneighbors(pre_points)

    return np.squeeze(gt2pre), np.squeeze(pre2gt), emd_samples(
        gt_points, pre_points)
Example #16
def paral_process_similirity(arg):

    index1 = arg
    print('index1',index1)
    # randomNum = random.randint(2,10)
    # time.sleep(randomNum)
    vectori = (unidata.values[index1][0:vend])  # the i-th curve vector
    #print('vectori:',vectori)
    # wavelet decomposition
    mywavestr = 'sym2'
    mywavlevel = 2
    coeffsvi = pywt.wavedec(vectori, mywavestr, level=mywavlevel)
    waveLdata = coeffsvi[0]
    vecroti_wave = waveLdata.astype(int)

    list_ddpearson = []
    list_ddbrc = []
    list_ddemdwave = []
    for vj in range(vsize):
        # index2=arg
        # print("index1: ",index1, "index2: ", index2)
        vectorj = (unidata.values[vj][0:vend])  # the j-th curve vector
        # vectorj = (unidata.values[index2][0:vend])
        vectorij = np.vstack([vectori, vectorj])  # stack vectors i and j
        # wavelet decomposition
        coeffsvj = pywt.wavedec(vectorj, mywavestr, level=mywavlevel)
        waveLdata_vj = coeffsvj[0]
        vecrotj_wave = waveLdata_vj.astype(int)
        # wavelet decomposition done

        # Pearson coefficient (cosine distance here)
        distpearson = pdist(vectorij, 'cosine')
        list_ddpearson.append(distpearson[0])

        # Bray-Curtis distance
        distbrc = pdist(vectorij, 'braycurtis')
        list_ddbrc.append(distbrc[0])

        # EMD (earth mover's distance) on the wavelet coefficients
        distemdwave = emd_samples(vecroti_wave, vecrotj_wave, bins='auto')
        #print('distemdwave',distemdwave)
        list_ddemdwave.append(distemdwave)
    #print('list_ddemdwave:',list_ddemdwave)
    return list_ddpearson,list_ddbrc,list_ddemdwave
Example #17
def train_rep(model,
              lr,
              X,
              P,
              n_iter,
              c_iter,
              batch_size,
              alpha=10,
              C_reg=1,
              compute_emd=False,
              adv=True,
              verbose=False):
    """
    Train the fair representation using autoencoder provided by the user.
    Parameters:
        model: the Pytorch model of the autoencoder. The model should have two members, model.encoder, and the model.decoder.

    Parameters:
        lr: learning rate.
        X: the input features.
        P: the protected attribute.
        n_iter: number of iterations.
        c_iter: the number of iteration to trian the critic inside each training iteration.
        batch_size: batch size.
        alpha: the weight of the fairness contraint. Larger means more penalize on the violation of fairness.
        C_reg: the penalization coefficient of the regularization of the encoder.
        compute_emd: whether the EMD distance is calculated for each iteration. It may slow the training process significantly.
        adv: if the model is trained adversarially, i.e. fairly. Setting it false will result in training a normal autoencoder.
        verbose: if the training process is verbosely printed.
    """
    time_s = time.time()
    X_0 = X[P == 0]
    X_1 = X[P == 1]

    optim_encoder = optim.Adam(model.encoder.parameters(), lr=lr)
    optim_decoder = optim.Adam(model.decoder.parameters(), lr=lr)
    optim_crit = optim.Adam(model.critic.parameters(), lr=0.1)

    l1_crit = nn.L1Loss(size_average=False)

    n_of_batch = int(len(X) / (batch_size * 2)) * n_iter

    for i in range(n_of_batch):
        X_n = X_0[np.random.choice(len(X_0), batch_size)]
        X_u = X_1[np.random.choice(len(X_1), batch_size)]
        if adv:
            w_dist_last = 0
            eps = 1
            #while w_dist <= 0:
            while eps >= 1e-3:
                #while True:
                for t in range(c_iter):
                    optim_crit.zero_grad()
                    w_dist = model.wdist(X_n, X_u)
                    loss = -w_dist
                    loss.backward(retain_graph=True)
                    optim_crit.step()
                    eps = np.abs(w_dist.data.item() - w_dist_last)

                    # keep training crit until distance no longer decrease
                    w_dist_last = w_dist.data.item()

                    for p in model.critic.parameters():
                        p.data.clamp_(-0.1, 0.1)

        # for t in range(c_iter):
        optim_encoder.zero_grad()
        optim_decoder.zero_grad()

        # only use the encoder g
        mse, wdist = model.forward(X_n, X_u)

        if adv:
            loss = mse + wdist * alpha
        else:
            loss = mse

        # L1 regularization
        reg_loss = 0
        #for param in model.encoder.parameters():
        #    reg_loss += torch.abs(param).sum()
        for layer in model.encoder:
            if type(layer) is nn.Linear:
                #norm = torch.sum(torch.pow(torch.sum(torch.abs(layer.weight), dim=0), 2))
                norm = 0.0
                for row in layer.weight.transpose(0, 1):
                    norm += torch.sum(torch.pow(row, 2))
                reg_loss += norm

        loss += C_reg * reg_loss
        loss.backward(retain_graph=True)

        # use mse and wdist to update g and f
        optim_encoder.step()
        optim_decoder.step()

        text = 'mse: %.4f, critic: %.4f' % (mse.item(), wdist.item())
        if compute_emd:
            g_0 = model.encoder(X_u).detach().cpu().numpy()
            g_1 = model.encoder(X_n).detach().cpu().numpy()
            real_emd = emd_samples(g_0, g_1)
            text += ", emd: %.4f" % real_emd

        if verbose:
            update_progress(i, n_of_batch, time_s, text=text + ' ')
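The compute_emd branch above simply calls pyemd's emd_samples on two batches of encoded representations; a small self-contained sketch of that step in isolation (the arrays here merely stand in for model.encoder outputs):

import numpy as np
from pyemd import emd_samples

g_0 = np.random.normal(0.0, 1.0, size=(64, 8))  # stand-in for model.encoder(X_u)
g_1 = np.random.normal(0.5, 1.0, size=(64, 8))  # stand-in for model.encoder(X_n)

# emd_samples histograms the values of both arrays over a common range and
# reports the earth mover's distance between the two histograms
print("emd: %.4f" % emd_samples(g_0, g_1))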
Example #18
        if mode == 'all':
            for v in self.split_data.values():
                data.append(v[0])
                label.append(v[1])
            data, label = (np.vstack(data), np.vstack(label))
        else:
            data, label = self.split_data[mode]
        idx = [i for i in range(len(data))]
        np.random.shuffle(idx)
        idx = np.array(idx)

        for i in trange(len(idx) // batch_size, ascii=True):
            x = data[idx[i * batch_size:(i + 1) * batch_size]]
            y = label[idx[i * batch_size:(i + 1) * batch_size]]
            yield (x, y)


if __name__ == "__main__":
    data_loader = MNIST('config.yaml')

    from pyemd import emd_samples

    label_data, _ = data_loader.getData(mode='label')
    unlabel_data, _ = data_loader.getData(mode='unlabel')
    label_data = np.vstack((label_data, unlabel_data[0]))
    all_data, _ = data_loader.getData()
    print(emd_samples(label_data.flatten(), all_data[:2000].flatten()))
    # for x, y in data_loader.next_batch(batch_size=64):
    #     print(y)
    # plt.imshow(data_loader.getData('label')[0][i].reshape((28,28)), cmap='gray')
    # plt.show()
Example #19
 aucbor_tmp = 0
 sauc_tmp = 0
 for pred, gt in zip(predictions, y_valid_cv):
     res = postprocess_predictions(pred[0], shape_r_out,
                                   shape_c_out)
     res = res / 255
     aucjud_tmp += auc_judd(res, gt[2][0, 0])
     sim_tmp += similarity(res, gt[0][0, 0])
     aucbor_tmp += auc_borji(res, gt[2][0, 0])
     nss_tmp += nss_metric(gt[2][0, 0], res)
     cc_tmp += cc(gt[0][0, 0], res)
     kl_tmp += kldiv(gt[0][0, 0], res)
     emdgt = gt[0][0, 0] * 255
     emdres = res * 255
     emd_tmp += emd_samples(emdgt.flatten(),
                            emdres.flatten(),
                            bins=255)
     sauc_tmp += auc_shuff(res, gt[2][0, 0], smap)
 print(emd_tmp / nb_val)
 sum_nss += nss_tmp / nb_val
 sum_cc += cc_tmp / nb_val
 sum_kl += kl_tmp / nb_val
 sum_emd += emd_tmp / nb_val
 sum_aucjud += aucjud_tmp / nb_val
 sum_sim += sim_tmp / nb_val
 sum_aucbor += aucbor_tmp / nb_val
 sum_sauc += sauc_tmp / nb_val
 f.write("{},{},{},{},{},{},{},{}\n".format(
     aucjud_tmp / nb_val, sim_tmp / nb_val, emd_tmp / nb_val,
     aucbor_tmp / nb_val, sauc_tmp / nb_val, cc_tmp / nb_val,
     nss_tmp / nb_val, kl_tmp / nb_val))
Example #20
import pyemd
import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
res = pyemd.emd_samples(a, b)
print(res, type(res))
Example #21
def test_emd_samples_1_custom_distance():
    dist = lambda x: np.array([[0.0 if i == j else 1.0 for i in x] for j in x])
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, distance=dist), 0.25)
Example #22
def test_emd_samples_1_manual_range():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, range=(0, 10)), 1.0)
Example #23
def test_emd_samples_2():
    first_array = [1]
    second_array = [2]
    emd_assert(emd_samples(first_array, second_array), 0.5)
Example #24
    d[bowler] = a

for bowler1 in d.keys():
    a = d[bowler1]
    differences[bowler1] = {}
    maximum_score[bowler1] = 0.0
    bowler_list.append(bowler1)

    for bowler2 in d.keys():

        if ((bowler1 in players) and (bowler2 in players)):
            b = d[bowler2]

            a = a.astype('float')
            b = b.astype('float')
            c = emd_samples(a, b, bins=255)

            differences[bowler1][bowler2] = c

            maximum_score[bowler1] = max(maximum_score[bowler1], c)

            #print bowler1 + "," + bowler2 + "," + str(c)

for bowler1 in players:
    for bowler2 in players:
        differences[bowler1][bowler2] /= maximum_score[bowler1]

#tree_file = open('comparing-bowlers.csv', 'w')
tree_file = open('comparing-batsmen-' + interpolation_type + '.csv', 'w')
writer = csv.writer(tree_file, delimiter=',')
Example #25
    # pitch_dist = leaders(note_pitch)
    # duration_dist = leaders(note_duration)
    print('\n', dir_name)

    # # calculate note pitch/duration
    # ab_list_pitch = []
    # ab_list_duration = []

    print("Duration:")

    # A-x, B-x
    ab_x_emd = []
    ab_x_eud = []

    for _ in ab_list_duration:
        ab_x_emd.append(emd_samples(_, note_duration))
        ab_x_eud.append(eud_dis(_, note_duration))

    # A - B
    ab_emd = emd_samples(ab_list_duration[0], ab_list_duration[1])
    ab_eud = eud_dis(ab_list_duration[0], ab_list_duration[1])

    # diff and ratio
    diff_emd = sum(ab_x_emd) - ab_emd
    ratio_emd = sum(ab_x_emd) / ab_emd

    diff_eud = sum(ab_x_eud) - ab_eud
    ratio_eud = sum(ab_x_eud) / ab_eud

    print("diff_emd:{:.3f} ratio_emd:{:.3f} diff_eud:{:.3f} ratio_eud:{:.3f}".format(diff_emd, ratio_emd, diff_eud,
                                                                                     ratio_eud))
Example #26
import numpy as np
from pyemd import emd
from pyemd import emd_with_flow
from pyemd import emd_samples

s1 = 8
s2 = 8
np.random.seed(10)

a = np.random.rand(s1)
b = np.random.rand(s2)
d = np.random.rand(s1, s2)

result1 = emd(a, b, d)
result2 = emd_with_flow(a, b, d)
result3 = emd_samples(a, b)

print(result1)
print("\n", result2)
print(result3)
Example #27
def test_emd_samples_validate_distance_matrix_size():
    dist = lambda x: [[0, 1], [1, 0]]
    first_array = [1, 2, 3, 4]
    second_array = [1, 2, 3, 4]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array, distance=dist)
Example #28
def test_emd_samples_1():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 0.75)
Example #29
def test_emd_samples_4():
    first_array = [1, 2, 3, 4, 5]
    second_array = [99, 98, 97, 96, 95]
    emd_assert(emd_samples(first_array, second_array), 78.4)
Example #30
def test_emd_samples_1_binsize():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, bins=2), 0.5)
Example #31
def test_emd_samples_validate_empty():
    first_array = []
    second_array = [1]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array)
Example #32
def test_emd_samples_1_not_normalized():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, normalized=False), 3.0)
Example #33
def evaluate(ori_path, bin_path, dec_path, log_path):
    ### mse, psnr, hausdorff, h.psnr ###
    pc_error = sp.run([
        os.path.join(BASE_DIR, 'pc_error/build/pc_error'), '-a', ori_path,
        '-b', dec_path, '-d'
    ],
                      stdout=sp.PIPE,
                      universal_newlines=True)
    for line in pc_error.stdout.splitlines():
        m = re.search('(?<=mseF      \(p2point\): ).*', line)
        if (m):
            mse = m.group()
        m = re.search('(?<=mseF,PSNR \(p2point\): ).*', line)
        if (m):
            psnr = m.group()
        m = re.search('(?<=h.        \(p2point\): ).*', line)
        if (m):
            hausdorff = m.group()
        m = re.search('(?<=h.,PSNR   \(p2point\): ).*', line)
        if (m):
            h_psnr = m.group()

    # for line in pc_error.stdout.splitlines():
    #     m = re.search('(?<=mse2      \(p2point\): ).*', line)
    #     if (m):
    #         mse = m.group()
    #     m = re.search('(?<=mse2,PSNR \(p2point\): ).*', line)
    #     if (m):
    #         psnr = m.group()
    #     m = re.search('(?<=h.       2\(p2point\): ).*', line)
    #     if (m):
    #         hausdorff = m.group()
    #     m = re.search('(?<=h.,PSNR  2\(p2point\): ).*', line)
    #     if (m):
    #         h_psnr = m.group()

    ### Chamfer distance, EMD ###
    ori_ply = PlyData.read(ori_path)
    ori_pc = np.array([
        ori_ply['vertex']['x'], ori_ply['vertex']['y'], ori_ply['vertex']['z']
    ])
    ori_pc = np.transpose(ori_pc)
    ori_pc = np.expand_dims(ori_pc, axis=0)

    dec_ply = PlyData.read(dec_path)
    dec_pc = np.array([
        dec_ply['vertex']['x'], dec_ply['vertex']['y'], dec_ply['vertex']['z']
    ])
    dec_pc = np.transpose(dec_pc)
    dec_pc = np.expand_dims(dec_pc, axis=0)

    cd_api = chamfer_distance_api.Chamfer_distance()
    cd = cd_api.get_chamfer_distance(ori_pc, dec_pc)
    emd = emd_samples(ori_pc, dec_pc)

    ### compressed file size ###
    bin_size = (os.stat(bin_path).st_size) / 1000  # kB

    with open(log_path, 'a') as f:
        lines = [
            f'Point Cloud Compression Evaluation\n\n', f'ply1: {ori_path}\n',
            f'ply2: {dec_path}\n\n', f'MSE                    : {mse}\n',
            f'PSNR                   : {psnr}\n',
            f'Hausdorff distance     : {hausdorff}\n',
            f'H. PSNR                : {h_psnr}\n',
            f'Chamfer distance       : {cd}\n',
            f'Earth Mover\'s distance : {emd}\n\n',
            f'bin_file size (kB)  : {bin_size}\n'
        ]
        f.writelines(lines)
Example #34
def test_emd_samples_3():
    first_array = [1, 1, 1, 2, 3]
    second_array = [1, 2, 2, 2, 3]
    emd_assert(emd_samples(first_array, second_array), 0.32)
Example #35
def get_data():  # read the data and build a similarity matrix from the cosine values

    #unidata = pd.read_excel('C:/Users/hxk/Desktop/距离测试/distest.xlsx', header=None, skiprows=None)  # training data file
    unidata = pd.read_excel(
        'E:/Project/螺丝拧紧/第一批数据/原始数据预处理_excel文件/datauni1_512.xlsx',
        header=None,
        skiprows=None)  # training data file
    datashape = unidata.shape
    data_rows = datashape[0]
    #data_cols = datashape[1]
    test_sub = 1900
    vsize = data_rows - test_sub
    ddpearson = np.zeros((vsize, vsize))
    ddbrc = np.zeros((vsize, vsize))
    ddkl = np.zeros((vsize, vsize))
    ddjs = np.zeros((vsize, vsize))
    ddemdwave = np.zeros((vsize, vsize))
    vend = 128  # data length

    for vi in range(vsize):
        #vectori = abs(unidata.values[vi][0:vend])  # the i-th curve vector
        vectori = (unidata.values[vi][0:vend])  # the i-th curve vector
        # wavelet decomposition
        mywavestr = 'sym2'
        mywavlevel = 2
        coeffsvi = pywt.wavedec(vectori, mywavestr, level=mywavlevel)
        waveLdata = coeffsvi[0]
        vecroti_wave = waveLdata.astype(int)
        # wavelet decomposition done
        for vj in range(vsize):
            #vectorj = abs(unidata.values[vj][0:vend])   # the j-th curve vector
            vectorj = (unidata.values[vj][0:vend])  # the j-th curve vector
            vectorij = np.vstack([vectori, vectorj])  # stack vectors i and j
            # wavelet decomposition
            coeffsvj = pywt.wavedec(vectorj, mywavestr, level=mywavlevel)
            waveLdata_vj = coeffsvj[0]
            vecrotj_wave = waveLdata_vj.astype(int)
            # wavelet decomposition done
            # prepare data for the KL divergence distance
            vectori_sum = sum(vectori)
            vectorj_sum = sum(vectorj)
            vkl_i = vectori / vectori_sum + 1e-10
            vkl_j = vectorj / vectorj_sum + 1e-10  # prepare data for the KL divergence distance
            # Pearson coefficient (cosine distance here)
            distpearson = pdist(vectorij, 'cosine')
            ddpearson[vi][vj] = distpearson
            # Bray-Curtis distance
            distbrc = pdist(vectorij, 'braycurtis')
            ddbrc[vi][vj] = distbrc
            # JS divergence
            # vectork = (vectori + vectorj) / 2
            #distjs = 0.5 * scipy.stats.entropy(vectori, vectork) + 0.5 * scipy.stats.entropy(vectorj, vectork)
            #ddjs[vi][vj] = distjs
            # KL divergence
            #distkl = scipy.stats.entropy(vkl_i, vkl_j)
            #ddkl[vi][vj] = distkl
            # EMD (earth mover's distance) on the wavelet coefficients
            distemdwave = emd_samples(vecroti_wave, vecrotj_wave, bins=50)
            ddemdwave[vi][vj] = distemdwave
            # end of the EMD wavelet step
    ddpearsonuni = DDuni(ddpearson)
    ddbrcuni = DDuni(ddbrc)
    ddemdwaveuni = DDuni(ddemdwave)
    #ddjsuni = DDuni(ddjs)
    #ddkluni = DDuni(ddkl)
    ddpearsonuni_rate = 0.33
    ddbrcuni_rate = 0.33
    ddemdwave_rate = 0.33
    ddjsuni_rate = 0.25
    ddkluni_rate = 0.25
    SPCM = ((ddpearsonuni_rate * ddpearsonuni) + (ddbrcuni_rate * ddbrcuni) +
            (ddemdwave_rate * ddemdwaveuni))
    # +    (ddjsuni_rate * ddjsuni)            + (ddkluni_rate   * ddkluni)   )

    return SPCM, unidata
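A hedged sketch of the core per-pair step used in the wavelet-based examples above: take the level-2 'sym2' approximation coefficients of two curves and measure the EMD between them. The curves here are synthetic; the originals read theirs from an Excel file:

import numpy as np
import pywt
from pyemd import emd_samples

x = np.linspace(0, 6, 128)
curve_i = np.sin(x) + np.random.normal(0, 0.05, 128)
curve_j = np.cos(x) + np.random.normal(0, 0.05, 128)

# keep only the approximation (low-frequency) coefficients of each curve
approx_i = pywt.wavedec(curve_i, 'sym2', level=2)[0].astype(int)
approx_j = pywt.wavedec(curve_j, 'sym2', level=2)[0].astype(int)

print(emd_samples(approx_i, approx_j, bins=50))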
Example #36
def test_emd_samples_3():
    first_array = [1, 1, 1, 2, 3]
    second_array = [1, 2, 2, 2, 3]
    emd_assert(emd_samples(first_array, second_array), 0.32)
Example #37
def test_emd_samples_validate_empty():
    first_array = []
    second_array = [1]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array)
Example #38
def test_emd_samples_5():
    first_array = [1]
    second_array = [1, 2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 1.8)
Example #39
def evaluate(ori_path, bin_path, dec_path, log_path):
    ### acd, cd, cd-psnr, hausdorff ###
    pc_error = sp.run(['../../evaluator/geo_dist/build/pc_error',
                       '-a', ori_path, '-b', dec_path, '-d'], 
                       stdout=sp.PIPE, stderr=sp.DEVNULL, universal_newlines=True)
    for line in pc_error.stdout.splitlines():
        m = re.search('(?<=A->B,ACD1,p2point,).*', line)
        if (m):
            acd1_pt = m.group()
        m = re.search('(?<=A->B,ACD1,p2plane,).*', line)
        if (m):
            acd1_pl = m.group()
        m = re.search('(?<=B->A,ACD2,p2point,).*', line)
        if (m):
            acd2_pt = m.group()
        m = re.search('(?<=B->A,ACD2,p2plane,).*', line)
        if (m):
            acd2_pl = m.group()
        m = re.search('(?<=Symmetric,CD,p2point,).*', line)
        if (m):
            cd_pt = m.group()
        m = re.search('(?<=Symmetric,CD-PSNR,p2point,).*', line)
        if (m):
            cd_psnr_pt = m.group()
        m = re.search('(?<=Symmetric,CD,p2plane,).*', line)
        if (m):
            cd_pl = m.group()
        m = re.search('(?<=Symmetric,CD-PSNR,p2plane,).*', line)
        if (m):
            cd_psnr_pl = m.group()
        m = re.search('(?<=Symmetric,hF,p2point,).*', line)
        if (m):
            hausdorff_pt = m.group()
        m = re.search('(?<=Symmetric,hF,p2plane,).*', line)
        if (m):
            hausdorff_pl = m.group()

    ### EMD ###
    ori_ply = PlyData.read(ori_path)
    ori_pc = np.array([ori_ply['vertex']['x'], ori_ply['vertex']['y'], ori_ply['vertex']['z']]).transpose()
    ori_pc = np.expand_dims(ori_pc, axis=0)

    dec_ply = PlyData.read(dec_path)
    dec_pc = np.array([dec_ply['vertex']['x'], dec_ply['vertex']['y'], dec_ply['vertex']['z']]).transpose()
    dec_pc = np.expand_dims(dec_pc, axis=0)

    # check if the number of points are the same
    is_point_num_equal = bool(len(ori_ply['vertex']['x']) == len(dec_ply['vertex']['x']))
    if is_point_num_equal:
        emd = emd_samples(ori_pc, dec_pc)

    ### compressed file size ###
    ori_size = (os.stat(ori_path).st_size) / 1000 # kB
    bin_size = (os.stat(bin_path).st_size) / 1000 # kB
    compression_ratio = bin_size / ori_size # kB
    bpp = (bin_size * 1000 * 8) / len(ori_ply['vertex']['x']) # bits per points

    with open(log_path, 'a') as f:
        lines = [f'Point Cloud Compression Evaluation\n',
                 f'ply1: {ori_path}\n',
                 f'ply2: {dec_path}\n',
                 f'======================================\n',
                 f'ori_file size (kB)  : {ori_size}\n',
                 f'bin_file size (kB)  : {bin_size}\n',
                 f'compression ratio   : {compression_ratio}\n',
                 f'bpp (bits per point): {bpp}\n'
                 f'======================================\n',
                 f'Asym. Chamfer dist. (1->2) p2pt: {acd1_pt}\n',
                 f'Asym. Chamfer dist. (2->1) p2pt: {acd2_pt}\n',
                 f'Chamfer dist.              p2pt: {cd_pt}\n',
                 f'CD-PSNR                    p2pt: {cd_psnr_pt}\n',
                 f'Hausdorff distance         p2pt: {hausdorff_pt}\n',
                 f'======================================\n',
                 f'Asym. Chamfer dist. (1->2) p2pl: {acd1_pl}\n',
                 f'Asym. Chamfer dist. (2->1) p2pl: {acd2_pl}\n',
                 f'Chamfer dist.              p2pl: {cd_pl}\n',
                 f'CD-PSNR                    p2pl: {cd_psnr_pl}\n',
                 f'Hausdorff distance         p2pl: {hausdorff_pl}\n',
                 f'======================================\n']
        f.writelines(lines)
        if is_point_num_equal:
            f.write(f'Earth Mover\'s dist.            : {emd}')
Example #40
def test_emd_samples_4():
    first_array = [1, 2, 3, 4, 5]
    second_array = [99, 98, 97, 96, 95]
    emd_assert(emd_samples(first_array, second_array), 78.4)
Example #41
	d[bowler] = a

for bowler1 in d.keys():
	a = d[bowler1]
	differences[bowler1] = {}
	maximum_score[bowler1] = 0.0
	bowler_list.append(bowler1)
	
	for bowler2 in d.keys():
	
		if ((bowler1 in players) and (bowler2 in players)):
			b = d[bowler2]
			
			a = a.astype('float')
			b = b.astype('float')
			c = emd_samples(a, b, bins=40)
			
			differences[bowler1][bowler2] = c
			
			maximum_score[bowler1] = max(maximum_score[bowler1], c)
			
			#print bowler1 + "," + bowler2 + "," + str(c)


for bowler1 in players:
	for bowler2 in players:
		differences[bowler1][bowler2] /= maximum_score[bowler1]



print('bowler', end=' ')
Example #42
def test_emd_samples_5():
    first_array = [1]
    second_array = [1, 2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 1.8)
Example #43
def compute_emd_distributions(d1, d2, bins=50):
    # pass bins by keyword; the third positional argument of pyemd.emd_samples
    # is extra_mass_penalty, not bins
    return pyemd.emd_samples(d1, d2, bins=bins)
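A brief usage sketch of the helper above, with illustrative normal samples; bins is passed by keyword so it is not swallowed by emd_samples' extra_mass_penalty parameter:

import numpy as np

d1 = np.random.normal(0.0, 1.0, size=1000)
d2 = np.random.normal(0.5, 1.0, size=1000)
print(compute_emd_distributions(d1, d2, bins=50))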