def test_emd_samples_validate_distance_matrix_size():
    dist = lambda x: [[0, 1], [1, 0]]
    first_array = [1, 2, 3, 4]
    second_array = [1, 2, 3, 4]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array, distance=dist)
def wasserstein_randomization(d1_large, d2_large, iters, downsample_size=100):
    """
    Combine synthetic and real data into one pool and repeatedly divide the
    pool into two new random sets. Check the Wasserstein distance (earth
    mover's distance) between these two shuffled sets. The measured
    Wasserstein distance can be used to compute the ratio between it and the
    median of the null distribution (earth mover's distance on the original
    sets). A ratio of 0 would indicate that the two marginal distributions
    are identical.

    From "REALLY USEFUL SYNTHETIC DATA: A FRAMEWORK TO EVALUATE THE QUALITY
    OF DIFFERENTIALLY PRIVATE SYNTHETIC DATA"
    https://arxiv.org/pdf/2004.07740.pdf

    NOTE: We return the mean here. However, it is probably best to analyze
    the full distribution of the Wasserstein scores.

    :param d1_large: real data
    :type d1_large: pandas DataFrame
    :param d2_large: fake data
    :type d2_large: pandas DataFrame
    :param iters: how many iterations to run the randomization
    :type iters: int
    :param downsample_size: we downsample the original datasets due to memory constraints
    :type downsample_size: int
    :return: wasserstein randomization mean
    :rtype: float
    """
    # pip install pyemd
    # https://github.com/wmayner/pyemd
    from pyemd import emd_samples
    assert len(d1_large) == len(d2_large)
    d1 = d1_large.sample(n=downsample_size)
    d2 = d2_large.sample(n=downsample_size)
    l_1 = len(d1)
    d3 = np.concatenate((d1, d2))
    distances = []
    for i in range(iters):
        np.random.shuffle(d3)
        n_1, n_2 = d3[:l_1], d3[l_1:]
        try:
            # PyEMD is sometimes memory intensive;
            # reduce the number of bins if so
            dist = emd_samples(n_1, n_2, bins='auto')
        except MemoryError:
            dist = emd_samples(n_1, n_2, bins=10)
        distances.append(dist)
    # Safety check: make sure there is at least one valid measurement
    if len(distances) == 0:
        return -1
    d_pd = pd.DataFrame(distances)
    print(d_pd.describe())
    return np.mean(np.array(distances))
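A minimal usage sketch for the function above. The DataFrames, column name, and sizes below are illustrative only, not from the original source; it assumes numpy, pandas, and pyemd are installed.

import numpy as np
import pandas as pd

# Illustrative data: "real" and "synthetic" draws from nearby distributions
real = pd.DataFrame({'x': np.random.normal(0.0, 1.0, 1000)})
fake = pd.DataFrame({'x': np.random.normal(0.1, 1.0, 1000)})

# A smaller downsample_size keeps pyemd's internal histograms and
# distance matrix small
score = wasserstein_randomization(real, fake, iters=20, downsample_size=100)
print(score)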
def empirical_distribution_distance(model_a, model_b, nb_samples, discrete=True):
    samples_a = model_a.sample(nb_samples)
    samples_b = model_b.sample(nb_samples)
    if discrete:
        return emd_samples(samples_a, samples_b, distance=discrete_distance)
        # return emd_samples(samples_a, samples_b, bins=2 * len(samples_a))
    return emd_samples(samples_a, samples_b)
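`discrete_distance` is not defined in this snippet. Judging from the custom-distance tests elsewhere in this collection, pyemd's `distance` argument accepts a function that maps the array of histogram bin locations to a full pairwise distance matrix, so a plausible (hypothetical) reconstruction is the 0/1 discrete metric:

import numpy as np

# Hypothetical reconstruction of the missing helper: 0 when two bin
# locations coincide, 1 otherwise, returned as a full distance matrix
# as pyemd's `distance` callable contract expects.
def discrete_distance(x):
    return np.array([[0.0 if i == j else 1.0 for i in x] for j in x])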
def emd(data, cluster_col):
    clusters = data[cluster_col]
    max_clust = np.max(list(clusters))
    cluster_emd_vecs = []
    # iterate through clusters
    for cluster in range(max_clust + 1):
        cluster_data = data[data[cluster_col] == cluster]
        cluster_size = len(cluster_data)
        if cluster_size == 0:
            # print("Cluster Size 0 error")
            continue
        crime_list = list(cluster_data['All Crime'])
        neighborhood_mean = np.mean(crime_list)
        sq_mu = math.sqrt(neighborhood_mean)
        sample_emd = []
        # average to eliminate the impact of randomness on the calculation
        # and make the result more consistent
        for j in range(50):
            comparison_dist = []
            for i in range(len(crime_list)):
                comparison_dist.append(neighborhood_mean + np.random.uniform(-1.0 * sq_mu, sq_mu))
            sample_emd.append(emd_samples(crime_list, comparison_dist))
        cluster_emd_vecs.append(np.mean(sample_emd))
    return cluster_emd_vecs
def calculate_emd_avg(y_sim, alpha, r0):
    """Calculates the average earth mover's distance between two distributions.

    For each simulation, takes the wait-time distribution and calculates the
    EMD against the theoretical Weibull distribution given the parameters at
    the start of the simulation. Normalised by r0.

    Args:
        y_sim (list): (n_sim, n_wait_times) dimension list of wait-times for
            each simulation, for a given reaction channel (esc, epi).
            Obtained from the Channels.wait_times attribute.

    Returns:
        EMD averaged over all simulations, normalised by r0.
    """
    n_sim = len(y_sim)
    # params for generating theoretical samples (ground truth)
    k = alpha + 1  # shape
    beta = (alpha + 1) * (r0 * gamma((alpha + 2) / (alpha + 1)))**(alpha + 1)
    lam = np.power((alpha + 1) / beta, 1 / (alpha + 1))  # scale
    n_samples = 100000
    emd_list = []
    for i in range(n_sim):
        y_hat = np.array(y_sim[i])
        y_true = weibull_min.rvs(k, loc=0, scale=lam, size=n_samples)
        emd_list.append(emd_samples(y_hat, y_true))
    return np.mean(emd_list) * r0
def cal_emd_resamp(A, B, n_samp, times):
    emds = []
    for t in range(times):
        idx_a = np.random.choice(len(A), n_samp)
        idx_b = np.random.choice(len(B), n_samp)
        emds.append(emd_samples(A[idx_a], B[idx_b]))
    return np.mean(emds)
def calculate_emd(y_sim, alpha, r0):
    """Calculates the earth mover's distance between two distributions.

    Takes the wait-time distribution and calculates the EMD against the
    theoretical Weibull distribution given the parameters at the start of
    the simulation.

    Args:
        y_sim (list): (n_sim, n_wait_times) list of wait-times for each
            simulation, for a given reaction channel (esc, epi).
            Obtained from the Channels.wait_times attribute.

    Returns:
        EMD between the simulation and the true Weibull distribution.
    """
    y_sim = [item for sublist in y_sim for item in sublist]  # flatten list
    y_sim = np.array(y_sim)
    n = 10000
    # params for generating theoretical samples (ground truth)
    k = alpha + 1  # shape
    beta = (alpha + 1) * (r0 * gamma((alpha + 2) / (alpha + 1)))**(alpha + 1)
    lam = np.power((alpha + 1) / beta, 1 / (alpha + 1))  # scale
    # generate true samples
    y_true = weibull_min.rvs(k, loc=0, scale=lam, size=n)
    d = emd_samples(y_sim, y_true)
    return d
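A minimal sketch of how calculate_emd might be driven. The imports and parameter values are assumptions (the original file presumably imports gamma from scipy.special and weibull_min from scipy.stats); drawing the "simulated" wait times from the same Weibull the function treats as ground truth should yield an EMD near zero.

import numpy as np
from scipy.special import gamma
from scipy.stats import weibull_min

# Illustrative parameters
alpha, r0 = 0.5, 1.0

# Rebuild the same Weibull parameterization the function uses internally
k = alpha + 1
beta = (alpha + 1) * (r0 * gamma((alpha + 2) / (alpha + 1)))**(alpha + 1)
lam = np.power((alpha + 1) / beta, 1 / (alpha + 1))

# Five fake "simulations", each a list of wait times
y_sim = [weibull_min.rvs(k, loc=0, scale=lam, size=1000) for _ in range(5)]

print(calculate_emd(y_sim, alpha, r0))  # expected to be small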
def earth_mover(self, decisions):
    # decisions.size() == (batch_size, sample_size, attr_vocab_size)
    length = decisions.size(-1)
    indexes = (decisions.float().numpy() >= 0.5)
    emd = [[
        emd_samples(np.arange(length)[index].tolist(), self.samples[0])
        if index.sum() > 0 else 1.0
        for index in indexes[bid]
    ] for bid in range(decisions.size(0))]
    return torch.tensor(emd, dtype=torch.float, device=decisions.device)
def compare_pitchers(one, two):
    x = pitcher_array(one)
    z = pitcher_array(two)
    print(x)
    print(z)
    min_len = min(len(x), len(z))
    return emd_samples(x[:min_len], z[:min_len])
def cal_emd_resamp(A, B, n_samp, times, bins=2):
    emds = []
    if bins is None:
        bins = 2
    for t in range(times):
        idx_a = np.random.choice(len(A), n_samp)
        idx_b = np.random.choice(len(B), n_samp)
        emds.append(emd_samples(A[idx_a], B[idx_b], bins=bins))
    return np.mean(emds)
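A usage sketch for the resampled-EMD helper above (the input arrays are illustrative): repeatedly subsampling keeps each emd_samples call cheap on large arrays, and averaging over rounds smooths out sampling noise.

import numpy as np

A = np.random.normal(0.0, 1.0, 100000)
B = np.random.normal(0.5, 1.0, 100000)

# 20 rounds of 500-point subsamples; bins is forwarded to emd_samples
print(cal_emd_resamp(A, B, n_samp=500, times=20, bins=50))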
def compute_emd_split_samples(scores, train_scores, scores_sample_size=scores_sample_size):
    scores_split = np.array_split(scores, len(scores) / scores_sample_size)
    emd_scores_samples = [
        pyemd.emd_samples(split, train_scores, bins=50)
        for split in scores_split
    ]
    return emd_scores_samples
def test_emd_samples_all_kwargs():
    # Regression only; not checked by hand
    dist = lambda x: [[(i - j)**3 for i in range(len(x))] for j in range(len(x))]
    first_array = [1, 2, 3, 4, 5]
    second_array = [2, 3, 4, 5]
    emd_assert(
        emd_samples(first_array,
                    second_array,
                    bins=30,
                    normalized=False,
                    range=(-5, 15),
                    distance=dist),
        24389.0)
def get_value(img, frame):
    contour1, cnt = get_cnt(img.copy()), get_cnt(frame.copy())
    M = cv2.moments(contour1)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    # note: np.array(map(...)) only works in Python 2; use a list comprehension
    dist1 = np.array([distant((cx, cy), (p[0][0], p[0][1])) for p in contour1])
    M = cv2.moments(cnt)
    cx = int(M['m10'] / M['m00'])
    cy = int(M['m01'] / M['m00'])
    dist2 = np.array([distant((cx, cy), (p[0][0], p[0][1])) for p in cnt])
    v = emd_samples(dist1, dist2, bins=2)
    return v
def evaluation(file_name):
    gt_path = os.path.join(args.gt_path, file_name)
    pre_path = os.path.join(args.pre_path, file_name)
    assert os.path.exists(gt_path)
    assert os.path.exists(pre_path)
    gt_points = np.loadtxt(gt_path)
    pre_points = np.loadtxt(pre_path)
    gt2pre, _ = NearestNeighbors(
        n_neighbors=1, algorithm='auto').fit(pre_points).kneighbors(gt_points)
    pre2gt, _ = NearestNeighbors(
        n_neighbors=1, algorithm='auto').fit(gt_points).kneighbors(pre_points)
    return np.squeeze(gt2pre), np.squeeze(pre2gt), emd_samples(gt_points, pre_points)
def paral_process_similirity(arg):
    index1 = arg
    print('index1', index1)
    # randomNum = random.randint(2, 10)
    # time.sleep(randomNum)
    vectori = (unidata.values[index1][0:vend])  # the i-th curve vector
    # print('vectori:', vectori)
    # wavelet decomposition
    mywavestr = 'sym2'
    mywavlevel = 2
    coeffsvi = pywt.wavedec(vectori, mywavestr, level=mywavlevel)
    waveLdata = coeffsvi[0]
    vecroti_wave = waveLdata.astype(int)
    list_ddpearson = []
    list_ddbrc = []
    list_ddemdwave = []
    for vj in range(vsize):
        # index2 = arg
        # print("index1: ", index1, "index2: ", index2)
        vectorj = (unidata.values[vj][0:vend])  # the j-th curve vector
        # vectorj = (unidata.values[index2][0:vend])
        vectorij = np.vstack([vectori, vectorj])  # stack the i and j vectors
        # wavelet decomposition
        coeffsvj = pywt.wavedec(vectorj, mywavestr, level=mywavlevel)
        waveLdata_vj = coeffsvj[0]
        vecrotj_wave = waveLdata_vj.astype(int)
        # wavelet decomposition done
        # cosine ("Pearson") coefficient
        distpearson = pdist(vectorij, 'cosine')
        list_ddpearson.append(distpearson[0])
        # Bray-Curtis distance
        distbrc = pdist(vectorij, 'braycurtis')
        list_ddbrc.append(distbrc[0])
        # EMD (earth mover's distance) on the wavelet coefficients
        distemdwave = emd_samples(vecroti_wave, vecrotj_wave, bins='auto')
        # print('distemdwave', distemdwave)
        list_ddemdwave.append(distemdwave)
    # print('list_ddemdwave:', list_ddemdwave)
    return list_ddpearson, list_ddbrc, list_ddemdwave
def train_rep(model, lr, X, P, n_iter, c_iter, batch_size, alpha=10,
              C_reg=1, compute_emd=False, adv=True, verbose=False):
    """
    Train the fair representation using the autoencoder provided by the user.

    Parameters:
        model: the PyTorch model of the autoencoder. The model should have
            two members, model.encoder and model.decoder.
        lr: learning rate.
        X: the input features.
        P: the protected attribute.
        n_iter: number of iterations.
        c_iter: the number of iterations to train the critic inside each
            training iteration.
        batch_size: batch size.
        alpha: the weight of the fairness constraint. Larger means
            penalizing violations of fairness more heavily.
        C_reg: the penalization coefficient of the regularization of the
            encoder.
        compute_emd: whether the EMD distance is calculated for each
            iteration. It may slow the training process significantly.
        adv: if the model is trained adversarially, i.e. fairly. Setting it
            to False results in training a normal autoencoder.
        verbose: if the training process is verbosely printed.
    """
    time_s = time.time()
    X_0 = X[P == 0]
    X_1 = X[P == 1]
    optim_encoder = optim.Adam(model.encoder.parameters(), lr=lr)
    optim_decoder = optim.Adam(model.decoder.parameters(), lr=lr)
    optim_crit = optim.Adam(model.critic.parameters(), lr=0.1)
    l1_crit = nn.L1Loss(size_average=False)  # defined but unused below
    n_of_batch = int(len(X) / (batch_size * 2)) * n_iter
    for i in range(n_of_batch):
        X_n = X_0[np.random.choice(len(X_0), batch_size)]
        X_u = X_1[np.random.choice(len(X_1), batch_size)]
        if adv:
            w_dist_last = 0
            eps = 1
            # keep training the critic until the distance no longer decreases
            while eps >= 1e-3:
                for t in range(c_iter):
                    optim_crit.zero_grad()
                    w_dist = model.wdist(X_n, X_u)
                    loss = -w_dist
                    loss.backward(retain_graph=True)
                    optim_crit.step()
                eps = np.abs(w_dist.data.item() - w_dist_last)
                w_dist_last = w_dist.data.item()
                for p in model.critic.parameters():
                    p.data.clamp_(-0.1, 0.1)
        optim_encoder.zero_grad()
        optim_decoder.zero_grad()
        # only use the encoder g
        mse, wdist = model.forward(X_n, X_u)
        if adv:
            loss = mse + wdist * alpha
        else:
            loss = mse
        # regularization of the encoder's linear layers (the commented-out
        # variant penalized absolute weights; the active code sums squared
        # weights per input column)
        reg_loss = 0
        # for param in model.encoder.parameters():
        #     reg_loss += torch.abs(param).sum()
        for layer in model.encoder:
            if type(layer) is nn.Linear:
                # norm = torch.sum(torch.pow(torch.sum(torch.abs(layer.weight), dim=0), 2))
                norm = 0.0
                for row in layer.weight.transpose(0, 1):
                    norm += torch.sum(torch.pow(row, 2))
                reg_loss += norm
        loss += C_reg * reg_loss
        # use mse and wdist to update g and f
        loss.backward(retain_graph=True)
        optim_encoder.step()
        optim_decoder.step()
        text = 'mse: %.4f, critic: %.4f' % (mse.item(), wdist.item())
        if compute_emd:
            g_0 = model.encoder(X_u).detach().cpu().numpy()
            g_1 = model.encoder(X_n).detach().cpu().numpy()
            real_emd = emd_samples(g_0, g_1)
            text += ", emd: %.4f" % real_emd
        if verbose:
            update_progress(i, n_of_batch, time_s, text=text + ' ')
    if mode == 'all':
        for v in self.split_data.values():
            data.append(v[0])
            label.append(v[1])
        data, label = (np.vstack(data), np.vstack(label))
    else:
        data, label = self.split_data[mode]
    idx = [i for i in range(len(data))]
    np.random.shuffle(idx)
    idx = np.array(idx)
    for i in trange(len(idx) // batch_size, ascii=True):
        x = data[idx[i * batch_size:(i + 1) * batch_size]]
        y = label[idx[i * batch_size:(i + 1) * batch_size]]
        yield (x, y)


if __name__ == "__main__":
    data_loader = MNIST('config.yaml')
    from pyemd import emd_samples
    label_data, _ = data_loader.getData(mode='label')
    unlabel_data, _ = data_loader.getData(mode='unlabel')
    label_data = np.vstack((label_data, unlabel_data[0]))
    all_data, _ = data_loader.getData()
    print(emd_samples(label_data.flatten(), all_data[:2000].flatten()))
    # for x, y in data_loader.next_batch(batch_size=64):
    #     print(y)
    # plt.imshow(data_loader.getData('label')[0][i].reshape((28, 28)), cmap='gray')
    # plt.show()
aucbor_tmp = 0
sauc_tmp = 0
for pred, gt in zip(predictions, y_valid_cv):
    res = postprocess_predictions(pred[0], shape_r_out, shape_c_out)
    res = res / 255
    aucjud_tmp += auc_judd(res, gt[2][0, 0])
    sim_tmp += similarity(res, gt[0][0, 0])
    aucbor_tmp += auc_borji(res, gt[2][0, 0])
    nss_tmp += nss_metric(gt[2][0, 0], res)
    cc_tmp += cc(gt[0][0, 0], res)
    kl_tmp += kldiv(gt[0][0, 0], res)
    emdgt = gt[0][0, 0] * 255
    emdres = res * 255
    emd_tmp += emd_samples(emdgt.flatten(), emdres.flatten(), bins=255)
    sauc_tmp += auc_shuff(res, gt[2][0, 0], smap)
print(emd_tmp / nb_val)
sum_nss += nss_tmp / nb_val
sum_cc += cc_tmp / nb_val
sum_kl += kl_tmp / nb_val
sum_emd += emd_tmp / nb_val
sum_aucjud += aucjud_tmp / nb_val
sum_sim += sim_tmp / nb_val
sum_aucbor += aucbor_tmp / nb_val
sum_sauc += sauc_tmp / nb_val
f.write("{},{},{},{},{},{},{},{}\n".format(
    aucjud_tmp / nb_val, sim_tmp / nb_val, emd_tmp / nb_val,
    aucbor_tmp / nb_val, sauc_tmp / nb_val, cc_tmp / nb_val,
    nss_tmp / nb_val, kl_tmp / nb_val))
import pyemd
import numpy as np

a = [1, 2, 3]
b = [4, 5, 6]
a = np.array(a)
b = np.array(b)
res = pyemd.emd_samples(a, b)
print(res, type(res))
def test_emd_samples_1_custom_distance():
    dist = lambda x: np.array([[0.0 if i == j else 1.0 for i in x] for j in x])
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, distance=dist), 0.25)
def test_emd_samples_1_manual_range():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, range=(0, 10)), 1.0)
def test_emd_samples_2():
    first_array = [1]
    second_array = [2]
    emd_assert(emd_samples(first_array, second_array), 0.5)
    d[bowler] = a

for bowler1 in d.keys():
    a = d[bowler1]
    differences[bowler1] = {}
    maximum_score[bowler1] = 0.0
    bowler_list.append(bowler1)
    for bowler2 in d.keys():
        if (bowler1 in players) and (bowler2 in players):
            b = d[bowler2]
            a = a.astype('float')
            b = b.astype('float')
            c = emd_samples(a, b, bins=255)
            differences[bowler1][bowler2] = c
            maximum_score[bowler1] = max(maximum_score[bowler1], c)
            # print(bowler1 + "," + bowler2 + "," + str(c))

for bowler1 in players:
    for bowler2 in players:
        differences[bowler1][bowler2] /= maximum_score[bowler1]

# tree_file = open('comparing-bowlers.csv', 'w')
tree_file = open('comparing-batsmen-' + interpolation_type + '.csv', 'w')
writer = csv.writer(tree_file, delimiter=',')
# pitch_dist = leaders(note_pitch)
# duration_dist = leaders(note_duration)
print('\n', dir_name)
# # calculate note pitch/duration
# ab_list_pitch = []
# ab_list_duration = []
print("Duration:")
# A-x, B-x
ab_x_emd = []
ab_x_eud = []
for _ in ab_list_duration:
    ab_x_emd.append(emd_samples(_, note_duration))
    ab_x_eud.append(eud_dis(_, note_duration))
# A - B
ab_emd = emd_samples(ab_list_duration[0], ab_list_duration[1])
ab_eud = eud_dis(ab_list_duration[0], ab_list_duration[1])
# diff and ratio
diff_emd = sum(ab_x_emd) - ab_emd
ratio_emd = sum(ab_x_emd) / ab_emd
diff_eud = sum(ab_x_eud) - ab_eud
ratio_eud = sum(ab_x_eud) / ab_eud
print("diff_emd:{:.3f} ratio_emd:{:.3f} diff_eud:{:.3f} ratio_eud:{:.3f}".format(
    diff_emd, ratio_emd, diff_eud, ratio_eud))
import numpy as np
from pyemd import emd
from pyemd import emd_with_flow
from pyemd import emd_samples

s1 = 8
s2 = 8
np.random.seed(10)
a = np.random.rand(s1)
b = np.random.rand(s2)
d = np.random.rand(s1, s2)
result1 = emd(a, b, d)
result2 = emd_with_flow(a, b, d)
result3 = emd_samples(a, b)
print(result1)
print("\n", result2)
print(result3)
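For a known-answer check of the histogram-based entry points used above, the example from the pyemd README can be adapted; the expected values below follow that README, so treat them as an assumption if your pyemd version differs.

import numpy as np
from pyemd import emd, emd_with_flow

first_histogram = np.array([0.0, 1.0])
second_histogram = np.array([5.0, 3.0])
distance_matrix = np.array([[0.0, 0.5],
                            [0.5, 0.0]])

print(emd(first_histogram, second_histogram, distance_matrix))  # 3.5
print(emd_with_flow(first_histogram, second_histogram, distance_matrix))
# (3.5, [[0.0, 0.0], [0.0, 1.0]])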
def test_emd_samples_1():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 0.75)
def test_emd_samples_4():
    first_array = [1, 2, 3, 4, 5]
    second_array = [99, 98, 97, 96, 95]
    emd_assert(emd_samples(first_array, second_array), 78.4)
def test_emd_samples_1_binsize():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, bins=2), 0.5)
def test_emd_samples_validate_empty():
    first_array = []
    second_array = [1]
    with pytest.raises(ValueError):
        emd_samples(first_array, second_array)
def test_emd_samples_1_not_normalized():
    first_array = [1, 2, 3, 4]
    second_array = [2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array, normalized=False), 3.0)
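Taken together with test_emd_samples_1 above, this test pins down the default normalization: the same input arrays give 0.75 normalized and 3.0 un-normalized, which appears consistent with the normalized EMD being the raw value divided by the total sample mass (4 samples here). A quick check, assuming pyemd is installed:

from pyemd import emd_samples

print(emd_samples([1, 2, 3, 4], [2, 3, 4, 5]))                    # ~0.75
print(emd_samples([1, 2, 3, 4], [2, 3, 4, 5], normalized=False))  # ~3.0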
def evaluate(ori_path, bin_path, dec_path, log_path):
    ### mse, psnr, hausdorff, h.psnr ###
    pc_error = sp.run([
        os.path.join(BASE_DIR, 'pc_error/build/pc_error'),
        '-a', ori_path, '-b', dec_path, '-d'
    ], stdout=sp.PIPE, universal_newlines=True)
    for line in pc_error.stdout.splitlines():
        m = re.search('(?<=mseF \(p2point\): ).*', line)
        if m:
            mse = m.group()
        m = re.search('(?<=mseF,PSNR \(p2point\): ).*', line)
        if m:
            psnr = m.group()
        m = re.search('(?<=h. \(p2point\): ).*', line)
        if m:
            hausdorff = m.group()
        m = re.search('(?<=h.,PSNR \(p2point\): ).*', line)
        if m:
            h_psnr = m.group()
    # Alternative parsing for a different pc_error output format:
    # for line in pc_error.stdout.splitlines():
    #     m = re.search('(?<=mse2 \(p2point\): ).*', line)
    #     if m: mse = m.group()
    #     m = re.search('(?<=mse2,PSNR \(p2point\): ).*', line)
    #     if m: psnr = m.group()
    #     m = re.search('(?<=h. 2\(p2point\): ).*', line)
    #     if m: hausdorff = m.group()
    #     m = re.search('(?<=h.,PSNR 2\(p2point\): ).*', line)
    #     if m: h_psnr = m.group()

    ### Chamfer distance, EMD ###
    ori_ply = PlyData.read(ori_path)
    ori_pc = np.array([
        ori_ply['vertex']['x'], ori_ply['vertex']['y'], ori_ply['vertex']['z']
    ])
    ori_pc = np.transpose(ori_pc)
    ori_pc = np.expand_dims(ori_pc, axis=0)
    dec_ply = PlyData.read(dec_path)
    dec_pc = np.array([
        dec_ply['vertex']['x'], dec_ply['vertex']['y'], dec_ply['vertex']['z']
    ])
    dec_pc = np.transpose(dec_pc)
    dec_pc = np.expand_dims(dec_pc, axis=0)
    cd_api = chamfer_distance_api.Chamfer_distance()
    cd = cd_api.get_chamfer_distance(ori_pc, dec_pc)
    emd = emd_samples(ori_pc, dec_pc)

    ### compressed file size ###
    bin_size = os.stat(bin_path).st_size / 1000  # kB

    with open(log_path, 'a') as f:
        lines = [
            f'Point Cloud Compression Evaluation\n\n',
            f'ply1: {ori_path}\n',
            f'ply2: {dec_path}\n\n',
            f'MSE : {mse}\n',
            f'PSNR : {psnr}\n',
            f'Hausdorff distance : {hausdorff}\n',
            f'H. PSNR : {h_psnr}\n',
            f'Chamfer distance : {cd}\n',
            f'Earth Mover\'s distance : {emd}\n\n',
            f'bin_file size (kB) : {bin_size}\n'
        ]
        f.writelines(lines)
def test_emd_samples_3():
    first_array = [1, 1, 1, 2, 3]
    second_array = [1, 2, 2, 2, 3]
    emd_assert(emd_samples(first_array, second_array), 0.32)
def get_data():
    # read the data and build a similarity matrix from the cosine values
    # unidata = pd.read_excel('C:/Users/hxk/Desktop/距离测试/distest.xlsx', header=None, skiprows=None)  # training data file
    unidata = pd.read_excel(
        'E:/Project/螺丝拧紧/第一批数据/原始数据预处理_excel文件/datauni1_512.xlsx',
        header=None, skiprows=None)  # training data file
    datashape = unidata.shape
    data_rows = datashape[0]
    # data_cols = datashape[1]
    test_sub = 1900
    vsize = data_rows - test_sub
    ddpearson = np.zeros((vsize, vsize))
    ddbrc = np.zeros((vsize, vsize))
    ddkl = np.zeros((vsize, vsize))
    ddjs = np.zeros((vsize, vsize))
    ddemdwave = np.zeros((vsize, vsize))
    vend = 128  # data length
    for vi in range(vsize):
        # vectori = abs(unidata.values[vi][0:vend])  # the i-th curve vector
        vectori = (unidata.values[vi][0:vend])  # the i-th curve vector
        # wavelet decomposition
        mywavestr = 'sym2'
        mywavlevel = 2
        coeffsvi = pywt.wavedec(vectori, mywavestr, level=mywavlevel)
        waveLdata = coeffsvi[0]
        vecroti_wave = waveLdata.astype(int)
        # wavelet decomposition done
        for vj in range(vsize):
            # vectorj = abs(unidata.values[vj][0:vend])  # the j-th curve vector
            vectorj = (unidata.values[vj][0:vend])  # the j-th curve vector
            vectorij = np.vstack([vectori, vectorj])  # stack the i and j vectors
            # wavelet decomposition
            coeffsvj = pywt.wavedec(vectorj, mywavestr, level=mywavlevel)
            waveLdata_vj = coeffsvj[0]
            vecrotj_wave = waveLdata_vj.astype(int)
            # wavelet decomposition done
            # prepare the data for the KL-divergence distance
            vectori_sum = sum(vectori)
            vectorj_sum = sum(vectorj)
            vkl_i = vectori / vectori_sum + 1e-10
            vkl_j = vectorj / vectorj_sum + 1e-10
            # cosine ("Pearson") coefficient
            distpearson = pdist(vectorij, 'cosine')
            ddpearson[vi][vj] = distpearson
            # Bray-Curtis distance
            distbrc = pdist(vectorij, 'braycurtis')
            ddbrc[vi][vj] = distbrc
            # JS divergence
            # vectork = (vectori + vectorj) / 2
            # distjs = 0.5 * scipy.stats.entropy(vectori, vectork) + 0.5 * scipy.stats.entropy(vectorj, vectork)
            # ddjs[vi][vj] = distjs
            # KL divergence
            # distkl = scipy.stats.entropy(vkl_i, vkl_j)
            # ddkl[vi][vj] = distkl
            # EMD on the wavelet coefficients
            distemdwave = emd_samples(vecroti_wave, vecrotj_wave, bins=50)
            ddemdwave[vi][vj] = distemdwave
    ddpearsonuni = DDuni(ddpearson)
    ddbrcuni = DDuni(ddbrc)
    ddemdwaveuni = DDuni(ddemdwave)
    # ddjsuni = DDuni(ddjs)
    # ddkluni = DDuni(ddkl)
    ddpearsonuni_rate = 0.33
    ddbrcuni_rate = 0.33
    ddemdwave_rate = 0.33
    ddjsuni_rate = 0.25
    ddkluni_rate = 0.25
    SPCM = ((ddpearsonuni_rate * ddpearsonuni) + (ddbrcuni_rate * ddbrcuni) +
            (ddemdwave_rate * ddemdwaveuni))
    # + (ddjsuni_rate * ddjsuni) + (ddkluni_rate * ddkluni))
    return SPCM, unidata
def test_emd_samples_5():
    first_array = [1]
    second_array = [1, 2, 3, 4, 5]
    emd_assert(emd_samples(first_array, second_array), 1.8)
def evaluate(ori_path, bin_path, dec_path, log_path):
    ### acd, cd, cd-psnr, hausdorff ###
    pc_error = sp.run(['../../evaluator/geo_dist/build/pc_error',
                       '-a', ori_path, '-b', dec_path, '-d'],
                      stdout=sp.PIPE, stderr=sp.DEVNULL, universal_newlines=True)
    for line in pc_error.stdout.splitlines():
        m = re.search('(?<=A->B,ACD1,p2point,).*', line)
        if m:
            acd1_pt = m.group()
        m = re.search('(?<=A->B,ACD1,p2plane,).*', line)
        if m:
            acd1_pl = m.group()
        m = re.search('(?<=B->A,ACD2,p2point,).*', line)
        if m:
            acd2_pt = m.group()
        m = re.search('(?<=B->A,ACD2,p2plane,).*', line)
        if m:
            acd2_pl = m.group()
        m = re.search('(?<=Symmetric,CD,p2point,).*', line)
        if m:
            cd_pt = m.group()
        m = re.search('(?<=Symmetric,CD-PSNR,p2point,).*', line)
        if m:
            cd_psnr_pt = m.group()
        m = re.search('(?<=Symmetric,CD,p2plane,).*', line)
        if m:
            cd_pl = m.group()
        m = re.search('(?<=Symmetric,CD-PSNR,p2plane,).*', line)
        if m:
            cd_psnr_pl = m.group()
        m = re.search('(?<=Symmetric,hF,p2point,).*', line)
        if m:
            hausdorff_pt = m.group()
        m = re.search('(?<=Symmetric,hF,p2plane,).*', line)
        if m:
            hausdorff_pl = m.group()

    ### EMD ###
    ori_ply = PlyData.read(ori_path)
    ori_pc = np.array([ori_ply['vertex']['x'], ori_ply['vertex']['y'],
                       ori_ply['vertex']['z']]).transpose()
    ori_pc = np.expand_dims(ori_pc, axis=0)
    dec_ply = PlyData.read(dec_path)
    dec_pc = np.array([dec_ply['vertex']['x'], dec_ply['vertex']['y'],
                       dec_ply['vertex']['z']]).transpose()
    dec_pc = np.expand_dims(dec_pc, axis=0)
    # check if the numbers of points are the same
    is_point_num_equal = bool(len(ori_ply['vertex']['x']) == len(dec_ply['vertex']['x']))
    if is_point_num_equal:
        emd = emd_samples(ori_pc, dec_pc)

    ### compressed file size ###
    ori_size = os.stat(ori_path).st_size / 1000  # kB
    bin_size = os.stat(bin_path).st_size / 1000  # kB
    compression_ratio = bin_size / ori_size
    bpp = (bin_size * 1000 * 8) / len(ori_ply['vertex']['x'])  # bits per point

    with open(log_path, 'a') as f:
        lines = [f'Point Cloud Compression Evaluation\n',
                 f'ply1: {ori_path}\n',
                 f'ply2: {dec_path}\n',
                 f'======================================\n',
                 f'ori_file size (kB) : {ori_size}\n',
                 f'bin_file size (kB) : {bin_size}\n',
                 f'compression ratio : {compression_ratio}\n',
                 f'bpp (bits per point): {bpp}\n',
                 f'======================================\n',
                 f'Asym. Chamfer dist. (1->2) p2pt: {acd1_pt}\n',
                 f'Asym. Chamfer dist. (2->1) p2pt: {acd2_pt}\n',
                 f'Chamfer dist. p2pt: {cd_pt}\n',
                 f'CD-PSNR p2pt: {cd_psnr_pt}\n',
                 f'Hausdorff distance p2pt: {hausdorff_pt}\n',
                 f'======================================\n',
                 f'Asym. Chamfer dist. (1->2) p2pl: {acd1_pl}\n',
                 f'Asym. Chamfer dist. (2->1) p2pl: {acd2_pl}\n',
                 f'Chamfer dist. p2pl: {cd_pl}\n',
                 f'CD-PSNR p2pl: {cd_psnr_pl}\n',
                 f'Hausdorff distance p2pl: {hausdorff_pl}\n',
                 f'======================================\n']
        f.writelines(lines)
        if is_point_num_equal:
            f.write(f'Earth Mover\'s dist. : {emd}')
    d[bowler] = a

for bowler1 in d.keys():
    a = d[bowler1]
    differences[bowler1] = {}
    maximum_score[bowler1] = 0.0
    bowler_list.append(bowler1)
    for bowler2 in d.keys():
        if (bowler1 in players) and (bowler2 in players):
            b = d[bowler2]
            a = a.astype('float')
            b = b.astype('float')
            c = emd_samples(a, b, bins=40)
            differences[bowler1][bowler2] = c
            maximum_score[bowler1] = max(maximum_score[bowler1], c)
            # print(bowler1 + "," + bowler2 + "," + str(c))

for bowler1 in players:
    for bowler2 in players:
        differences[bowler1][bowler2] /= maximum_score[bowler1]

print('bowler', end=' ')
def compute_emd_distributions(d1, d2, bins=50):
    # `bins` must be passed by keyword: the third positional argument of
    # pyemd.emd_samples is extra_mass_penalty, not bins
    return pyemd.emd_samples(d1, d2, bins=bins)
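A minimal usage sketch for this wrapper (the inputs are illustrative, and it assumes the surrounding module has `import pyemd` in scope):

import numpy as np

scores_a = np.random.beta(2.0, 5.0, 10000)
scores_b = np.random.beta(2.0, 4.0, 10000)
print(compute_emd_distributions(scores_a, scores_b))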