def emd_analyze():
    """Build pairwise EMD tables for the layer histograms of two networks.

    Every input (``net1``, ``net2``, ``layer_index_value1``,
    ``layer_index_value2``, ``inputs1``, ``inputs2``, ``bins``, ``A``, ``B``)
    and every helper (``get_histograms_by_net``, ``emd``, ``cal_emd``) is a
    free name resolved outside this function.

    Nothing is returned: all results are locals, so this is only useful when
    stepped through interactively or inspected in a debugger.
    """
    hists_first = get_histograms_by_net(
        net1, layer_index_value1, inputs1, bins=bins)
    hists_second = get_histograms_by_net(
        net2, layer_index_value2, inputs2, bins=bins)

    # A x B table: histogram i of the first net against histogram j of the
    # second net, filled one row at a time.
    emd_AxB = np.zeros([A, B])
    for row in range(A):
        emd_AxB[row, :] = [
            emd(hists_first[row], hists_second[col]) for col in range(B)
        ]

    # (A + B) x (A + B) table over the concatenated histogram stack.
    hists_all = np.concatenate([hists_first, hists_second])
    total = A + B
    emd_ABxAB = np.zeros([total, total])
    for row in range(total):
        emd_ABxAB[row, :] = [
            emd(hists_all[row], hists_all[col]) for col in range(total)
        ]

    # NOTE(review): the column split is hard-coded at 6; presumably this is
    # meant to be ``A`` (the number of first-net histograms) -- confirm.
    to_first_block = emd_ABxAB[:, :6]
    to_second_block = emd_ABxAB[:, 6:]

    # Per-histogram mean distance to each column block, side by side.
    mean_to_first = to_first_block.mean(axis=1)
    mean_to_second = to_second_block.mean(axis=1)
    mean_pairs = np.stack([mean_to_first, mean_to_second], axis=1)

    # EMD of the raw inputs against each other and against themselves.
    emd_inputs = cal_emd(inputs1, inputs2, bins=bins)
    emd_same = cal_emd(inputs1, inputs1, bins=bins)
def earth_mover_distance(y1, y2):
    """
    Return the earth mover's (Wasserstein) distance between two signals.

    Both arguments are forwarded, unchanged, to
    ``scipy.stats.wasserstein_distance`` as its two value arrays.

    Parameters
    ----------
    y1, y2
        Signals to compare; anything ``scipy.stats.wasserstein_distance``
        accepts as ``u_values`` / ``v_values``.

    Returns
    -------
    The value returned by ``scipy.stats.wasserstein_distance(y1, y2)``.

    Example
    -------
    ::

        # create signal
        t = _np.arange(0, 10000) * 1e-5
        y1 = _xr.DataArray(_np.sin(2 * _np.pi * 1e3 * t))

        # perform EMD over a range of noise values
        results = []
        amplitudes = 10 ** _np.arange(-4, 1.1, 0.1)
        for amp in amplitudes:
            y1_noisy = y1.copy() + (_np.random.rand(len(t)) - 0.5) * amp
            results.append(earth_mover_distance(y1, y1_noisy))

        # plot results
        fig, ax = _plt.subplots()
        ax.plot(amplitudes, results, marker='x')
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('Noise amplitude')
        ax.set_ylabel('EMD')
    """
    # Imported lazily so scipy is only required when the function is called.
    from scipy.stats import wasserstein_distance

    return wasserstein_distance(y1, y2)
def cal_emd(inputs1, inputs2, bins=10):
    """Return ``emd`` between the histogram signatures of two inputs.

    Each input is reduced to a vector of ``bins + 1`` numbers:

    * the histogram counts of its values in ``(0, 1]``, divided by the number
      of such values, followed by
    * the count of values ``> 1`` divided by that same number.

    The two vectors are then passed to ``emd`` (a name resolved from the
    enclosing module).

    Parameters
    ----------
    inputs1, inputs2 : Variable or numpy.ndarray
        Data to compare. Each argument is converted on its own, so a
        ``Variable`` may be compared against an ``ndarray``.
    bins : int, optional
        Forwarded to ``numpy.histogram``.

    Returns
    -------
    Whatever ``emd`` returns for the two signature vectors.

    Raises
    ------
    TypeError
        If an input is neither a ``Variable`` nor a ``numpy.ndarray``.

    Notes
    -----
    NOTE(review): ``numpy.histogram`` is called without ``range``, so the bin
    edges follow each input's own min/max and the two histograms are not
    binned on a common grid -- confirm this is intended.

    NOTE(review): the overflow entry is divided by the in-range count, so a
    signature does not necessarily sum to 1, and an input with no values in
    ``(0, 1]`` divides by zero (NaN/inf entries).
    """
    def _as_array(values):
        # Convert one input to a NumPy array, independently of the other.
        if isinstance(values, Variable):
            return values.cpu().data.numpy()
        if isinstance(values, np.ndarray):
            return values
        raise TypeError(
            "cal_emd expects a Variable or numpy.ndarray, got %s"
            % type(values).__name__)

    def _signature(values):
        # Normalised (0, 1] histogram with one trailing overflow entry.
        in_unit = values[(values > 0) & (values <= 1)]
        overflow_count = (values > 1).sum()
        counts = np.histogram(in_unit, bins=bins)[0]
        in_unit_total = counts.sum()
        normalised = counts.astype(float) / in_unit_total
        return np.append(normalised, float(overflow_count) / in_unit_total)

    signature1 = _signature(_as_array(inputs1))
    signature2 = _signature(_as_array(inputs2))
    return emd(signature1, signature2)
def main_backup():
    """Print the EMD between feature histograms of two sub-networks.

    For 'ResNet152' and 'ResNext101_32x4d' a sub-model is extracted with
    ``getConvLayerByIndex`` and run on ``inputs``; the features ``<= 1`` of
    each are histogrammed, normalised, and compared in both directions with
    ``scipy.stats.wasserstein_distance``. The two distances are printed.

    ``inputs``, ``job_dataset``, ``load_net_params``, ``load_dset_params`` and
    ``getConvLayerByIndex`` are free names resolved outside this function.
    """
    from .publicVariables import layer_index_value_list

    def _unit_histogram(features):
        # Default-binned histogram of the values <= 1, scaled to sum to 1.
        counts = np.histogram(features[features <= 1])[0]
        return counts.astype(float) / counts.sum()

    # --- first network -------------------------------------------------
    # convIndex is the index for submodel conv, starting from 1
    net1 = 'ResNet152'
    layer_index_value1 = layer_index_value_list[net1]
    model1, _snapshot, _query, _database = load_net_params(net1)
    _dset_test, _dset_database = load_dset_params(job_dataset)
    # Second-to-last entry of the layer index list for this network.
    layer_index = layer_index_value1[-2]
    sub_model1 = getConvLayerByIndex(model1, layer_index, net1)
    feature_np1 = sub_model1(inputs).cpu().data.numpy()

    # --- second network ------------------------------------------------
    # convIndex is the index for submodel conv, starting from 1
    net2 = 'ResNext101_32x4d'
    layer_index_value2 = layer_index_value_list[net2]
    model2, _snapshot, _query, _database = load_net_params(net2)
    _dset_test, _dset_database = load_dset_params(job_dataset)
    # Last entry of the layer index list for this network.
    layer_index2 = layer_index_value2[-1]
    sub_model2 = getConvLayerByIndex(model2, layer_index2, net2)
    feature_np2 = sub_model2(inputs).cpu().data.numpy()

    his_normalized_1 = _unit_histogram(feature_np1)
    his_normalized_2 = _unit_histogram(feature_np2)

    from scipy.stats import wasserstein_distance as emd

    # Computed in both directions, as in the original experiment.
    emd1_2 = emd(his_normalized_1, his_normalized_2)
    emd2_1 = emd(his_normalized_2, his_normalized_1)
    print(emd1_2, emd2_1)
def costMatrix(row_feats, col_feats, row_labels, col_labels, metric="Pearson"):
    """
    Compute the matching cost matrix between two label sets, given their
    features, labels, and a metric.

    Labels ``-1`` and ``0`` are excluded on both sides. Entry ``[i, j]`` of
    the result is the cost between the vertices carrying row label ``i`` and
    those carrying column label ``j``:

    * ``"Pearson"``   -- mean pairwise correlation distance (``cdist``).
    * ``"Euclidean"`` -- mean pairwise Euclidean distance (``cdist``).
    * ``"Spearman"``  -- mean of ``1 - rho`` over all row/column vertex pairs.
    * ``"Dice"``      -- ``1 - hmg.dice(row_indices, col_indices)``.
    * ``"EMD"``       -- ``emd`` between the sum-normalised mean feature
      vectors of the two regions.

    Parameters:
    - - - - -
        row_feats, col_feats : float, array
                    feature data for each vertex (one row per vertex)
        row_labels, col_labels : int, array
                    cortical parcellation vectors (array-likes are accepted)
        metric : string
                    one of "Pearson", "Euclidean", "Spearman", "Dice", "EMD"

    Returns:
    - - - -
        [row_labs, col_labs, costs] : list
                    sorted unique row labels, sorted unique column labels,
                    and the ``len(row_labs) x len(col_labs)`` cost matrix

    Raises:
    - - - -
        ValueError : if ``metric`` is not one of the supported names
    """
    feature_metrics = ("Spearman", "Euclidean", "Pearson")
    supported = feature_metrics + ("Dice", "EMD")
    if metric not in supported:
        raise ValueError(
            "Unsupported metric %r; expected one of %s"
            % (metric, ", ".join(supported)))

    # ``labels == value`` only broadcasts element-wise on arrays.
    row_labels = np.asarray(row_labels)
    col_labels = np.asarray(col_labels)

    # Unique labels, minus the excluded ones, in sorted order so that the
    # matrix layout is deterministic.
    row_labs = np.setdiff1d(row_labels, [-1, 0])
    col_labs = np.setdiff1d(col_labels, [-1, 0])

    # Initialize cost matrix
    costs = np.zeros((len(row_labs), len(col_labs)))
    print(costs.shape)

    def _prepare(feats, indices):
        # Per-region operand for the selected metric.
        if metric in feature_metrics:
            return feats[indices, :]
        if metric == "EMD":
            mu = feats[indices, :].mean(0)
            return mu / mu.sum()
        return indices  # Dice works on the vertex indices themselves

    def _pair_cost(row_item, col_item):
        # Scalar cost between one row region and one column region.
        if metric == "Spearman":
            n_row = len(row_item)
            [rho, _] = spearmanr(row_item, col_item, axis=1)
            rho = np.asarray(rho)
            if rho.ndim == 0:
                # Only two vertices in total: spearmanr returns a scalar.
                return 1 - rho
            # Lower-left block holds the column-vs-row correlations.
            return (1 - rho[n_row:, :n_row]).mean()
        if metric == "Pearson":
            return cdist(row_item, col_item, metric='correlation').mean()
        if metric == "Euclidean":
            return cdist(row_item, col_item).mean()
        if metric == "Dice":
            return 1 - hmg.dice(row_item, col_item)
        return emd(row_item, col_item)  # "EMD"

    # Column-side operands do not depend on the row, so build them once.
    col_items = [
        _prepare(col_feats, np.where(col_labels == c)[0]) for c in col_labs
    ]

    # Compute pairwise costs between all label sets
    for i, r in enumerate(row_labs):
        row_item = _prepare(row_feats, np.where(row_labels == r)[0])
        for j, col_item in enumerate(col_items):
            costs[i, j] = _pair_cost(row_item, col_item)

    return [row_labs, col_labs, costs]