def emd_analyze():
    # Relies on names defined elsewhere in the module: get_histograms_by_net,
    # emd (the histogram distance, e.g. scipy's wasserstein_distance), cal_emd,
    # net1/net2, layer_index_value1/2, inputs1/inputs2 and bins.
    histograms1 = get_histograms_by_net(net1,
                                        layer_index_value1,
                                        inputs1,
                                        bins=bins)
    histograms2 = get_histograms_by_net(net2,
                                        layer_index_value2,
                                        inputs2,
                                        bins=bins)

    # A and B are the number of histograms produced for net1 and net2
    A, B = len(histograms1), len(histograms2)

    # pairwise EMD between every net1 histogram and every net2 histogram
    emd_AxB = np.zeros([A, B])
    for i in range(A):
        for j in range(B):
            emd_AxB[i, j] = emd(histograms1[i], histograms2[j])

    # full pairwise EMD matrix over the concatenated set of histograms
    histogramsAB = np.concatenate([histograms1, histograms2])
    emd_ABxAB = np.zeros([A + B, A + B])
    for i in range(A + B):
        for j in range(A + B):
            emd_ABxAB[i, j] = emd(histogramsAB[i], histogramsAB[j])

    # mean EMD from each histogram to the net1 group (first A columns)
    # and to the net2 group (remaining B columns)
    emd_A = emd_ABxAB[:, :A]
    emd_B = emd_ABxAB[:, A:]
    a = emd_A.mean(axis=1)
    b = emd_B.mean(axis=1)
    c = np.stack([a, b], axis=1)

    # EMD between the two raw input batches, plus a same-input baseline
    emd_inputs = cal_emd(inputs1, inputs2, bins=bins)
    emd_same = cal_emd(inputs1, inputs1, bins=bins)
    return c, emd_inputs, emd_same
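
# Self-contained sketch of the pairwise EMD matrix assembled in emd_analyze
# (added for illustration; get_histograms_by_net and the network/input globals
# are assumed to exist elsewhere, so synthetic histograms are used here).
import numpy as np
from scipy.stats import wasserstein_distance as emd

_histsA = [np.random.rand(10) for _ in range(3)]   # stand-ins for histograms1
_histsB = [np.random.rand(10) for _ in range(4)]   # stand-ins for histograms2
_hists = _histsA + _histsB
_n = len(_hists)
_D = np.zeros((_n, _n))
for _i in range(_n):
    for _j in range(_n):
        _D[_i, _j] = emd(_hists[_i], _hists[_j])
print(_D.shape)   # (7, 7); the block _D[:3, 3:] corresponds to emd_AxB
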
def earth_mover_distance(y1, y2):
    """
	
	Example
	-------
	
	::
		
		# create signal
		t = _np.arange(0, 10000) * 1e-5
		y1 = _xr.DataArray(np.sin(2 * _np.pi * 1e3 * t))
		
		# perform EMD over a range of noise values
		results = []
		amplitudes = 10 ** _np.arange(-4, 1.1, 0.1)
		for amp in amplitudes:
			y1_noisy = y1.copy() + (_np.random.rand(len(t)) - 0.5) * amp
			results.append(earth_mover_distance(y1, y1_noisy))
			
		# plot results
		fig, ax = _plt.subplots()
		ax.plot(amplitudes, results, marker='x')
		ax.set_xscale('log')
		ax.set_yscale('log')
		ax.set_xlabel('Noise amplitude')
		ax.set_ylabel('EMD')

 
	"""

    from scipy.stats import wasserstein_distance as emd

    return emd(y1, y2)
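
# A minimal usage sketch (added for illustration, not part of the original
# snippet). scipy's wasserstein_distance treats its positional arguments as
# observed sample values, so two samples from normal distributions whose means
# differ by 0.5 give a distance close to 0.5.
import numpy as np

_rng = np.random.default_rng(0)
_sample_a = _rng.normal(0.0, 1.0, 10000)
_sample_b = _rng.normal(0.5, 1.0, 10000)
print(earth_mover_distance(_sample_a, _sample_b))  # approximately 0.5
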
def cal_emd(inputs1, inputs2, bins=10):
    import numpy as np
    from torch.autograd import Variable
    # emd is taken to be scipy's 1D Wasserstein distance, as in the other
    # snippets in this file
    from scipy.stats import wasserstein_distance as emd

    # accept either torch Variables or numpy arrays
    if isinstance(inputs1, Variable):
        inputs1_np = inputs1.cpu().data.numpy()
        inputs2_np = inputs2.cpu().data.numpy()
    elif isinstance(inputs1, np.ndarray):
        inputs1_np = inputs1
        inputs2_np = inputs2
    inputs1_gt0 = inputs1_np[inputs1_np > 0]
    inputs2_gt0 = inputs2_np[inputs2_np > 0]
    inputs1_gt0_lt1 = inputs1_gt0[inputs1_gt0 <= 1]
    inputs2_gt0_lt1 = inputs2_gt0[inputs2_gt0 <= 1]

    inputs1_gt1_count = (inputs1_np > 1).sum()
    inputs2_gt1_count = (inputs2_np > 1).sum()

    input1_np_his = np.histogram(inputs1_gt0_lt1, bins=bins)
    input2_np_his = np.histogram(inputs2_gt0_lt1, bins=bins)
    input1_norm = input1_np_his[0].astype(float) / input1_np_his[0].sum()
    input2_norm = input2_np_his[0].astype(float) / input2_np_his[0].sum()
    input1_norm_with_gt1 = np.append(
        input1_norm,
        float(inputs1_gt1_count) / input1_np_his[0].sum())
    input2_norm_with_gt1 = np.append(
        input2_norm,
        float(inputs2_gt1_count) / input2_np_his[0].sum())

    emd_inputs = emd(input1_norm_with_gt1, input2_norm_with_gt1)
    return emd_inputs
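
# Illustrative usage of cal_emd (added; the arrays are synthetic stand-ins for
# network activations). Values in (0, 1] are histogrammed into `bins` bins, the
# counts are normalized, the count of values above 1 (relative to the in-range
# count) is appended as an overflow bin, and the two resulting vectors are
# compared with the Wasserstein distance.
import numpy as np

_acts1 = np.random.rand(1000) * 1.5   # some activations exceed 1
_acts2 = np.random.rand(1000)
print(cal_emd(_acts1, _acts2, bins=10))
print(cal_emd(_acts1, _acts1, bins=10))  # identical inputs give 0.0
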
def main_backup():
    # Relies on module-level globals: inputs (a batch of input tensors),
    # job_dataset, and the load_net_params / load_dset_params /
    # getConvLayerByIndex helpers.
    net1 = 'ResNet152'

    # convIndex is the index of the conv sub-model, starting from 1
    from .publicVariables import layer_index_value_list
    layer_index_value1 = layer_index_value_list[net1]

    model1, snapshot_path, query_path, database_path = load_net_params(net1)
    dset_test, dset_database = load_dset_params(job_dataset)

    layer_index = layer_index_value1[-2]
    sub_model1 = getConvLayerByIndex(model1, layer_index, net1)
    feature_out1 = sub_model1(inputs)
    feature_np1 = feature_out1.cpu().data.numpy()

    net2 = 'ResNext101_32x4d'
    # convIndex is the index of the conv sub-model, starting from 1

    layer_index_value2 = layer_index_value_list[net2]

    model2, snapshot_path, query_path, database_path = load_net_params(net2)
    dset_test, dset_database = load_dset_params(job_dataset)

    layer_index2 = layer_index_value2[-1]
    sub_model2 = getConvLayerByIndex(model2, layer_index2, net2)
    feature_out2 = sub_model2(inputs)
    feature_np2 = feature_out2.cpu().data.numpy()

    his_feature1 = np.histogram(feature_np1[feature_np1 <= 1])
    his_feature2 = np.histogram(feature_np2[feature_np2 <= 1])
    his_normalized_1 = his_feature1[0].astype(float) / his_feature1[0].sum()
    his_normalized_2 = his_feature2[0].astype(float) / his_feature2[0].sum()

    from scipy.stats import wasserstein_distance as emd
    emd1_2 = emd(his_normalized_1, his_normalized_2)
    emd2_1 = emd(his_normalized_2, his_normalized_1)
    print(emd1_2, emd2_1)
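
# Self-contained sketch of the comparison performed in main_backup (added for
# illustration; the model-loading helpers and `inputs` are assumed to exist
# elsewhere, so random arrays stand in for the two feature maps). The
# histograms of values <= 1 are normalized and compared with the Wasserstein
# distance, which is symmetric, so both printed values agree.
import numpy as np
from scipy.stats import wasserstein_distance as emd

_feat1 = np.random.rand(1000)          # stand-in for feature_np1
_feat2 = np.random.rand(1000) * 0.5    # stand-in for feature_np2

_his1 = np.histogram(_feat1[_feat1 <= 1])
_his2 = np.histogram(_feat2[_feat2 <= 1])
_norm1 = _his1[0].astype(float) / _his1[0].sum()
_norm2 = _his2[0].astype(float) / _his2[0].sum()
print(emd(_norm1, _norm2), emd(_norm2, _norm1))
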
def costMatrix(row_feats, col_feats, row_labels, col_labels, metric="Pearson"):

    """
    Compute the matching cost matrix between two label sets, given
    their features, labels, and a metric.  Costs are computed using either
    the Pearson correlation coefficient, the Dice coefficient.

    Parameters:
    - - - - -
        row_feats, col_feats : float, array
            feature data each each vertex
        row_labels, col_labels : int, arrary
            cortical parcellation vectors
        metric : string
            metric to use to build a similarity matrix. 
            The matrix index values will be mnipulated accordingly to
            generate positive, integer-valued costs.
    """

    # Get unique label values in non-moving and moving brain
    row_labs = np.asarray(list(set(row_labels).difference({-1, 0})))
    col_labs = np.asarray(list(set(col_labels).difference({-1, 0})))

    # Initialize cost matrix
    costMatrix = np.zeros((len(row_labs), len(col_labs)))
    print(costMatrix.shape)

    # Compute pairwise costs between all label sets
    for i, r in enumerate(row_labs):
        indr = np.where(row_labels == r)[0]
        lr = len(indr)

        if metric in ["Spearman","Euclidean","Pearson"]:
            featr = row_feats[indr, :]

        for j, c in enumerate(col_labs):
            indc = np.where(col_labels == c)[0]
            
            if metric in ["Spearman","Euclidean","Pearson"]:
                featc = col_feats[indc, :]

            if metric == "Spearman":
                [rVal, _] = spearmanr(featr, featc, axis=1)
                rVal = 1-rVal[lr:, 0:lr]

            elif metric == "Pearson":
                rVal = cdist(featr, featc, metric='Correlation').mean()

            elif metric == "Euclidean":
                rVal = cdist(featr, featc).mean()

            elif metric == "Dice":
                rVal = 1-hmg.dice(indr, indc)

            elif metric == "EMD":
                rmu = row_feats[indr, :].mean(0)
                rmu  = rmu/rmu.sum()

                cmu = col_feats[indc, :].mean(0)
                cmu = cmu/cmu.sum()

                rVal = emd(rmu, cmu)


            costMatrix[i, j] = rVal

    return [row_labs, col_labs, costMatrix]
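
# Illustrative usage of costMatrix (added; the arrays are synthetic stand-ins
# for the vertex features and parcellation labels the function expects, and
# numpy / scipy.spatial.distance.cdist are assumed to be imported in the
# original module).
import numpy as np

_n_vertices, _n_features = 200, 8
_row_feats = np.random.rand(_n_vertices, _n_features)
_col_feats = np.random.rand(_n_vertices, _n_features)
_row_labels = np.random.randint(1, 5, size=_n_vertices)   # labels 1..4
_col_labels = np.random.randint(1, 5, size=_n_vertices)
_row_labs, _col_labs, _C = costMatrix(_row_feats, _col_feats,
                                      _row_labels, _col_labels,
                                      metric="Euclidean")
print(_C.shape)   # (len(_row_labs), len(_col_labs))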