def EMD(saliency_map1, saliency_map2, sub_sample=1/32.0):
    '''
    Earth Mover's Distance measures the distance between two probability distributions
    by how much transformation one distribution would need to undergo to match another
    (EMD=0 for identical distributions).

    Both maps are first shrunk (saliency_map2 by ``sub_sample``, saliency_map1 to
    the resulting shape), histogram matched, and normalized to sum to 1 before
    the distance is computed with the legacy OpenCV ``cv.CalcEMD2`` routine.

    Parameters
    ----------
    saliency_map1 : real-valued matrix
        If the two maps are different in shape, saliency_map1 will be resized to match saliency_map2.
    saliency_map2 : real-valued matrix
        Reference map; its (sub-sampled) shape and histogram are the targets
        saliency_map1 is matched against.
    sub_sample : float, optional
        Factor applied to each dimension of saliency_map2 before the distance
        is computed (default 1/32).

    Returns
    -------
    EMD : float, positive
        The value returned by ``cv.CalcEMD2`` with the L2 ground distance.
    '''
    # np.asarray converts without copying when the input already is an
    # ndarray.  np.array(..., copy=False) has the same intent but raises
    # ValueError on NumPy >= 2.0 whenever a copy is unavoidable (e.g. a
    # nested list is passed in).
    map2 = np.asarray(saliency_map2)
    # Reduce image size for efficiency of calculation
    map2 = resize(map2, np.round(np.array(map2.shape)*sub_sample), order=3, mode='nearest')
    map1 = resize(saliency_map1, map2.shape, order=3, mode='nearest')
    # Histogram match the images so they have the same mass
    map1 = match_hist(map1, *exposure.cumulative_distribution(map2))
    # Normalize the two maps to sum up to 1,
    # so that the score is independent of the starting amount of mass / spread of fixations of the fixation map
    map1 = normalize(map1, method='sum')
    map2 = normalize(map2, method='sum')
    # Compute EMD with OpenCV
    # - http://docs.opencv.org/modules/imgproc/doc/histograms.html#emd
    # - http://stackoverflow.com/questions/5101004/python-code-for-earth-movers-distance
    # - http://stackoverflow.com/questions/12535715/set-type-for-fromarray-in-opencv-for-python
    r, c = map2.shape
    # x holds the column index and y the row index of every pixel.
    x, y = np.meshgrid(range(c), range(r))
    # Each signature is an (r*c, 3) float32 matrix whose rows are
    # [pixel value, x, y].
    signature1 = cv.CreateMat(r*c, 3, cv.CV_32FC1)
    signature2 = cv.CreateMat(r*c, 3, cv.CV_32FC1)
    cv.Convert(cv.fromarray(np.c_[map1.ravel(), x.ravel(), y.ravel()]), signature1)
    cv.Convert(cv.fromarray(np.c_[map2.ravel(), x.ravel(), y.ravel()]), signature2)
    return cv.CalcEMD2(signature2, signature1, cv.CV_DIST_L2)
Beispiel #2
0
def EMD(saliency_map1, saliency_map2, sub_sample=1 / 32.0):
    """Return the Earth Mover's Distance between two saliency maps.

    saliency_map2 is copied and shrunk by ``sub_sample`` along each
    dimension; saliency_map1 is resized to that same shape, histogram
    matched against it, and both are normalized with ``method='sum'``
    before the legacy OpenCV ``cv.CalcEMD2`` call (L2 ground distance).
    """
    reference = np.array(saliency_map2, copy=True)

    # Shrink the reference map first: the EMD solver runs on one signature
    # row per pixel, so a smaller grid keeps the computation tractable.
    target_shape = np.round(np.array(reference.shape) * sub_sample)
    reference = resize(reference, target_shape, order=3, mode='nearest')
    candidate = resize(saliency_map1, reference.shape, order=3, mode='nearest')

    # Give the candidate the same histogram as the reference so both maps
    # carry the same mass.
    reference_cdf = exposure.cumulative_distribution(reference)
    candidate = match_hist(candidate, *reference_cdf)

    # Normalize both maps to sum to 1 so the score does not depend on the
    # starting amount of mass / spread of fixations of the fixation map.
    candidate = normalize(candidate, method='sum')
    reference = normalize(reference, method='sum')

    # Compute EMD with OpenCV
    # - http://docs.opencv.org/modules/imgproc/doc/histograms.html#emd
    # - http://stackoverflow.com/questions/5101004/python-code-for-earth-movers-distance
    # - http://stackoverflow.com/questions/12535715/set-type-for-fromarray-in-opencv-for-python
    n_rows, n_cols = reference.shape
    col_idx, row_idx = np.meshgrid(range(n_cols), range(n_rows))
    n_points = n_rows * n_cols

    candidate_sig = cv.CreateMat(n_points, 3, cv.CV_32FC1)
    reference_sig = cv.CreateMat(n_points, 3, cv.CV_32FC1)

    # Signature rows are [pixel value, column index, row index].
    candidate_rows = np.c_[candidate.ravel(), col_idx.ravel(), row_idx.ravel()]
    cv.Convert(cv.fromarray(candidate_rows), candidate_sig)
    reference_rows = np.c_[reference.ravel(), col_idx.ravel(), row_idx.ravel()]
    cv.Convert(cv.fromarray(reference_rows), reference_sig)

    return cv.CalcEMD2(reference_sig, candidate_sig, cv.CV_DIST_L2)
Beispiel #3
0
def _wmd_signature(cipin_gram):
    """Build the EMD signature matrix for one ``cipins_mapper`` entry.

    ``cipin_gram[0]`` is reshaped to one row per element of
    ``cipin_gram[1]``; the returned float32 array has ``cipin_gram[1]`` as
    its first column, followed by those rows.
    """
    import numpy as np
    d_list = cipin_gram[1]
    c_recover = np.array(cipin_gram[0], np.float32).reshape(len(d_list), -1)
    d_recover = np.array(d_list, np.float32)
    return np.column_stack((np.transpose(d_recover), c_recover))


def run_wmd_mix(dir):
    """Rank candidate files by EMD and dump the rankings to JSON.

    Reads ``files.json``, ``file_cipin_vec_mapper.json`` and
    ``file_wcd_sim_mapper.json`` from ``dir``.  For every file from index
    1800 onwards, the EMD (``cv.CalcEMD2`` with L2 ground distance) is
    computed against the first ``10 * k`` (k = 10) entries of that file's
    ``wcd_sims`` list: the first ``k`` results are sorted by distance and the
    remaining ones are passed to ``InsertSortItem``.  The collected
    ``[ranking, file]`` pairs are written to ``wmd_mix_sim_10k.json`` in
    ``dir``.

    Parameters
    ----------
    dir : object
        Directory holding the input files; ``str(dir)`` is used as the path
        prefix.

    Returns
    -------
    None
    """
    import json
    k = 10
    base = str(dir)
    # Context managers close the handles even when JSON parsing fails.
    with open(base + "/files.json") as f:
        Files = json.load(f)
    with open(base + "/file_cipin_vec_mapper.json") as f2:
        cipins_mapper = json.load(f2)
    with open(base + "/file_wcd_sim_mapper.json") as f3:
        wcd_sims_mapper = json.load(f3)
    # Single-argument parenthesized print emits the same text as the
    # Python 2 print statement.
    print("Load complete")
    from cv2 import cv
    from insert_sort import InsertSortItem
    sims = []
    count = 0
    # NOTE(review): the first 1800 files are skipped; the reason is not
    # visible here -- confirm against the caller / earlier runs.
    for file in Files[1800:]:
        count += 1
        signature1 = _wmd_signature(cipins_mapper[file])
        # The query signature is the same for every candidate of this file,
        # so it is converted to a cv matrix once instead of once per pair.
        pp = cv.fromarray(signature1)

        wcd_sims = wcd_sims_mapper[file]
        wmd_mix_sims = []
        for wcd_sim_gram in wcd_sims[:k]:
            file_wcd = wcd_sim_gram[0]
            signature2 = _wmd_signature(cipins_mapper[file_wcd])
            qq = cv.fromarray(signature2)
            emd = cv.CalcEMD2(pp, qq, cv.CV_DIST_L2)
            wmd_mix_sims.append([file_wcd, emd])
        # Ascending by distance (stable, like the original sorted() call).
        wmd_mix_sims.sort(key=lambda pair: pair[1])
        for wcd_sim_gram in wcd_sims[k:10 * k]:
            file_wcd = wcd_sim_gram[0]
            signature2 = _wmd_signature(cipins_mapper[file_wcd])
            qq = cv.fromarray(signature2)
            emd = cv.CalcEMD2(pp, qq, cv.CV_DIST_L2)
            # NOTE(review): InsertSortItem is defined in insert_sort;
            # presumably it inserts into the sorted list in place -- confirm.
            InsertSortItem(wmd_mix_sims, [file_wcd, emd])
        sims.append([wmd_mix_sims, file])
        if count % 100 == 0:
            # NOTE(review): totalFilesNumber is not defined in this function;
            # it must exist as a module-level global or this line raises
            # NameError.  The "/ 5 * 2" scaling is kept as-is -- TODO confirm
            # its meaning.
            print("%s %s" % (count * 1.0 / totalFilesNumber / 5 * 2,
                             totalFilesNumber / 5 * 2 - count))
    data = json.dumps(sims, ensure_ascii=False)
    # The original never closed this handle, so the buffered output was only
    # flushed if the interpreter happened to finalize the file object.
    with open(base + "/wmd_mix_sim_10k.json", "w") as out:
        out.write(data.encode('utf-8'))
Beispiel #4
0
 def emd_(a32, b32):
     """Return ``cv.CalcEMD2`` of the two signatures using the L2 distance.

     NOTE(review): the parameter names suggest float32 signature matrices
     accepted by the legacy ``cv`` API -- confirm against the caller.
     """
     distance = cv.CalcEMD2(a32, b32, cv.CV_DIST_L2)
     return distance
Beispiel #5
0
def EarthMoversDistance(hist1, hist2):
    """Return the Earth Mover's Distance between two histograms.

    Each histogram is converted with ``_make_signature`` (hist1 first, then
    hist2) and the pair is passed to ``cv.CalcEMD2`` together with
    ``EUCLIDEAN_DISTANCE`` as the distance type.
    """
    signatures = [_make_signature(hist) for hist in (hist1, hist2)]
    first, second = signatures
    return cv.CalcEMD2(first, second, EUCLIDEAN_DISTANCE)
Beispiel #6
0
 wcd_sims = wcd_sims_mapper[file]
 wmd_mix_sims = [["Nil", 0]]
 for wcd_sim_gram in wcd_sims[:k]:
     file_wcd = wcd_sim_gram[0]
     cipin_gram_wcd = cipins_mapper[file_wcd]
     c_list_wcd = cipin_gram_wcd[0]
     d_list_wcd = cipin_gram_wcd[1]
     word_count_wcd = len(d_list_wcd)
     c_recover_wcd = np.array(c_list_wcd,
                              np.float32).reshape(word_count_wcd, -1)
     d_recover_wcd = np.array(d_list_wcd, np.float32)
     signature2 = np.column_stack(
         (np.transpose(d_recover_wcd), c_recover_wcd))
     pp = cv.fromarray(signature1)
     qq = cv.fromarray(signature2)
     emd = cv.CalcEMD2(pp, qq, cv.CV_DIST_L2)
     wmd_mix_sims.append([file_wcd, emd])
 top_heap_sort(wmd_mix_sims)
 wmd_mix_sims = wmd_mix_sims[1:]
 for wcd_sim_gram in wcd_sims[k:10 * k]:
     file_wcd = wcd_sim_gram[0]
     cipin_gram_wcd = cipins_mapper[file_wcd]
     c_list_wcd = cipin_gram_wcd[0]
     d_list_wcd = cipin_gram_wcd[1]
     word_count_wcd = len(d_list_wcd)
     c_recover_wcd = np.array(c_list_wcd,
                              np.float32).reshape(word_count_wcd, -1)
     d_recover_wcd = np.array(d_list_wcd, np.float32)
     # rwmd = getRWMD((c_recover,d_recover),(c_recover_wcd,d_recover_wcd))
     # if InsertTest(wmd_mix_sims, rwmd):
     signature2 = np.column_stack(