def EMD(saliency_map1, saliency_map2, sub_sample=1/32.0):
    '''
    Earth Mover's Distance measures the distance between two probability
    distributions by how much transformation one distribution would need to
    undergo to match another (EMD=0 for identical distributions).

    Parameters
    ----------
    saliency_map1 : real-valued matrix
        If the two maps are different in shape, saliency_map1 will be resized
        to match saliency_map2.
    saliency_map2 : real-valued matrix
    sub_sample : float, optional
        Factor applied to the shape of saliency_map2 before the distance is
        computed (default 1/32). Both maps are resized to that reduced shape.

    Returns
    -------
    EMD : float, non-negative
        Value returned by ``cv.CalcEMD2`` with the ``cv.CV_DIST_L2`` distance.
    '''
    # np.asarray copies only when a conversion is required. The former
    # np.array(..., copy=False) spelling raises ValueError on NumPy >= 2
    # whenever a copy cannot be avoided (e.g. for list input).
    map2 = np.asarray(saliency_map2)
    # Reduce image size for efficiency of calculation
    map2 = resize(map2, np.round(np.array(map2.shape)*sub_sample), order=3, mode='nearest')
    map1 = resize(saliency_map1, map2.shape, order=3, mode='nearest')
    # Histogram match the images so they have the same mass
    map1 = match_hist(map1, *exposure.cumulative_distribution(map2))
    # Normalize the two maps to sum up to 1,
    # so that the score is independent of the starting amount of mass / spread
    # of fixations of the fixation map
    map1 = normalize(map1, method='sum')
    map2 = normalize(map2, method='sum')
    # Compute EMD with OpenCV
    # - http://docs.opencv.org/modules/imgproc/doc/histograms.html#emd
    # - http://stackoverflow.com/questions/5101004/python-code-for-earth-movers-distance
    # - http://stackoverflow.com/questions/12535715/set-type-for-fromarray-in-opencv-for-python
    r, c = map2.shape
    x, y = np.meshgrid(range(c), range(r))
    # Each signature row is (weight, x, y); cv.Convert casts the stacked
    # array into the 32-bit float matrix created just above it.
    signature1 = cv.CreateMat(r*c, 3, cv.CV_32FC1)
    signature2 = cv.CreateMat(r*c, 3, cv.CV_32FC1)
    cv.Convert(cv.fromarray(np.c_[map1.ravel(), x.ravel(), y.ravel()]), signature1)
    cv.Convert(cv.fromarray(np.c_[map2.ravel(), x.ravel(), y.ravel()]), signature2)
    return cv.CalcEMD2(signature2, signature1, cv.CV_DIST_L2)
def EMD(saliency_map1, saliency_map2, sub_sample=1 / 32.0):
    """Return the Earth Mover's Distance between two saliency maps.

    ``saliency_map2`` is copied and resized by ``sub_sample``;
    ``saliency_map1`` is resized to that same reduced shape, histogram-matched
    against the reduced ``saliency_map2`` and both maps are passed through
    ``normalize(..., method='sum')`` before the OpenCV EMD is evaluated.

    Parameters
    ----------
    saliency_map1 : real-valued matrix
        Resized to the (sub-sampled) shape of ``saliency_map2``.
    saliency_map2 : real-valued matrix
    sub_sample : float, optional
        Factor applied to the shape of ``saliency_map2`` (default 1/32).

    Returns
    -------
    Value returned by ``cv.CalcEMD2`` using ``cv.CV_DIST_L2``.
    """
    # np.array copies by default, so the caller's map is never touched.
    reference = np.array(saliency_map2)

    # Shrink first: the EMD is computed on one signature row per pixel.
    reduced_shape = np.round(np.array(reference.shape) * sub_sample)
    reference = resize(reference, reduced_shape, order=3, mode='nearest')
    candidate = resize(saliency_map1, reference.shape, order=3, mode='nearest')

    # Give the candidate the same histogram as the reference map.
    reference_cdf = exposure.cumulative_distribution(reference)
    candidate = match_hist(candidate, *reference_cdf)

    # Scale by the sum so the score does not depend on the total mass.
    candidate = normalize(candidate, method='sum')
    reference = normalize(reference, method='sum')

    n_rows, n_cols = reference.shape
    col_idx, row_idx = np.meshgrid(range(n_cols), range(n_rows))

    def as_signature(weights):
        # One row per pixel: (weight, x, y), converted to 32-bit floats.
        signature = cv.CreateMat(n_rows * n_cols, 3, cv.CV_32FC1)
        stacked = np.c_[weights.ravel(), col_idx.ravel(), row_idx.ravel()]
        cv.Convert(cv.fromarray(stacked), signature)
        return signature

    candidate_signature = as_signature(candidate)
    reference_signature = as_signature(reference)
    return cv.CalcEMD2(reference_signature, candidate_signature, cv.CV_DIST_L2)
def run_wmd_mix(dir):
    """Re-rank each file's candidate list by Earth Mover's Distance and save it.

    Reads three JSON files from ``dir``:

    * ``files.json`` -- list of file names; only entries from index 1800
      onwards are processed.
    * ``file_cipin_vec_mapper.json`` -- maps a file name to a pair whose
      element ``[1]`` is a list of per-word values and whose element ``[0]``
      holds the matching vectors (reshaped to one row per word).
    * ``file_wcd_sim_mapper.json`` -- maps a file name to a list of candidate
      entries whose element ``[0]`` is a candidate file name (presumably
      ordered by a cheaper similarity measure; confirm against the producer).

    For every processed file the first ``k`` (10) candidates are scored with
    ``cv.CalcEMD2`` and sorted by ascending distance; candidates ``k`` to
    ``10 * k`` are then scored and handed to ``InsertSortItem`` together with
    that list. The collected ``[ranking, file_name]`` pairs are written to
    ``wmd_mix_sim_10k.json`` in ``dir``.

    Parameters
    ----------
    dir : directory containing the input files; converted with ``str()``.

    Returns
    -------
    None
    """
    import json

    import numpy as np
    from cv2 import cv
    from insert_sort import InsertSortItem

    k = 10
    base = str(dir)

    def load_json(suffix):
        # The context manager closes the handle even if parsing fails.
        with open(base + suffix) as handle:
            return json.loads(handle.read())

    def build_signature(name):
        # Signature layout: first column from entry [1], remaining columns
        # from entry [0] reshaped to one row per word.
        gram = cipins_mapper[name]
        c_list = gram[0]
        d_list = gram[1]
        coords = np.array(c_list, np.float32).reshape(len(d_list), -1)
        weights = np.array(d_list, np.float32)
        return np.column_stack((np.transpose(weights), coords))

    def emd_to(query_mat, candidate_name):
        # Keep the numpy array bound to a local while OpenCV reads from it.
        candidate = build_signature(candidate_name)
        return cv.CalcEMD2(query_mat, cv.fromarray(candidate), cv.CV_DIST_L2)

    Files = load_json("/files.json")
    cipins_mapper = load_json("/file_cipin_vec_mapper.json")
    wcd_sims_mapper = load_json("/file_wcd_sim_mapper.json")
    print("Load complete")

    sims = []
    for count, file_name in enumerate(Files[1800:], 1):
        query = build_signature(file_name)
        # The query matrix does not change per candidate, so wrap it once.
        query_mat = cv.fromarray(query)
        wcd_sims = wcd_sims_mapper[file_name]

        # Exact EMD for the first k candidates, ascending by distance.
        wmd_mix_sims = [[gram[0], emd_to(query_mat, gram[0])]
                        for gram in wcd_sims[:k]]
        wmd_mix_sims.sort(key=lambda pair: pair[1])

        # Remaining candidates are offered to the sorted list one at a time.
        for gram in wcd_sims[k:10 * k]:
            file_wcd = gram[0]
            InsertSortItem(wmd_mix_sims, [file_wcd, emd_to(query_mat, file_wcd)])

        sims.append([wmd_mix_sims, file_name])

        if count % 100 == 0:
            # NOTE(review): totalFilesNumber is not defined in this function;
            # presumably a module-level global -- confirm it exists.
            print("%s %s" % (count * 1.0 / totalFilesNumber / 5 * 2,
                             totalFilesNumber / 5 * 2 - count))

    data = json.dumps(sims, ensure_ascii=False)
    # The payload is encoded to bytes, so the file is opened in binary mode;
    # the context manager guarantees it is flushed and closed.
    with open(base + "/wmd_mix_sim_10k.json", "wb") as out:
        out.write(data.encode('utf-8'))
def emd_(a32, b32):
    """Return ``cv.CalcEMD2`` of the two signatures using ``cv.CV_DIST_L2``.

    Both arguments are forwarded unchanged (presumably 32-bit float signature
    matrices, judging by the names -- confirm against the callers).
    """
    distance_type = cv.CV_DIST_L2
    return cv.CalcEMD2(a32, b32, distance_type)
def EarthMoversDistance(hist1, hist2):
    """Return the Earth Mover's Distance between two histograms.

    Each histogram is converted with ``_make_signature`` (``hist1`` first,
    then ``hist2``) and the two results are passed to ``cv.CalcEMD2`` together
    with ``EUCLIDEAN_DISTANCE``.
    """
    first, second = [_make_signature(hist) for hist in (hist1, hist2)]
    return cv.CalcEMD2(first, second, EUCLIDEAN_DISTANCE)
wcd_sims = wcd_sims_mapper[file] wmd_mix_sims = [["Nil", 0]] for wcd_sim_gram in wcd_sims[:k]: file_wcd = wcd_sim_gram[0] cipin_gram_wcd = cipins_mapper[file_wcd] c_list_wcd = cipin_gram_wcd[0] d_list_wcd = cipin_gram_wcd[1] word_count_wcd = len(d_list_wcd) c_recover_wcd = np.array(c_list_wcd, np.float32).reshape(word_count_wcd, -1) d_recover_wcd = np.array(d_list_wcd, np.float32) signature2 = np.column_stack( (np.transpose(d_recover_wcd), c_recover_wcd)) pp = cv.fromarray(signature1) qq = cv.fromarray(signature2) emd = cv.CalcEMD2(pp, qq, cv.CV_DIST_L2) wmd_mix_sims.append([file_wcd, emd]) top_heap_sort(wmd_mix_sims) wmd_mix_sims = wmd_mix_sims[1:] for wcd_sim_gram in wcd_sims[k:10 * k]: file_wcd = wcd_sim_gram[0] cipin_gram_wcd = cipins_mapper[file_wcd] c_list_wcd = cipin_gram_wcd[0] d_list_wcd = cipin_gram_wcd[1] word_count_wcd = len(d_list_wcd) c_recover_wcd = np.array(c_list_wcd, np.float32).reshape(word_count_wcd, -1) d_recover_wcd = np.array(d_list_wcd, np.float32) # rwmd = getRWMD((c_recover,d_recover),(c_recover_wcd,d_recover_wcd)) # if InsertTest(wmd_mix_sims, rwmd): signature2 = np.column_stack(