# Hash every input image, group paths by hash, then build and persist a VP-tree.
# NOTE(review): the original fragment began mid-loop; the enumerate() header and
# the `hashes` initialisation below are reconstructed — confirm against the
# original script.
hashes = {}
for (i, imagePath) in enumerate(imagePaths):
    # load the input image
    print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
    image = cv2.imread(imagePath)

    # cv2.imread returns None (it does not raise) for unreadable files —
    # skip them instead of crashing in dhash()
    if image is None:
        continue

    # compute the hash for the image and convert it
    h = convert_hash(dhash(image))

    # update the hashes dictionary: all paths sharing a hash go in one bucket
    hashes.setdefault(h, []).append(imagePath)

# build the VP-Tree over the distinct hash values
print("[INFO] building VP-Tree...")
tree = vptree.VPTree(list(hashes.keys()), hamming)

# serialize the VP-Tree to disk (with-statement guarantees the handle closes)
print("[INFO] serializing VP-Tree...")
with open(args["tree"], "wb") as f:
    pickle.dump(tree, f)

# serialize the hashes dictionary to disk
print("[INFO] serializing hashes...")
with open(args["hashes"], "wb") as f:
    pickle.dump(hashes, f)
def test_zero_neighbors_raises_valueerror(self):
    """Requesting zero nearest neighbors must raise ValueError."""
    tree = vptree.VPTree([1, 2, 3], euclidean)
    with self.assertRaises(ValueError):
        tree.get_n_nearest_neighbors([1], 0)
hash_dict = {}

# Walk every image path, hashing each readable image and bucketing
# paths that share the same perceptual hash.
for idx, path in enumerate(images_paths):
    # Load the image from disk
    print("[INFO] processing image {}/{}".format(idx + 1, len(images_paths)))
    image = cv2.imread(path)

    # If the image is None then we could not load it from disk (so skip it)
    if image is None:
        continue

    # Compute the hash
    h = convert_hash(dhash(image))

    # Update the dictionary (setdefault creates the bucket on first sight)
    hash_dict.setdefault(h, []).append(path)

# Save dictionary to database
hash_filename = database_path + ".dict.pickle"
with open(hash_filename, "wb") as f:
    pickle.dump(hash_dict, f)
print("[INFO] Hash data saved: " + hash_filename)

print("[INFO] Generating VPTree...")
tree = vptree.VPTree(list(hash_dict.keys()), hamming)

tree_filename = database_path + ".tree.pickle"
with open(tree_filename, "wb") as f:
    pickle.dump(tree, f)
print("[INFO] VPTree generated and saved: " + tree_filename)
def train_tree(features_list, dist_measure):
    """Build and return a VP-tree over *features_list* using *dist_measure*."""
    return vptree.VPTree(features_list, dist_measure)
def load_database(self, database_file_path):
    """Load photo hashes from a JSON-lines file and (re)build the VP-trees.

    Each line of *database_file_path* is a JSON object with keys "ahash",
    "dhash", "phash", "whash" and "id".  For every hash kind this populates
    ``self.<kind>_database`` (hash value -> list of photo ids) and
    ``self.<kind>_tree`` (a VP-tree over the distinct hashes, or None when
    no hashes of that kind were loaded).
    """
    kinds = ("ahash", "dhash", "phash", "whash")

    # Drop any previously loaded state so repeated loads start clean,
    # then reset to empty containers (the original did this 4x by hand).
    for kind in kinds:
        if hasattr(self, kind + "_database"):
            delattr(self, kind + "_database")
            delattr(self, kind + "_tree")
        setattr(self, kind + "_database", {})
        setattr(self, kind + "_tree", None)
    gc.collect()

    database_loading_time = time.time()
    with open(database_file_path, "r") as jsonfile:
        lines = jsonfile.readlines()
    for line in lines:
        twitter_photo = json.loads(line.strip())
        for kind in kinds:
            database = getattr(self, kind + "_database")
            # Bucket photo ids under each hash value.
            database.setdefault(twitter_photo[kind], []).append(twitter_photo["id"])
    print("[info] Database loading ({} ms)".format(int((time.time() - database_loading_time) * 1000)))

    # Build one VP-tree per hash kind; skipped (tree stays None) when empty.
    for kind in kinds:
        building_time = time.time()
        database = getattr(self, kind + "_database")
        if len(database) != 0:
            setattr(self, kind + "_tree", vptree.VPTree(list(database.keys()), self.hamming))
        print("[info] {} vptree building ({} ms)".format(kind, int((time.time() - building_time) * 1000)))
import vptree # Define distance function. def euclidean(p1, p2): return np.sqrt(np.sum(np.power(p2 - p1, 2))) # Generate some random points. data = [] with open('data.txt', 'r') as file: data = np.array(eval(file.read())) # Build tree in O(n log n) time complexity. start = time.time() tree = vptree.VPTree(data, euclidean) py_const_time = time.time() - start # Query single point. py_results = [] py_times = [] with open('queries.txt', 'r') as file: for line in file.readlines(): query = eval(line) start = time.time() py_results.append(tree.get_n_nearest_neighbors(query, 10000)) py_times.append(time.time() - start) cpp_results = [] with open('results.txt', 'r') as file: i = 0
import numpy as np
import vptree


# Define distance function.
def euclidean(p1, p2):
    """Euclidean distance between two NumPy vectors."""
    return np.sqrt(np.sum(np.power(p2 - p1, 2)))


# Generate some random points.
points = np.random.randn(20000, 10)
query = [.5] * 10

# Build tree in O(n log n) time complexity.
tree = vptree.VPTree(points, euclidean)

# Query single point.
tree.get_nearest_neighbor(query)

# Query n-points.
tree.get_n_nearest_neighbors(query, 10)

# Get all points within certain distance.
out = tree.get_all_in_range(query, 3.14)
# Fixed: `print out` is Python 2 syntax (a SyntaxError under Python 3).
print(out)
# NOTE(review): orphaned return — its enclosing def (an L2-normalisation
# helper using sklearn's normalize) lies outside this excerpt.
return normalize([v], norm='l2')[0]


def similarity(v1, v2):
    # Cosine similarity: dot product over the product of the vector norms.
    return np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))


def cosineDistanceMatching(poseVector1, poseVector2):
    # Cosine distance sqrt(2 * (1 - cos_sim)) between two pose vectors;
    # used as the VP-tree metric below.
    cosineSimilarity = similarity(poseVector1, poseVector2)
    distance = 2 * (1 - cosineSimilarity)
    return np.sqrt(distance)


# NOTE(review): poseData is defined outside this excerpt — presumably an
# array of pose vectors; verify shape before trusting axis=1 here.
idx_m = np.mean(poseData, axis=1)
tree = vptree.VPTree(poseData, cosineDistanceMatching)

# Keypoint skeleton diagram: indices and their links.
# 8 12 16 20
# | | | |
# 7 11 15 19
# 4 | | | |
# | 6 10 14 18
# 3 | | | |
# | 5---9---13--17
# 2 \ /
# \ \ /
# 1 \ /
# \ \ /
# ------0-
# NOTE(review): list literal truncated here — it continues past this excerpt.
connections = [
def euclidean(p1, p2):
    """Bit-level Hamming distance between two integer image hashes.

    NOTE(review): despite the name this counts differing bits (Hamming),
    not Euclidean distance; the name is kept because it is part of the
    snippet's interface (and may be baked into pickled trees).
    """
    try:
        return bin(p1 ^ p2).count('1')
    except TypeError:
        # Inputs sometimes arrive as non-int hash objects/strings;
        # coerce to int and retry.
        return bin(int(p1) ^ int(p2)).count('1')


IMAGE_DIR = "/Users/sunilku/Desktop/hackathon/misc/"

# Perceptual-hash every bitmap/JPEG in IMAGE_DIR, keyed by filename.
hash_dict = {}
for fname in os.listdir(IMAGE_DIR):
    if fname.endswith(("bmp", "jpg")):
        hash_dict[fname] = int(str(imagehash.phash(Image.open(IMAGE_DIR + fname))), 16)
        # Fixed: was a Python 2 print statement (SyntaxError under Python 3).
        print(fname, hash_dict[fname])

hashes = [int(v) for v in hash_dict.values()]
tree = vptree.VPTree(hashes, euclidean)

# Persist both the filename->hash map and the search tree
# (with-statements replace the manual open/close pairs).
with open("hash_dict.pkl", "wb") as f:
    pickle.dump(hash_dict, f)
with open("tree.pkl", "wb") as f:
    pickle.dump(tree, f)