Ejemplo n.º 1
0
    # NOTE(review): this is the body of a per-image loop; the enclosing
    # `for (i, imagePath) in enumerate(imagePaths)`-style header is not
    # visible in this chunk — confirm against the full script.
    # load the input image
    print("[INFO] processing image {}/{}".format(i + 1,
            len(imagePaths)))
    image = cv2.imread(imagePath)
    
    # compute the hash for the image and convert it
    # NOTE(review): cv2.imread returns None for unreadable files and that
    # case is not handled here — dhash(None) would likely fail; verify.
    h = dhash(image)
    h = convert_hash(h)
    
    # update the hashes dictionary: bucket this path under its hash value
    l = hashes.get(h, [])
    l.append(imagePath)
    hashes[h] = l

# build the VP-Tree over every unique hash, using Hamming distance
print("[INFO] building VP-Tree...")
points = list(hashes.keys())
tree = vptree.VPTree(points, hamming)

# serialize the VP-Tree to disk
# (with-blocks guarantee the files are closed even if pickling fails;
# pickle.dump streams directly instead of building the bytes in memory)
print("[INFO] serializing VP-Tree...")
with open(args["tree"], "wb") as f:
    pickle.dump(tree, f)

# serialize the hashes dictionary to disk
print("[INFO] serializing hashes...")
with open(args["hashes"], "wb") as f:
    pickle.dump(hashes, f)
Ejemplo n.º 2
0
 def test_zero_neighbors_raises_valueerror(self):
     """get_n_nearest_neighbors must reject a neighbor count of 0 with ValueError."""
     tree = vptree.VPTree([1, 2, 3], euclidean)
     self.assertRaises(ValueError, tree.get_n_nearest_neighbors, [1], 0)
Ejemplo n.º 3
0
hash_dict = {}

# Loop over the images paths, hashing every image that can be decoded.
for (idx, image_path) in enumerate(images_paths):
    # Report progress while loading each image from disk.
    print("[INFO] processing image {}/{}".format(idx + 1, len(images_paths)))
    loaded = cv2.imread(image_path)
    # cv2.imread returns None when the file cannot be decoded — skip those.
    if loaded is None:
        continue
    # Compute the perceptual hash and bucket this path under it.
    image_hash = convert_hash(dhash(loaded))
    hash_dict.setdefault(image_hash, []).append(image_path)

# Persist the hash dictionary next to the database file.
dict_path = database_path + ".dict.pickle"
with open(dict_path, "wb") as handle:
    pickle.dump(hash_dict, handle)
    print("[INFO] Hash data saved: " + dict_path)

# Build the VP-tree over the unique hash values using Hamming distance.
print("[INFO] Generating VPTree...")
tree = vptree.VPTree(list(hash_dict.keys()), hamming)

# Persist the tree alongside the dictionary.
tree_path = database_path + ".tree.pickle"
with open(tree_path, "wb") as handle:
    pickle.dump(tree, handle)
    print("[INFO] VPTree generated and saved: " + tree_path)
Ejemplo n.º 4
0
def train_tree(features_list, dist_measure):
    """Build and return a VP-tree over *features_list* using *dist_measure*."""
    return vptree.VPTree(features_list, dist_measure)
Ejemplo n.º 5
0
    def load_database(self, database_file_path):
        """Load photo hashes from a JSON-lines file and (re)build the VP-trees.

        Each line of *database_file_path* is a JSON object carrying "id",
        "ahash", "dhash", "phash" and "whash" keys.  For every hash kind this
        populates ``self.<kind>_database`` (hash value -> list of photo ids)
        and ``self.<kind>_tree`` (a vptree.VPTree over the hash values, or
        None while the database is empty).

        The original code repeated the identical logic four times, once per
        hash kind; collapsing it into loops keeps the printed output and the
        resulting attributes byte-identical while removing the duplication.
        """
        hash_kinds = ("ahash", "dhash", "phash", "whash")

        # Drop any previously loaded state so its memory can be reclaimed
        # before the new databases are allocated.
        for kind in hash_kinds:
            if hasattr(self, kind + "_database"):
                delattr(self, kind + "_database")
                delattr(self, kind + "_tree")
            setattr(self, kind + "_database", {})
            setattr(self, kind + "_tree", None)

        gc.collect()

        database_loading_time = time.time()

        with open(database_file_path, "r") as jsonfile:
            # Stream line by line instead of readlines() so the whole file
            # is never materialized in memory at once.
            for line in jsonfile:
                twitter_photo = json.loads(line.strip())
                for kind in hash_kinds:
                    database = getattr(self, kind + "_database")
                    # Bucket this photo id under its hash value.
                    database.setdefault(twitter_photo[kind], []).append(
                        twitter_photo["id"])

        print("[info] Database loading ({} ms)".format(
            int((time.time() - database_loading_time) * 1000)))

        # Build one VP-tree per hash kind; an empty database keeps its tree
        # as None (a VP-tree cannot be built over zero points).
        for kind in hash_kinds:
            building_time = time.time()
            database = getattr(self, kind + "_database")
            if len(database) != 0:
                setattr(self, kind + "_tree",
                        vptree.VPTree(list(database.keys()), self.hamming))
            print("[info] {} vptree building ({} ms)".format(
                kind, int((time.time() - building_time) * 1000)))
import vptree


# Define distance function.
def euclidean(p1, p2):
    """Return the Euclidean (L2) distance between points *p1* and *p2*."""
    diff = p2 - p1
    return np.sqrt(np.sum(np.power(diff, 2)))


# Load the benchmark points from disk (comment said "random points" but the
# data actually comes from data.txt).
# NOTE(review): eval() on file contents executes arbitrary code — only safe
# for trusted local files; consider ast.literal_eval for pure data.
data = []
with open('data.txt', 'r') as file:
    data = np.array(eval(file.read()))

# Build tree in O(n log n) time complexity, timing the construction.
start = time.time()
tree = vptree.VPTree(data, euclidean)
py_const_time = time.time() - start

# Run and time one k-NN query (k=10000) per line of queries.txt.
py_results = []
py_times = []
with open('queries.txt', 'r') as file:
    for line in file.readlines():
        query = eval(line)  # same eval caveat as above
        start = time.time()
        py_results.append(tree.get_n_nearest_neighbors(query, 10000))
        py_times.append(time.time() - start)

# NOTE(review): the comparison against the C++ results below appears
# truncated in this chunk — the rest of the loop body is missing.
cpp_results = []
with open('results.txt', 'r') as file:
    i = 0
import numpy as np
import vptree


# Define distance function.
def euclidean(p1, p2):
    """Euclidean distance: square root of the summed squared differences."""
    return np.sqrt(np.sum(np.square(p2 - p1)))


# Generate some random points.
points = np.random.randn(20000, 10)
query = [.5] * 10

# Build tree in O(n log n) time complexity.
tree = vptree.VPTree(points, euclidean)

# Query single point.
tree.get_nearest_neighbor(query)

# Query n-points.
tree.get_n_nearest_neighbors(query, 10)

# Get all points within certain distance.
out = tree.get_all_in_range(query, 3.14)
# Fixed: `print out` is Python 2 syntax and a SyntaxError on Python 3.
print(out)
Ejemplo n.º 8
0
    return normalize([v], norm='l2')[0]


def similarity(v1, v2):
    """Cosine similarity: dot product normalized by both vector magnitudes."""
    magnitude_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    return np.dot(v1, v2) / magnitude_product


def cosineDistanceMatching(poseVector1, poseVector2):
    """Map the cosine similarity of two pose vectors to a distance.

    Returns sqrt(2 * (1 - cos_sim)), i.e. 0 for identical directions and
    sqrt(2) for orthogonal vectors (the similarity helper is inlined here).
    """
    cos_sim = np.dot(poseVector1, poseVector2) / (
        np.linalg.norm(poseVector1) * np.linalg.norm(poseVector2))
    return np.sqrt(2 * (1 - cos_sim))


# NOTE(review): idx_m is computed but never used in this chunk — verify a
# later (unseen) section consumes it, otherwise it is dead code.
idx_m = np.mean(poseData, axis=1)
# Build a VP-tree over all pose vectors using cosine distance as the metric.
tree = vptree.VPTree(poseData, cosineDistanceMatching)

#        8   12  16  20
#        |   |   |   |
#        7   11  15  19
#    4   |   |   |   |
#    |   6   10  14  18
#    3   |   |   |   |
#    |   5---9---13--17
#    2    \         /
#     \    \       /
#      1    \     /
#       \    \   /
#        ------0-
connections = [
Ejemplo n.º 9
0
def euclidean(p1, p2):
    """Hamming distance between two integer hashes (popcount of p1 XOR p2).

    NOTE: the name is misleading — this is a bit-wise Hamming distance, not
    a Euclidean one; it is kept for backward compatibility with the tree
    built below.  Inputs that do not support ^ directly (e.g. float or
    string hash values) are coerced with int() and retried.
    """
    try:
        return bin(p1 ^ p2).count('1')
    except TypeError:
        # Fallback for non-int hash representations.
        return bin(int(p1) ^ int(p2)).count('1')



hash_dict = {}

# Folder of candidate images; only files ending in bmp/jpg are hashed.
image_dir = "/Users/sunilku/Desktop/hackathon/misc/"
for filename in os.listdir(image_dir):
    # Same extension test as the original: compare the last 3 characters.
    if filename[-3:] in ("bmp", "jpg"):
        # Store the perceptual hash as an int (phash prints as hex).
        hash_dict[filename] = int(
            str(imagehash.phash(Image.open(image_dir + filename))), 16)
        # Fixed: `print i, hash_dict[i]` is Python 2 syntax (SyntaxError
        # on Python 3); print() produces the same space-separated output.
        print(filename, hash_dict[filename])

# Build the VP-tree over the hash values using the Hamming-distance metric.
hashes = [int(v) for v in hash_dict.values()]
tree = vptree.VPTree(hashes, euclidean)

# Persist the mapping and the tree; with-blocks close the files even if
# pickling raises (the original left the handles open on failure).
with open("hash_dict.pkl", "wb") as file1:
    pickle.dump(hash_dict, file1)
with open("tree.pkl", "wb") as file2:
    pickle.dump(tree, file2)