Ejemplo n.º 1
0
class ImageLSH(object):
    def __init__(self, input_dir, get_node=default_get_node):
        self.last_time = time.time()
        self.get_node = get_node
        self._create_from_dir_recur(input_dir)

    def record(self):
        last = self.last_time
        self.last_time = time.time()

        print "Spent:",self.last_time - last

    def _create_from_dir_recur(self, input_dir):
        print "Start searching for mat files in", input_dir
        matches = []
        for root, dirnames, filenames in os.walk(input_dir):
            for filename in fnmatch.filter(filenames, '*.mat'):
                matches.append(os.path.join(root, filename))
        print "Found %d files."
        self.record()
        self.lsh = LSHash(8, 784)
        for f in matches:
            n = self.get_node(f)
            self.lsh.index(n.feature, extra_data=n.path)
        print "Done constructing tree."
        self.record()

    def get_nearest_neighbors(self, mat_file, k):
        node = self.get_node(mat_file)
        return self.lsh.query(node.feature, num_results=k)
        print "Done searching. Elapsed: %s" % (time.time() - self.start_time)
Ejemplo n.º 2
0
 def _create_from_dir_recur(self, input_dir):
     print "Start searching for mat files in", input_dir
     matches = []
     for root, dirnames, filenames in os.walk(input_dir):
         for filename in fnmatch.filter(filenames, '*.mat'):
             matches.append(os.path.join(root, filename))
     print "Found %d files."
     self.record()
     self.lsh = LSHash(8, 784)
     for f in matches:
         n = self.get_node(f)
         self.lsh.index(n.feature, extra_data=n.path)
     print "Done constructing tree."
     self.record()
Ejemplo n.º 3
0
    return np.linalg.norm(x-y)

def brute_force(train_points, test_point, k):
    """Return the ``k`` training points closest to ``test_point``.

    Every point in ``train_points`` is paired with its ``l2`` distance to
    ``test_point``; the pairs are ordered by ascending distance and the
    first ``k`` ``(point, distance)`` tuples are returned.
    """
    scored = []
    for candidate in train_points:
        scored.append((candidate, l2(test_point, candidate)))
    # list.sort is stable, so ties keep their order from train_points.
    scored.sort(key=lambda pair: pair[1])
    return scored[:k]

if __name__ == "__main__":
    num_dimension = 2000
    num_samples = 100000
    num_test = 10
    k = 5
    X = np.random.uniform(0, 100, size=(num_samples, num_dimension))
    Y = np.random.uniform(0, 100, size=(num_test, num_dimension))
    train_points = [X[i,:] for i in range(num_samples)]
    lsh = LSHash(8, num_dimension)
    start_cons = time.time()
    for i in range(num_samples):
        lsh.index(X[i,:])
    print "done construction in", time.time() - start_cons
    for i in range(num_test):
        test_point = Y[i,:]
        start_time = time.time()
        lsh_neighbors = lsh.query(test_point, num_results=k, distance_func='true_euclidean')
        done_lsh = time.time()
        brute_force_neighbors = brute_force(train_points, test_point, k)
        done_brute_force = time.time()
        print "lsh in:", done_lsh-start_time
        print "brute-force in:", done_brute_force-done_lsh
        assert len(lsh_neighbors) == k
        assert len(brute_force_neighbors) == k