Example #1
0
def test_lsh_index_positive():
    """Check the return types of the query API on a randomly filled index.

    Relies on module-level ``n`` (number of points) and ``d`` (dimension).
    The same assertions are run against a query object and a query pool.
    """
    params = falconn.get_default_parameters(n, d)
    index = falconn.LSHIndex(params)
    points = np.random.randn(n, d).astype(np.float32)
    index.setup(points)

    def is_int(value):
        # ``long`` only exists on Python 2; on Python 3 looking it up raises
        # NameError and plain ``int`` is the only integer type to accept.
        try:
            return isinstance(value, (int, long))
        except NameError:
            return isinstance(value, int)

    def check_return_types(handle):
        probe = np.random.randn(d).astype(np.float32)
        # Neighbour / candidate lookups return lists or a single index.
        assert isinstance(handle.find_k_nearest_neighbors(probe, 10), list)
        assert isinstance(handle.find_near_neighbors(probe, 10.0), list)
        assert is_int(handle.find_nearest_neighbor(probe))
        assert isinstance(handle.get_candidates_with_duplicates(probe), list)
        # Configuration getters return integers.
        assert is_int(handle.get_max_num_candidates())
        assert is_int(handle.get_num_probes())
        assert isinstance(handle.get_query_statistics(),
                          falconn.QueryStatistics)
        assert isinstance(handle.get_unique_candidates(probe), list)
        # Mutators return nothing.
        assert handle.reset_query_statistics() is None
        assert handle.set_max_num_candidates(100) is None
        assert handle.set_num_probes(10) is None

    for make_handle in (index.construct_query_object,
                        index.construct_query_pool):
        check_return_types(make_handle())
Example #2
0
    def setup_hash_tables(self, data, threads=0, probes=50):
        """Build a FALCONN LSH index over ``data`` and return a query object.

        Args:
            data (numpy.ndarray): matrix handed to ``LSHIndex.setup``; its row
                 count is used as the number of points.
            threads (int): value assigned to ``num_setup_threads``. 0 (the
                 default) presumably lets FALCONN choose the thread count --
                 TODO confirm against the FALCONN documentation.
            probes (int): number of probes configured on the returned query
                 object.

        Returns:
            The FALCONN query object constructed from the new index.
        """
        import falconn

        # NOTE(review): the dimension comes from len(self.seed), not from
        # data.shape[1] -- presumably self.seed is a vector of the same width.
        num_points = data.shape[0]
        dimension = len(self.seed)
        lsh_params = falconn.get_default_parameters(num_points, dimension)
        lsh_params.num_setup_threads = threads

        index = falconn.LSHIndex(lsh_params)
        index.setup(data)

        searcher = index.construct_query_object()
        searcher.set_num_probes(probes)
        return searcher
Example #3
0
    def __init__(self, feature_file, label_file, port, worker_num=10):
        """Bind the ZeroMQ sockets, load the data and build the LSH index.

        Args:
            feature_file: path given to ``np.load`` for the feature matrix.
            label_file: path given to ``np.load`` for the labels.
            port: TCP port the ROUTER socket is bound to.
            worker_num: number of slots in the shared ``worker_counts`` array.
        """
        self.url_worker = 'inproc://ping-workers'
        router_endpoint = "tcp://*:%s" % port
        self.worker_num = worker_num
        self.worker_counts = Array('i', [0] * worker_num)

        # ROUTER socket on TCP, DEALER socket on the in-process endpoint.
        self.context = zmq.Context()
        self.router = self.context.socket(zmq.ROUTER)
        self.router.bind(router_endpoint)
        self.workers = self.context.socket(zmq.DEALER)
        self.workers.bind(self.url_worker)

        self.label = np.load(label_file)
        logger.info("start load feature data")
        load_started = time.time()
        self.feature = np.load(feature_file)
        load_finished = time.time()
        logger.info("load cost time:%f" % (load_finished - load_started))

        num_points = self.feature.shape[0]
        dimension = self.feature.shape[1]
        lsh_params = fc.get_default_parameters(
            num_points, dimension, fc.DistanceFunction.EuclideanSquared)
        index = fc.LSHIndex(lsh_params)

        setup_started = time.time()
        index.setup(self.feature)
        setup_finished = time.time()
        logger.info("train cost time:%f" % (setup_finished - setup_started))

        # A query pool (rather than a single query object) is kept.
        self.qp = index.construct_query_pool()
def set_cp(data):
    """Index ``data`` with FALCONN's default parameters; return a query object.

    ``compute_number_of_hash_functions`` is called with 7 as its first
    argument (the target number of hash bits, per the FALCONN docs) and the
    returned query object is configured to use 896 probes.

    Args:
        data: 2-D array; its shape supplies the point count and dimension.

    Returns:
        The query object constructed from the index.
    """
    # An explicit cross-polytope configuration (d=128, k=3, l=10,
    # 3 rotations, last_cp_dimension=16, NegativeInnerProduct, FlatHashTable,
    # seed 119417657 ^ 833840234) used to be sketched here; the library
    # defaults are used instead.
    point_count, dimension = data.shape
    lsh_params = falconn.get_default_parameters(point_count, dimension)
    falconn.compute_number_of_hash_functions(7, lsh_params)

    index = falconn.LSHIndex(lsh_params)
    index.setup(data)

    query_object = index.construct_query_object()
    query_object.set_num_probes(896)
    return query_object
Example #5
0
 def get_clusters_falconn(self):
     """Group the stored vectors into similarity clusters via a FALCONN index.

     For every vector the near neighbours within ``self.similarity_threshold``
     are looked up, the vector's own position is appended, and groups with at
     least two members are turned into ``SimilarityCluster`` objects.

     Returns:
         list: the ``to_serializable_object()`` form of every cluster built.
     """
     vectors = np.array(self.vector_matrix)
     # NOTE(review): norm() without ``axis`` is one scalar for the whole
     # matrix and mean() without ``axis`` is one scalar too -- per-row
     # normalisation / per-column centring may have been intended. Confirm.
     vectors /= np.linalg.norm(vectors).reshape(-1, 1)
     vectors -= np.mean(vectors)

     point_count = len(self.vector_matrix)
     dimension = len(self.vector_matrix[0])
     falconn_params = falconn.get_default_parameters(point_count, dimension)
     falconn_params.distance_function = "euclidean_squared"
     lsh_index = falconn.LSHIndex(falconn_params)
     lsh_index.setup(vectors)

     clusters = []
     for position, vector in enumerate(self.vector_matrix):
         # NOTE(review): the query uses the raw vector, not the scaled and
         # shifted copy that was indexed -- verify this is intended.
         members = lsh_index.find_near_neighbors(np.array(vector),
                                                 self.similarity_threshold)
         members = members + (position,)
         if len(members) < 2:
             continue

         # The first neighbour seeds the cluster; the rest are merged in.
         anchor = members[0]
         group = SimilarityCluster(self.similarity_threshold,
                                   self.vector_id_list[anchor],
                                   self.vector_matrix[anchor],
                                   self.start_time_ms,
                                   self.end_time_ms)
         for member in members:
             if member != anchor:
                 group.similar_image_ids.append(self.vector_id_list[member])
                 group.apply_vector_to_average(self.vector_matrix[member])
         clusters.append(group.to_serializable_object())
     return clusters
Example #6
0
 def retrival(self, query, dataset=None, *, k=None, threshold=None):
     """Look up neighbours of ``query`` in a cached FALCONN index.

     An index is built once per distinct dataset -- keyed by an xxh64 digest
     of the dataset's first column -- and stored in ``self.tables`` together
     with the dataset mean.  The most recently used entry is remembered in
     ``self.last_table`` and reused when ``dataset`` is omitted.

     Args:
         query: vector to search for.  It is centred with the stored mean on
             a fresh array; the caller's object is left untouched.
         dataset: optional 2-D array to search in.
         k: if given, return the ``k`` nearest neighbours.
         threshold: if given, return all neighbours within this threshold.

     Returns:
         The result of ``find_k_nearest_neighbors``, ``find_near_neighbors``
         or ``find_nearest_neighbor`` on the FALCONN query object, depending
         on which of ``k`` / ``threshold`` was supplied.

     Raises:
         Exception: no dataset was given and ``self.last_table`` is None.
         ValueError: both ``k`` and ``threshold`` were passed.
     """
     if dataset is None:
         table = self.last_table
     else:
         hashint = xxhash.xxh64(dataset[:, 0].copy(), self.seed).intdigest()
         if hashint in self.tables:
             table = self.tables[hashint]
         else:
             print('find a new dataset')
             # astype() copies, so centring below does not touch the input.
             dataset = dataset.astype(np.float32)
             mean = np.mean(dataset, axis=0)
             dataset -= mean
             params = falconn.get_default_parameters(
                 dataset.shape[0], dataset.shape[1])
             falconn.compute_number_of_hash_functions(7, params)
             lsh_index = falconn.LSHIndex(params)
             lsh_index.setup(dataset)
             qtable = lsh_index.construct_query_object()
             qtable.set_num_probes(10000)
             table = (mean, qtable)
             self.tables[hashint] = table
     if table is None:
         raise Exception("Dataset not specific")
     if k is not None and threshold is not None:
         raise ValueError("k and threshold should not pass simultaneously")
     self.last_table = table

     mean, query_object = table
     # Centre on a new array: an in-place ``query -= mean`` would shift the
     # caller's vector a little further on every call.
     centred_query = query - mean
     if k is not None:
         return query_object.find_k_nearest_neighbors(centred_query, k)
     if threshold is not None:
         return query_object.find_near_neighbors(centred_query, threshold)
     return query_object.find_nearest_neighbor(centred_query)
Example #7
0
def runTest():
    """Measure k-NN label accuracy of a FALCONN index over the test set.

    For each of ``times`` rounds the train/test arrays are reloaded from
    ``path``, an LSH index is built over the flattened training features and
    every test sample counts as a hit when any of its ``k`` retrieved
    neighbours carries the same label (compared through ``ks`` when
    ``is_big_key`` is set).  Progress and final accuracy are logged.

    Each sample is indexed as ``sample[0]`` (features), ``sample[1]`` (label)
    and ``sample[2]`` (only logged on a miss).

    Relies on module-level configuration: ``times``, ``resetTest``, ``path``,
    ``test_file_name``, ``train_file_name``, ``dim``, ``is_pool``, ``k``,
    ``is_big_key``, ``ks``, ``is_log`` and ``reportTime``.
    """
    m_bad = 0
    m_right = 0
    m_num = 0
    for main_times in range(0, times):
        if resetTest:
            resetRandom()
        test = np.load(os.path.join(path, test_file_name))
        train = np.load(os.path.join(path, train_file_name))
        testNum = len(test)
        trainNum = len(train)
        p = falconn.get_default_parameters(trainNum, dim)
        t = falconn.LSHIndex(p)
        dataset = [np.ravel(x[0]).astype(np.float32) for x in train]
        # Function-call form works on both Python 2 and Python 3.
        print(len(dataset))
        dataset = np.array(dataset)
        t.setup(dataset)
        if is_pool:
            q = t.construct_query_pool()
        else:
            q = t.construct_query_object()
        t2 = time.time()
        for i in test:
            t1 = time.time()
            # Flatten the query features in place before searching.
            i[0] = np.ravel(i[0])
            tList = train[q.find_k_nearest_neighbors(i[0], k)]
            is_true = False
            for neighbor in tList:
                if is_big_key:
                    if ks[neighbor[1]] == ks[i[1]]:
                        is_true = True
                        break
                else:
                    if neighbor[1] == i[1]:
                        is_true = True
                        break
            if is_true:
                m_right += 1
            else:
                m_bad += 1
                if is_log:
                    # The "%" operator was missing here, which made the
                    # string literal get *called* and raise TypeError.
                    if is_big_key:
                        logging.error('###### Bad %s(%s: %s) with %s' %
                                      (ks[i[1]], i[1], i[2], tList))
                    else:
                        logging.error('###### Bad %s: %s with %s' %
                                      (i[1], i[2], tList))
            m_num += 1
            if m_num % reportTime == 1:
                logging.info('Last accuracy: %.2f %%' %
                             (m_right / float(m_num) * 100.0))
                logging.info('Last loss: %.2f %%' %
                             (m_bad / float(m_num) * 100.0))
                logging.info('right: %d bad: %d now: %d/%d Time: %.5fs/1iter' %
                             (m_right, m_bad, m_num, testNum * times, (time.time() - t1)))
        logging.info('Speed Time: %.8f' % ((time.time() - t2) / testNum))
    # Skip the summary ratios when nothing was evaluated (avoids 0-division).
    if m_num:
        logging.info('Last accuracy: %.2f %% (%d/%d)' % ((m_right / float(m_num) * 100.0), m_right, m_num))
        logging.info('Last loss: %.2f %% (%d/%d)' % ((m_bad / float(m_num) * 100.0), m_bad, m_num))
    logging.info('End Run Test')
 def hash_construct(self, features):
     """Build a 20-table Euclidean LSH index over ``features``.

     The time spent in ``setup`` is printed.

     Returns:
         The query object constructed from the index.
     """
     point_count = features.shape[0]
     dimension = features.shape[1]
     lsh_params = fc.get_default_parameters(
         point_count, dimension, fc.DistanceFunction.EuclideanSquared)
     lsh_params.l = 20  # override the default number of hash tables

     index = fc.LSHIndex(lsh_params)
     started = time.time()
     index.setup(features)
     finished = time.time()
     print("### hash train time:%f" % (finished - started))
     return index.construct_query_object()
Example #9
0
def test_get_default_parameters():
    """Pin the defaults FALCONN derives for 100000 points of dimension 128.

    The fourth positional argument of ``get_default_parameters`` is True.
    """
    point_count, dimension = 100000, 128
    distance = 'negative_inner_product'
    params = falconn.get_default_parameters(point_count, dimension, distance,
                                            True)

    # (attribute, expected value), checked in this order.
    expected = (
        ('l', 10),
        ('lsh_family', 'cross_polytope'),
        ('k', 2),
        ('dimension', dimension),
        ('distance_function', distance),
        ('num_rotations', 1),
        ('last_cp_dimension', 64),
    )
    for attribute, value in expected:
        assert getattr(params, attribute) == value
Example #10
0
def test_get_default_parameters():
  """Pin the defaults FALCONN derives for 100000 points of dimension 128.

  The fourth positional argument of ``get_default_parameters`` is True.
  """
  point_count, dimension = 100000, 128
  distance = 'negative_inner_product'
  params = falconn.get_default_parameters(point_count, dimension, distance,
                                          True)

  # (attribute, expected value), checked in this order.
  expected = (
      ('l', 10),
      ('lsh_family', 'cross_polytope'),
      ('k', 2),
      ('dimension', dimension),
      ('distance_function', distance),
      ('num_rotations', 1),
      ('last_cp_dimension', 64),
  )
  for attribute, value in expected:
    assert getattr(params, attribute) == value
Example #11
0
def hashing(hash_input):
    """
    Build a FALCONN LSH index over a static dataset.

    :param hash_input: two-dimensional dataset; ``len(hash_input)`` is used
        as the number of points and ``len(hash_input[0])`` as the dimension.
    :return: the query object constructed from the index.
    """
    point_count = len(hash_input)
    dimension = len(hash_input[0])
    index = falconn.LSHIndex(
        falconn.get_default_parameters(point_count, dimension))
    index.setup(hash_input)
    return index.construct_query_object()
Example #12
0
def main():
    """Parse the CLI, load (or build) the centred database and run queries.

    A text database is converted to float32, centred on its mean and saved
    via ``save_numpy_database``; a numpy database is loaded as-is together
    with its stored mean.  The load time is reported on stderr.
    """
    parser = argparse.ArgumentParser()
    source = parser.add_mutually_exclusive_group()
    source.add_argument("--database", metavar="FILENAME",
                        default="database.txt")
    source.add_argument("--numpy-database", metavar="FILENAME")
    parser.add_argument("--test-vector", metavar="FILENAME",
                        default="test-vector.txt")
    parser.add_argument("--limit", metavar="MAX", type=int, default=-1)
    parser.add_argument("--params",
                        choices=("hyperplane", "crosspolytope", "default"),
                        default="default")
    parser.add_argument("--probes", type=int, default=2464)
    args = parser.parse_args()

    load_started_ns = time.monotonic_ns()
    if args.numpy_database:
        db, mean = read_numpy_database(args.numpy_database)
    else:
        db = read_database(args.database, args.limit).astype(np.float32)
        mean = np.mean(db, axis=0)
        db -= mean
        save_numpy_database(db, mean, args.database)
    load_finished_ns = time.monotonic_ns()
    elapsed_ms = (load_finished_ns - load_started_ns) / 1000000.0
    print(f"Reading database {elapsed_ms:.3f}ms", file=sys.stderr)

    num_points = len(db)
    dimensions = len(db[0])

    # One parameter factory per --params choice; only the selected one runs.
    factories = {
        "default": lambda: falconn.get_default_parameters(
            num_points=num_points, dimension=dimensions),
        "hyperplane": lambda: hyperplane_hashing_params(
            dimensions=dimensions),
        "crosspolytope": lambda: cross_polytope_hashing_params(
            dimensions=dimensions),
    }
    if args.params not in factories:
        raise ValueError(f"Unknown params: {args.params}")
    index_params = factories[args.params]()

    test_queries(db, args.probes, mean, index_params, args.test_vector)
Example #13
0
def test_get_default_parameters():
    """Pin the defaults FALCONN derives for 100000 points of dimension 128.

    The fourth positional argument of ``get_default_parameters`` is True.
    """
    point_count, dimension = 100000, 128
    distance = 'negative_inner_product'
    params = falconn.get_default_parameters(point_count, dimension, distance,
                                            True)

    # (attribute, expected value), checked in this order.
    expected = (
        ('l', 10),
        ('lsh_family', 'cross_polytope'),
        ('storage_hash_table', 'bit_packed_flat_hash_table'),
        ('num_setup_threads', 0),
        ('k', 2),
        ('dimension', dimension),
        ('distance_function', distance),
        ('num_rotations', 1),
        ('last_cp_dimension', 64),
    )
    for attribute, value in expected:
        assert getattr(params, attribute) == value
Example #14
0
    def add(self, vecs):
        """Index the mean-centred ``vecs`` and keep a query object for them.

        Stores the mean in ``self.center``, the construction parameters in
        ``self.params_cp``, the index in ``self.table`` and the query object
        in ``self.query_object``.
        """
        # The mean is subtracted when the data is handed to setup() below.
        self.center = np.mean(vecs, axis=0)

        point_count = vecs.shape[0]
        self.params_cp = falconn.get_default_parameters(
            num_points=point_count,
            dimension=vecs.shape[1],
            distance=falconn.DistanceFunction.EuclideanSquared,
            is_sufficiently_dense=True)

        # Number of hash bits: log2 of the point count, rounded.
        hash_bits = int(np.round(np.log2(vecs.shape[0])))
        falconn.compute_number_of_hash_functions(hash_bits, self.params_cp)

        self.table = falconn.LSHIndex(self.params_cp)
        centred = vecs - self.center
        self.table.setup(centred)
        self.query_object = self.table.construct_query_object()
Example #15
0
def test_get_default_parameters():
    """Pin the defaults FALCONN derives for 100000 points of dimension 128.

    The fourth positional argument of ``get_default_parameters`` is True.
    """
    point_count, dimension = 100000, 128
    distance = "negative_inner_product"
    params = falconn.get_default_parameters(point_count, dimension, distance,
                                            True)

    # (attribute, expected value), checked in this order.
    expected = (
        ("l", 10),
        ("lsh_family", "cross_polytope"),
        ("storage_hash_table", "bit_packed_flat_hash_table"),
        ("num_setup_threads", 0),
        ("k", 2),
        ("dimension", dimension),
        ("distance_function", distance),
        ("num_rotations", 1),
        ("last_cp_dimension", 64),
    )
    for attribute, value in expected:
        assert getattr(params, attribute) == value
Example #16
0
def init_hash():
    """Load the training samples and build a FALCONN index over them.

    Relies on module-level ``path``, ``dim`` and ``is_pool``.

    Returns:
        tuple: ``(query handle, train)`` where the handle is a query pool
        when ``is_pool`` is truthy and a query object otherwise.
    """
    # Load all samples into one array.
    train = np.array(load_all_beOne(path))
    sample_count = len(train)

    # Default parameters sized by the number of samples.
    index = falconn.LSHIndex(falconn.get_default_parameters(sample_count, dim))

    # Flatten the first field of every sample into a float32 row.
    rows = np.array([np.ravel(sample[0]).astype(np.float32)
                     for sample in train])

    # Build the hash tables.
    logging.info('Start Hash setup')
    index.setup(rows)

    make_handle = (index.construct_query_pool if is_pool
                   else index.construct_query_object)
    return (make_handle(), train)
Example #17
0
def main():
    """Index a random split of the iris data and look up one test sample.

    Prints the label of the first test sample, the indices of its three
    nearest training neighbours and those neighbours' labels.
    """
    iris = load_iris()
    features, labels = iris.data, iris.target
    X_train, X_test, y_train, y_test = train_test_split(features, labels)

    point_count, dimension = X_train.shape[0], X_train.shape[1]
    index = LSHIndex(get_default_parameters(point_count, dimension))
    index.setup(X_train)
    query_object = index.construct_query_object()

    neighbours = query_object.find_k_nearest_neighbors(X_test[0], 3)
    print(y_test[0])
    print(neighbours)
    print(y_train[neighbours])

    print('DONE')
Example #18
0
    def fit(self, X: np.ndarray, y: np.ndarray = None):
        """ Build the LSH index from training data.

        ``self.metric`` is normalised to one of ``'euclidean'``,
        ``'sqeuclidean'`` or ``'cosine'``; an unknown metric triggers a
        warning and falls back to ``'euclidean'``.

        Parameters
        ----------
        X: np.array
            Data to be indexed
        y: any
            Stored on the instance but otherwise ignored

        Returns
        -------
        self: FalconnLSH
            An instance of LSH with a built index
        """
        X = check_array(X, dtype=[np.float32, np.float64])

        squared_l2 = falconn.DistanceFunction.EuclideanSquared
        neg_inner = falconn.DistanceFunction.NegativeInnerProduct
        # (accepted spellings, canonical name kept on self.metric, distance)
        known_metrics = (
            (('euclidean', 'l2', 'minkowski'), 'euclidean', squared_l2),
            (('squared_euclidean', 'sqeuclidean'), 'sqeuclidean', squared_l2),
            (('cosine', 'NegativeInnerProduct', 'neg_inner'), 'cosine',
             neg_inner),
        )
        for spellings, canonical, distance in known_metrics:
            if self.metric in spellings:
                self.metric = canonical
                break
        else:
            warnings.warn(
                f'Invalid metric "{self.metric}". Using "euclidean" instead')
            self.metric = 'euclidean'
            distance = squared_l2

        # Build the index over the validated data.
        params = falconn.get_default_parameters(*X.shape, distance=distance)
        index = falconn.LSHIndex(params)
        index.setup(X)

        self.X_train_ = X
        self.y_train_ = y
        self.index_ = index

        return self
Example #19
0
    def __init__(self, feature_file, label_file, id_feature_file,
                 id_label_file):
        """Load the feature/label arrays and build a Euclidean LSH index.

        Args:
            feature_file: path given to ``np.load`` for the feature matrix
                that is indexed.
            label_file: path given to ``np.load``; stored as ``self.label``.
            id_feature_file: path given to ``np.load``; stored as
                ``self.idfeature``.
            id_label_file: path given to ``np.load``; stored as
                ``self.idlabel``.

        The load and index-setup times are printed.  The resulting query
        object is kept in ``self.qo``.
        """
        self.idfeature = np.load(id_feature_file)
        self.idlabel = np.load(id_label_file)

        self.label = np.load(label_file)
        # Function-call form: valid on Python 3 and prints the same text on
        # Python 2 (the original print statement is a Python 3 syntax error).
        print("start load feature data")
        t1 = time.time()
        feature = np.load(feature_file)
        t2 = time.time()
        print("load cost time:%f" % (t2 - t1))
        dp = fc.get_default_parameters(feature.shape[0], feature.shape[1],
                                       fc.DistanceFunction.EuclideanSquared)
        ds = fc.LSHIndex(dp)
        train_st = time.time()
        ds.setup(feature)
        train_et = time.time()
        print("train cost time:%f" % (train_et - train_st))
        self.qo = ds.construct_query_object()
Example #20
0
 def load_identifier(self,labelFile,featuresFile):
     """Load labels and features, then build a 30-table Euclidean LSH index.

     Args:
         labelFile: path given to ``np.load``; stored as ``self.label``.
         featuresFile: path given to ``np.load``; stored as ``self.feature``
             (and aliased as ``self.embs``).

     The parameters, index and query object are kept in ``self.dp``,
     ``self.ds`` and ``self.qo``.  Load and setup times are printed.
     """
     self.label = np.load(labelFile)
     # Function-call form works on both Python 2 and Python 3.
     print("start load feature data")
     print(labelFile)
     t1 = time.time()
     self.feature = np.load(featuresFile)
     self.embs = self.feature
     # The original passed the format string and the dtype as two separate
     # print arguments (and used %d for a dtype); format it explicitly.
     print("feature dtype:%s" % self.feature.dtype)
     t2 = time.time()
     print("load cost time:%f" % (t2 - t1))
     self.dp = fc.get_default_parameters(self.feature.shape[0], self.feature.shape[1],
                                         fc.DistanceFunction.EuclideanSquared)
     self.dp.l = 30  # override the default number of hash tables
     self.ds = fc.LSHIndex(self.dp)
     train_st = time.time()
     self.ds.setup(self.feature)
     train_et = time.time()
     print("train cost time:%f" % (train_et - train_st))
     self.qo = self.ds.construct_query_object()
Example #21
0
def test_lsh_index_positive():
    """Smoke-test: every query method runs on a fitted index without raising.

    This snippet uses the API in which ``LSHIndex`` is filled with ``fit``
    and queried directly.
    """
    n, d = 1000, 128
    index = falconn.LSHIndex(falconn.get_default_parameters(n, d))
    index.fit(np.random.randn(n, d).astype(np.float32))
    query = np.random.randn(d).astype(np.float32)

    index.find_k_nearest_neighbors(query, 10)
    index.find_near_neighbors(query, 10.0)
    # The remaining calls keep the original order; they are grouped only by
    # whether they take the query vector.
    for name in ('find_nearest_neighbor', 'get_candidates_with_duplicates'):
        getattr(index, name)(query)
    for name in ('get_max_num_candidates', 'get_num_probes',
                 'get_query_statistics'):
        getattr(index, name)()
    for name in ('get_unique_candidates', 'get_unique_sorted_candidates'):
        getattr(index, name)(query)
    index.reset_query_statistics()
    index.set_max_num_candidates(100)
    index.set_num_probes(10)
Example #22
0
def test_lsh_index_positive():
  """Smoke-test: every query method runs on a set-up index without raising.

  This snippet queries the ``LSHIndex`` directly after ``setup``.
  ``get_unique_sorted_candidates`` is deliberately not exercised here.
  """
  n, d = 1000, 128
  index = falconn.LSHIndex(falconn.get_default_parameters(n, d))
  index.setup(np.random.randn(n, d).astype(np.float32))
  query = np.random.randn(d).astype(np.float32)

  index.find_k_nearest_neighbors(query, 10)
  index.find_near_neighbors(query, 10.0)
  # The remaining calls keep the original order; they are grouped only by
  # whether they take the query vector.
  for name in ('find_nearest_neighbor', 'get_candidates_with_duplicates'):
    getattr(index, name)(query)
  for name in ('get_max_num_candidates', 'get_num_probes',
               'get_query_statistics'):
    getattr(index, name)()
  index.get_unique_candidates(query)
  index.reset_query_statistics()
  index.set_max_num_candidates(100)
  index.set_num_probes(10)
Example #23
0
def lsh_sieve(full_deltas, d, n):
    """Sum the rows of ``full_deltas``, down-weighting crowded ones.

    The input is reshaped to ``(n, d)`` and mean-centred for indexing.  Each
    original row is divided by the number of indexed points found within
    ``1.0 / d`` of its centred version before being added to the total.

    Returns:
        numpy.ndarray: the accumulated vector of length ``d``.
    """
    deltas = np.reshape(full_deltas, (n, d))
    centred = deltas - np.mean(deltas, axis=0)

    index = falconn.LSHIndex(falconn.get_default_parameters(n, d))
    index.setup(centred)
    query_object = index.construct_query_object()

    total = np.zeros(d)
    for raw_row, centred_row in zip(deltas, centred):
        # Rows with many near neighbours contribute proportionally less.
        neighbours = query_object.find_near_neighbors(centred_row, 1.0 / d)
        total = total + (raw_row / len(neighbours))

    return total
Example #24
0
def setup_lsh():
    """Build a FALCONN query object over the signatures stored in PostgreSQL.

    Reads every ``SIGNATURE`` from table ``AKAFINGER``, centres the resulting
    matrix on its column mean and indexes it with default LSH parameters.

    Returns:
        tuple: ``(center, query_object)`` -- the column mean that was
        subtracted from the data and the query object for the index.  The
        comments in the original note that the centre is meant to be applied
        to incoming snippets as well.
    """
    # Extract the signature matrix from the database.
    con = psycopg2.connect("dbname=yinhan user=yinhan")
    try:
        cur = con.cursor()
        cur.execute("SELECT SIGNATURE FROM AKAFINGER")
        lst = cur.fetchall()
        con.commit()
    finally:
        # Release the connection even when the query fails.
        con.close()

    data = np.array([val[0] for val in lst])
    # Centre the data; this is said to improve the model's performance.
    center = np.mean(data, axis=0)
    data = data - center
    params_cp = falconn.get_default_parameters(num_points=data.shape[0],
                                               dimension=data.shape[1])
    table = falconn.LSHIndex(params_cp)
    table.setup(data)

    return center, table.construct_query_object()
 def init_falconn():
     """Load the saved feature arrays and build a FALCONN query pool.

     Reads ``tensorflow-feature.npy``, ``tensorflow-class_name.npy`` and
     ``tensorflow-file_path.npy`` from ``model_path`` and prints each shape.

     Returns:
         tuple: ``(features, class names, file paths, query pool)``.
     """
     # NOTE(review): the dimension is hard-coded instead of being read from
     # the loaded features -- confirm it matches my_feature.shape[1].
     dim = 2048
     # Load the arrays.  print() is used in its function-call form so the
     # code is valid on Python 3 and prints the same text on Python 2.
     my_feature = np.load(
         os.path.join(model_path, 'tensorflow-feature.npy'))
     print(my_feature.shape)
     my_class_name = np.load(
         os.path.join(model_path, 'tensorflow-class_name.npy'))
     print(my_class_name.shape)
     my_file_path = np.load(
         os.path.join(model_path, 'tensorflow-file_path.npy'))
     print(my_file_path.shape)
     # Number of indexed samples.
     trainNum = len(my_feature)
     # Default parameters.
     p = falconn.get_default_parameters(trainNum, dim)
     t = falconn.LSHIndex(p)
     dataset = my_feature
     # Build the hash tables.
     t.setup(dataset)
     q = t.construct_query_pool()
     return my_feature, my_class_name, my_file_path, q
Example #26
0
def init_hash():
    """Load the saved arrays into module globals and build a FALCONN index.

    Sets the globals ``my_arr``, ``my_id`` and ``big_class`` from files under
    ``path``.  Relies on module-level ``path``, ``dim`` and ``is_pool``.

    Returns:
        A query pool when ``is_pool`` is truthy, otherwise a query object.
    """
    global my_arr, my_id, big_class
    # Load the arrays.
    my_arr = np.load(os.path.join(path, 'array.npy'))
    my_id = np.load(os.path.join(path, 'id.npy'))
    # The context manager closes the file even if reading fails.
    with open(os.path.join(path, 'big_class.txt'), 'r') as f:
        a = f.read()
    # SECURITY(review): eval() executes whatever the file contains.  If the
    # file only holds a literal, ast.literal_eval would be the safe choice.
    big_class = eval(a)
    # Number of indexed samples.
    trainNum = len(my_arr)
    # Default parameters.
    p = falconn.get_default_parameters(trainNum, dim)
    t = falconn.LSHIndex(p)
    dataset = my_arr
    # Build the hash tables.
    logging.info('Start Hash setup')
    t.setup(dataset)
    if is_pool:
        q = t.construct_query_pool()
    else:
        q = t.construct_query_object()
    return q
Example #27
0
def init_hash():
    """Load the saved arrays into module globals and build a FALCONN index.

    Sets the globals ``my_arr``, ``my_id`` and ``big_class`` from files under
    ``path``.  Relies on module-level ``path``, ``dim`` and ``is_pool``.

    Returns:
        A query pool when ``is_pool`` is truthy, otherwise a query object.
    """
    global my_arr, my_id, big_class
    # Load the arrays.
    my_arr = np.load(os.path.join(path, 'array.npy'))
    my_id = np.load(os.path.join(path, 'id.npy'))
    # The context manager closes the file even if reading fails.
    with open(os.path.join(path, 'big_class.txt'), 'r') as f:
        a = f.read()
    # SECURITY(review): eval() executes whatever the file contains.  If the
    # file only holds a literal, ast.literal_eval would be the safe choice.
    big_class = eval(a)
    # Number of indexed samples.
    trainNum = len(my_arr)
    # Default parameters.
    p = falconn.get_default_parameters(trainNum, dim)
    t = falconn.LSHIndex(p)
    dataset = my_arr
    # Build the hash tables.
    logging.info('Start Hash setup')
    t.setup(dataset)
    if is_pool:
        q = t.construct_query_pool()
    else:
        q = t.construct_query_object()
    return q
Example #28
0
    def build_lsh(self, all_signatures):
        """
        Build an LSH index and query object over the song signatures.

        The results are stored on the instance as ``self.table`` and
        ``self.query_object``; the method itself always returns None.

        params:
            all_signatures: 2-D array of all signatures from the database.
                It is mean-centred IN PLACE, so the caller's array changes.

        raises:
            ValueError: ``all_signatures`` has no rows.
        """
        point_count = all_signatures.shape[0]
        if point_count == 0:
            raise ValueError("All signatures must not be empty.")

        lsh_params = falconn.get_default_parameters(point_count,
                                                    all_signatures.shape[1])

        # Centring the dataset is meant to improve LSH performance.
        all_signatures -= np.mean(all_signatures, axis=0)

        print('Constructing the LSH table...')
        index = falconn.LSHIndex(lsh_params)
        index.setup(all_signatures)

        print('Constructing the queries...')
        searcher = index.construct_query_object()

        self.table = index
        self.query_object = searcher

        # NOTE(review): both outcomes of this check yield None; kept only to
        # preserve the original's truthiness evaluation of the two objects.
        if not (index and searcher):
            return None
Example #29
0
def getPara_forLsh(datasetShape):
    """Return default FALCONN parameters switched to squared Euclidean distance.

    ``datasetShape`` must unpack into exactly ``(number of points, dimension)``.
    """
    point_count, dimension = datasetShape
    lsh_params = falconn.get_default_parameters(point_count, dimension)
    # Plain (squared) Euclidean distance instead of the library default.
    lsh_params.distance_function = "euclidean_squared"
    return lsh_params
Example #30
0
def test_lsh_index_negative():
    """Verify that invalid use of ``LSHIndex`` raises the expected exceptions.

    This snippet uses the API in which ``LSHIndex`` is filled with ``fit``
    and queried directly.
    """
    n = 1000
    d = 128
    index = falconn.LSHIndex(falconn.get_default_parameters(n, d))

    def expect(exc_type, action):
        # ``action`` must raise ``exc_type``.  Returning normally is a test
        # failure; any other exception propagates unchanged.
        try:
            action()
        except exc_type:
            return
        assert False

    # Querying before any data has been fitted.
    expect(RuntimeError,
           lambda: index.find_nearest_neighbor(np.random.randn(d)))
    # Datasets with the wrong container type, dtype or rank.
    expect(TypeError, lambda: index.fit([[1.0, 2.0], [3.0, 4.0]]))
    expect(ValueError,
           lambda: index.fit(np.random.randn(n, d).astype(np.int32)))
    expect(ValueError, lambda: index.fit(np.random.randn(10, 10, 10)))

    # Both float widths are accepted; the float64 fit is the one left active.
    index.fit(np.random.randn(n, d).astype(np.float32))
    index.fit(np.random.randn(n, d).astype(np.float64))
    query = np.random.randn(d).astype(np.float64)

    # Invalid scalar arguments.
    expect(TypeError, lambda: index.find_k_nearest_neighbors(query, 0.5))
    expect(ValueError, lambda: index.find_k_nearest_neighbors(query, -1))
    expect(ValueError, lambda: index.find_near_neighbors(query, -1))
    expect(TypeError, lambda: index.set_max_num_candidates(0.5))
    expect(ValueError, lambda: index.set_max_num_candidates(-10))

    # The probe count may equal the number of tables but not go below it.
    index.set_num_probes(index._params.l)
    expect(ValueError, lambda: index.set_num_probes(index._params.l - 1))
    expect(TypeError, lambda: index.set_num_probes(1000.1))

    def check_check_query(f):
        # Wrong dtype, wrong container, wrong length, wrong rank.
        expect(ValueError, lambda: f(query.astype(np.float32)))
        expect(TypeError, lambda: f([0.0] * d))
        expect(ValueError, lambda: f(query[:d - 1]))
        expect(ValueError, lambda: f(np.random.randn(d, d)))

    vector_queries = (
        lambda v: index.find_k_nearest_neighbors(v, 10),
        lambda v: index.find_near_neighbors(v, 0.5),
        lambda v: index.find_nearest_neighbor(v),
        lambda v: index.get_candidates_with_duplicates(v),
        lambda v: index.get_unique_candidates(v),
        lambda v: index.get_unique_sorted_candidates(v),
    )
    for vector_query in vector_queries:
        check_check_query(vector_query)

    # A zero threshold is valid and must not raise.
    index.find_near_neighbors(query, 0.0)
Example #31
0
import falconn 
# Benchmark sweep over FALCONN settings; `train`, `qry`, `time` and
# `process_time` come from earlier parts of the script that are not shown.
# NOTE(review): `par` is not used again in the visible part of this script.
par = falconn.LSHConstructionParameters()
# Default squared-Euclidean parameters sized from the training set.
param = falconn.get_default_parameters(num_points = len(train), dimension = len(train[0]), distance = falconn.DistanceFunction.EuclideanSquared )
print(param.lsh_family, param.l, param.k)
tables = param.l
hashes = param.k
# Use 1.1x the default number of tables (truncated to an int).
param.l = int(1.1*tables)
para = []

# Outer sweep: the default number of hash functions and 1.5x that value.
for k in [hashes,int(hashes*1.5)]:
    param.k = k
    lsh = falconn.LSHIndex(param)
    lsh.setup(train)

    # Only construct_query_object() is timed; setup() ran before the clocks.
    # NOTE(review): time.clock() was removed in Python 3.8 -- confirm the
    # target interpreter or switch to time.perf_counter().
    startClock = time.clock()
    startTime = process_time()
    indexlsh = lsh.construct_query_object()
    end_time = process_time()
    constructionTime = end_time - startTime
    endClock = time.clock()
    constructionClock= endClock - startClock

    # Inner sweep: probe counts of 1x, 2x and 3x the number of tables.
    for t in [param.l, int(param.l*2), int(param.l*3)]:
        indexlsh.set_num_probes(t)

        # Label identifying the current (l, k, probes) configuration.
        print('lsh-l'+str(param.l)+'k'+str(param.k)+'t'+str(t))

        rez = []
        for q in qry:
            # Start both timers for this query.
            startClock = time.clock()
            startTime = process_time()
Example #32
0
    print(y2[:k])


import numpy as np
import falconn

# Interactive k-nearest-neighbour lookup over two saved output arrays.
if __name__ == '__main__':
    a1 = np.load('outputs_1.npy')
    a2 = np.load('outputs_2.npy')
    y = np.load('labels.npy')
    print(y.shape)

    # Stack both output arrays row-wise into a single dataset.
    a = np.r_[a1, a2]
    n, d = a.shape

    # Index the whole dataset with FALCONN's default parameters.
    p = falconn.get_default_parameters(n, d)
    t = falconn.LSHIndex(p)
    dataset = a
    t.setup(dataset)

    Q = t.construct_query_object()

    # input
    # These initial values are only printed; the loop below overwrites them
    # with what is read from stdin.
    i, k = 4545, 100
    print(i, k)
    while (True):
        # Each input line holds two integers: row index and neighbour count.
        i, k = map(int, input().split())
        # Take row i as a 1-row slice, then collapse it to a 1-D vector.
        q = a[i:i + 1, :]
        u = q.sum(axis=0)

        ans = Q.find_k_nearest_neighbors(u, k)
Example #33
0
    # print(dataBaseInitial)

    dataBase = np.array(dataBaseInitial.iloc[:, 1:dimension], dtype="float32")
    queryBase = queryBaseInitial.iloc[:, 1:dimension]
    # print(dataBase.shape)

    # params_cp = falconn.LSHConstructionParameters()
    # params_cp.dimension = len(dataBase[0])
    # params_cp.lsh_family = falconn.LSHFamily.CrossPolytope
    # params_cp.distance_function = falconn.DistanceFunction.EuclideanSquared
    # params_cp.l = 100
    # params_cp.k = 100
    # params_cp.num_setup_threads = 1
    # params_cp.storage_hash_table = falconn.StorageHashTable.LinearProbingHashTable
    # params_cp.num_rotations = 2
    params_cp = falconn.get_default_parameters(len(dataBase), len(dataBase[0]))
    falconn.compute_number_of_hash_functions(18, params_cp)

    table = falconn.LSHIndex(params_cp)
    table.setup(dataBase)

    query_object = table.construct_query_object()
    number_of_probes = params_cp.l
    query_object.set_num_probes(number_of_probes)

    print('FALCONN方案:')
    res = []
    for k in range(10, 1010, 10):
        print('k={}'.format(k))
        begin_time = time()
        for m in range(queryBase.shape[0]):
Example #34
0
def getPara_forLsh(datasetShape):
    """Return default FALCONN parameters switched to squared Euclidean distance.

    ``datasetShape`` must unpack into exactly ``(number of points, dimension)``.
    """
    point_count, dimension = datasetShape
    lsh_params = falconn.get_default_parameters(point_count, dimension)
    # Plain (squared) Euclidean distance instead of the library default.
    lsh_params.distance_function = "euclidean_squared"
    return lsh_params
Example #35
0
def test_lsh_index_negative():
    """Check that misuse of LSHIndex and its query objects raises.

    Uses the module-level ``n`` (number of points) and ``d`` (dimension).
    Every check performs one invalid operation and fails the test unless the
    operation raises the expected exception type.
    """

    def expect_raises(error_type, action):
        """Call ``action`` and fail unless it raises ``error_type``."""
        try:
            action()
        except error_type:
            return
        assert False

    p = falconn.get_default_parameters(n, d)

    def setup_twice():
        """Call setup() a second time on the same index."""
        index = falconn.LSHIndex(p)
        index.setup(np.random.randn(n, d))
        index.setup(np.random.randn(n, d))

    # Index-level misuse: each check starts from a fresh LSHIndex.
    # Constructing a query object before setup().
    expect_raises(
        RuntimeError,
        lambda: falconn.LSHIndex(p).construct_query_object())
    # A nested Python list instead of a numpy array.
    expect_raises(
        TypeError,
        lambda: falconn.LSHIndex(p).setup([[1.0, 2.0], [3.0, 4.0]]))
    # An integer-typed array.
    expect_raises(
        TypeError,
        lambda: falconn.LSHIndex(p).setup(
            np.random.randn(n, d).astype(np.int32)))
    # A three-dimensional array.
    expect_raises(
        ValueError,
        lambda: falconn.LSHIndex(p).setup(np.random.randn(10, 10, 10)))
    expect_raises(RuntimeError, setup_twice)

    # Query-level misuse, repeated for both float dtypes and for both ways of
    # obtaining a query handle (single query object and query pool).
    dtype_pairs = [(np.float32, np.float64), (np.float64, np.float32)]
    for data_dtype, other_dtype in dtype_pairs:
        for make_query in (
                lambda index: index.construct_query_object(),
                lambda index: index.construct_query_pool(),
        ):
            index = falconn.LSHIndex(p)
            index.setup(np.random.randn(n, d).astype(data_dtype))
            query = make_query(index)
            point = np.random.randn(d).astype(data_dtype)

            # Invalid scalar arguments.
            expect_raises(
                TypeError,
                lambda: query.find_k_nearest_neighbors(point, 0.5))
            expect_raises(
                ValueError,
                lambda: query.find_k_nearest_neighbors(point, -1))
            expect_raises(
                ValueError,
                lambda: query.find_near_neighbors(point, -1))
            expect_raises(
                TypeError,
                lambda: query.set_max_num_candidates(0.5))
            expect_raises(
                ValueError,
                lambda: query.set_max_num_candidates(-10))

            # A probe count of exactly l must be accepted; one fewer, or a
            # non-integer value, must be rejected.
            query.set_num_probes(index._params.l)
            expect_raises(
                ValueError,
                lambda: query.set_num_probes(index._params.l - 1))
            expect_raises(
                TypeError,
                lambda: query.set_num_probes(1000.1))

            # Malformed query points, each paired with the exception it must
            # trigger: mismatching dtype, plain list, wrong length, wrong rank.
            bad_inputs = (
                (TypeError, lambda: point.astype(other_dtype)),
                (TypeError, lambda: [0.0] * d),
                (ValueError, lambda: point[:d - 1]),
                (ValueError, lambda: np.random.randn(d, d)),
            )
            query_calls = (
                lambda v: query.find_k_nearest_neighbors(v, 10),
                lambda v: query.find_near_neighbors(v, 0.5),
                lambda v: query.find_nearest_neighbor(v),
                lambda v: query.get_candidates_with_duplicates(v),
                lambda v: query.get_unique_candidates(v),
            )
            for run_query in query_calls:
                for error_type, make_input in bad_inputs:
                    # The bad input is built inside the guarded call.
                    expect_raises(
                        error_type,
                        lambda: run_query(make_input()))
Example #36
0
def test_lsh_index_negative():
  """Check that LSHIndex rejects invalid usage with the expected exceptions.

  Each ``try`` block performs one invalid call followed by ``assert False``,
  so the test fails unless the call raises the exception type named in the
  matching ``except`` clause. Calls made outside a ``try`` block are expected
  to complete without raising.
  """
  n = 1000
  d = 128
  p = falconn.get_default_parameters(n, d)
  t = falconn.LSHIndex(p)
  # Querying before setup() has been called is expected to raise RuntimeError.
  try:
    t.find_nearest_neighbor(np.random.randn(d))
    assert False
  except RuntimeError:
    pass
  # A nested Python list instead of a numpy array: TypeError expected.
  try:
    dataset = [[1.0, 2.0], [3.0, 4.0]]
    t.setup(dataset)
    assert False
  except TypeError:
    pass
  # An int32 array: ValueError expected.
  try:
    dataset = np.random.randn(n, d).astype(np.int32)
    t.setup(dataset)
    assert False
  except ValueError:
    pass
  # A three-dimensional array: ValueError expected.
  try:
    dataset = np.random.randn(10, 10, 10)
    t.setup(dataset)
    assert False
  except ValueError:
    pass
  # setup() is called twice on the same index, first with float32 data and
  # then with float64 data; neither call is guarded, so both must succeed.
  dataset = np.random.randn(n, d).astype(np.float32)
  t.setup(dataset)
  dataset = np.random.randn(n, d).astype(np.float64)
  t.setup(dataset)
  # The reference query point uses float64, matching the last dataset.
  u = np.random.randn(d).astype(np.float64)
  
  # A non-integer k: TypeError expected.
  try:
    t.find_k_nearest_neighbors(u, 0.5)
    assert False
  except TypeError:
    pass

  # A negative k: ValueError expected.
  try:
    t.find_k_nearest_neighbors(u, -1)
    assert False
  except ValueError:
    pass
  
  # Unguarded call: a negative threshold is expected not to raise here.
  t.find_near_neighbors(u, -1)
  
  # A non-integer candidate limit: TypeError expected.
  try:
    t.set_max_num_candidates(0.5)
    assert False
  except TypeError:
    pass
  # A candidate limit of -10: ValueError expected.
  try:
    t.set_max_num_candidates(-10)
    assert False
  except ValueError:
    pass
  # A probe count equal to the parameter l must be accepted ...
  t.set_num_probes(t._params.l)
  # ... while l - 1 is expected to raise ValueError,
  try:
    t.set_num_probes(t._params.l - 1)
    assert False
  except ValueError:
    pass
  # and a non-integer probe count is expected to raise TypeError.
  try:
    t.set_num_probes(1000.1)
    assert False
  except TypeError:
    pass

  def check_check_query(f):
    """Call ``f`` with four malformed query points and check each failure."""
    # float32 query while u (and the last dataset) is float64: ValueError.
    try:
      f(u.astype(np.float32))
      assert False
    except ValueError:
      pass
    # Plain Python list instead of a numpy array: TypeError.
    try:
      f([0.0] * d)
      assert False
    except TypeError:
      pass
    # Vector of length d - 1: ValueError.
    try:
      f(u[:d-1])
      assert False
    except ValueError:
      pass
    # Two-dimensional (d x d) array instead of a vector: ValueError.
    try:
      f(np.random.randn(d, d))
      assert False
    except ValueError:
      pass

  # Run the malformed-query checks against every query method. The lambda
  # parameter u shadows the outer u inside each lambda.
  check_check_query(lambda u: t.find_k_nearest_neighbors(u, 10))
  check_check_query(lambda u: t.find_near_neighbors(u, 0.5))
  check_check_query(lambda u: t.find_nearest_neighbor(u))
  check_check_query(lambda u: t.get_candidates_with_duplicates(u))
  check_check_query(lambda u: t.get_unique_candidates(u))
  #check_check_query(lambda u: t.get_unique_sorted_candidates(u))
  # Unguarded call: a zero threshold is expected to be accepted.
  t.find_near_neighbors(u, 0.0)