コード例 #1
0
def test_ball_tree_pickle():
    """Check that BallTree query results survive a pickle round trip.

    Two trees are built over the same random points: one with the default
    metric and one with the Python callable ``dist_func``. Each is pickled
    with protocols 0, 1 and 2, restored, and queried again; the restored
    trees must reproduce the results of the originals.
    """
    random_state = check_random_state(0)
    X = random_state.random_sample((10, 3))

    plain_tree = BallTree(X, leaf_size=1)
    # Test if BallTree with callable metric is picklable
    callable_tree = BallTree(X, metric=dist_func, leaf_size=1, p=2)

    # Reference results; BallTree.query returns (distances, indices).
    ref_dist, ref_ind = plain_tree.query(X)
    ref_dist_callable, ref_ind_callable = callable_tree.query(X)

    def check_pickle_protocol(protocol):
        # Round-trip both trees through the requested pickle protocol.
        plain_copy = pickle.loads(pickle.dumps(plain_tree, protocol=protocol))
        callable_copy = pickle.loads(
            pickle.dumps(callable_tree, protocol=protocol))

        new_dist, new_ind = plain_copy.query(X)
        new_dist_callable, new_ind_callable = callable_copy.query(X)

        assert_array_almost_equal(ref_dist, new_dist)
        assert_array_almost_equal(ref_ind, new_ind)

        assert_array_almost_equal(ref_dist_callable, new_dist_callable)
        assert_array_almost_equal(ref_ind_callable, new_ind_callable)

        # Unpickling must give back a BallTree, not some other type.
        assert isinstance(plain_copy, BallTree)

    for protocol in (0, 1, 2):
        check_pickle_protocol(protocol)
コード例 #2
0
def test_ball_tree_pickle():
    """Verify that pickled and restored BallTrees answer queries identically.

    Covers a tree with the default metric and a tree whose metric is the
    Python callable ``dist_func``, for pickle protocols 0, 1 and 2.
    """
    rng = check_random_state(0)
    X = rng.random_sample((10, 3))

    tree = BallTree(X, leaf_size=1)
    # Test if BallTree with callable metric is picklable
    tree_pyfunc = BallTree(X, metric=dist_func, leaf_size=1, p=2)

    # Results of the original trees, as (distances, indices) pairs.
    dist_before, ind_before = tree.query(X)
    dist_before_pyfunc, ind_before_pyfunc = tree_pyfunc.query(X)

    def check_pickle_protocol(protocol):
        payload = pickle.dumps(tree, protocol=protocol)
        restored = pickle.loads(payload)

        payload_pyfunc = pickle.dumps(tree_pyfunc, protocol=protocol)
        restored_pyfunc = pickle.loads(payload_pyfunc)

        dist_after, ind_after = restored.query(X)
        dist_after_pyfunc, ind_after_pyfunc = restored_pyfunc.query(X)

        # Default-metric tree: distances first, then indices.
        assert_array_almost_equal(dist_before, dist_after)
        assert_array_almost_equal(ind_before, ind_after)

        # Callable-metric tree.
        assert_array_almost_equal(dist_before_pyfunc, dist_after_pyfunc)
        assert_array_almost_equal(ind_before_pyfunc, ind_after_pyfunc)

        assert isinstance(restored, BallTree)

    for protocol in (0, 1, 2):
        check_pickle_protocol(protocol)
コード例 #3
0
def test_ball_tree_pickle():
    """Check that pickling a BallTree preserves its query results.

    A tree with the default metric and a tree with the Python callable
    metric ``dist_func`` are pickled with protocols 0, 1 and 2, restored,
    and queried again; the results must match those of the original trees.
    """
    np.random.seed(0)
    X = np.random.random((10, 3))

    bt1 = BallTree(X, leaf_size=1)
    # Test if BallTree with callable metric is picklable
    bt1_pyfunc = BallTree(X, metric=dist_func, leaf_size=1, p=2)

    # BallTree.query returns (distances, indices).
    dist1, ind1 = bt1.query(X)
    dist1_pyfunc, ind1_pyfunc = bt1_pyfunc.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(bt1, protocol=protocol)
        bt2 = pickle.loads(s)

        s_pyfunc = pickle.dumps(bt1_pyfunc, protocol=protocol)
        bt2_pyfunc = pickle.loads(s_pyfunc)

        dist2, ind2 = bt2.query(X)
        dist2_pyfunc, ind2_pyfunc = bt2_pyfunc.query(X)

        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2)

        assert_array_almost_equal(dist1_pyfunc, dist2_pyfunc)
        assert_array_almost_equal(ind1_pyfunc, ind2_pyfunc)

    # Call the check directly instead of yielding it: yield-style generator
    # tests are not executed by pytest 4 and later, so the assertions above
    # would otherwise never run.
    for protocol in (0, 1, 2):
        check_pickle_protocol(protocol)
コード例 #4
0
def lof(X, k, outlier_threshold=1.5, verbose=False):
    """Score every point of ``X`` with a local-outlier-factor style measure.

    Neighbours are looked up with a ``BallTree`` built over ``X``.

    Parameters
    ----------
    X : indexable collection of points accepted by ``BallTree``
        ``X[i]`` is copied into the result for every reported outlier.
    k : int
        Number of neighbours requested from the tree. The first returned
        neighbour column is discarded, so ``k - 1`` neighbours are used.
    outlier_threshold : float, default 1.5
        Points whose score is strictly greater than this are reported.
    verbose : bool, default False
        When true, print the threshold and the detected outliers.

    Returns
    -------
    list
        One ``[index, X[index], score]`` entry per detected outlier, in
        increasing index order.
    """
    tree = BallTree(X, leaf_size=2)
    distance, index = tree.query(X, k)
    # Drop the nearest column: X is queried against its own tree, so this is
    # presumably each point matching itself.
    distance, index = distance[:, 1:], index[:, 1:]
    # Distance from each point to its farthest retained neighbour.
    radius = distance[:, -1]

    # Mean reachability distance per point; its reciprocal ``r`` is the
    # local reachability density.
    mean_reachability = np.mean(np.maximum(distance, radius[index]), axis=1)
    r = 1. / np.array(mean_reachability)

    # Outlier score: neighbours' densities relative to the point's own.
    # NOTE(review): the float16 cast of the denominator and the division by
    # ``k`` (rather than the ``k - 1`` neighbours actually summed) are kept
    # as-is to preserve existing scores -- confirm whether both are intended.
    outlier_score = np.sum(r[index], axis=1) / np.array(r, dtype=np.float16)
    outlier_score *= 1. / k

    if verbose:
        print("Recording all outliers with outlier score greater than %s."
              % (outlier_threshold))

    outliers = [[i, X[i], score]
                for i, score in enumerate(outlier_score)
                if score > outlier_threshold]

    if verbose:
        print("Detected outliers:")
        print(outliers)

    return outliers
コード例 #5
0
ファイル: views.py プロジェクト: vatsalchanana/image-search
def similar_products2(deep_f):
    """Return the products whose stored features are closest to ``deep_f``.

    A ``BallTree`` is built over ``get_features()`` of every ``Product`` and
    queried for the 5 nearest entries to ``deep_f``.

    Parameters
    ----------
    deep_f : query feature vector(s) passed straight to ``BallTree.query``;
        only the neighbours of the first query row are used.

    Returns
    -------
    The ``Product`` queryset filtered to the ASINs of those neighbours.
    NOTE(review): the result comes from an ``asin__in`` filter, so it is
    presumably not ordered by distance -- confirm if callers need ranking.
    """
    qs = Product.objects.all()
    feature_list = []
    asin_list = []
    for prod in qs:
        feature_list.append(prod.get_features())
        asin_list.append(prod.asin)

    tree = BallTree(np.asarray(feature_list))
    _, ind = tree.query(deep_f, k=5)
    # Call-form print with one argument behaves the same on Python 2 and 3.
    print(ind)

    # All five neighbours of the first query row are recommended.
    recommended_asins = [asin_list[i] for i in ind[0]]
    return Product.objects.filter(asin__in=recommended_asins)

#    image_train = graphlab.SFrame(data=df)
#    cur_prod = image_train[18:19]
#    print cur_prod
#    print image_train
#    knn_model = graphlab.nearest_neighbors.create(image_train, features = ['features'],label = 'asin',distance = 'levenshtein',method = 'ball_tree')
#    knn_model.save('my_knn')
#    #knn_model= graphlab.load_model('my_knn')
#    #print knn_model.query(cur_prod)
#    #knn_model = graphlab.nearest_neighbors.create(image_train, features = ['features'],label = 'keywords')
コード例 #6
0
ファイル: views.py プロジェクト: vatsalchanana/image-search
def similar_products(product):
    """Return the products whose features are closest to those of ``product``.

    A ``BallTree`` is built over ``get_features()`` of every ``Product``; the
    tree is queried with the feature row of the entry whose ``asin`` matches
    ``product.asin`` (the last such entry, or row 0 when none matches).

    Parameters
    ----------
    product : object exposing an ``asin`` attribute.

    Returns
    -------
    The ``Product`` queryset filtered to the ASINs of the 2nd..5th nearest
    neighbours; the nearest one is skipped, presumably because it is the
    product itself.
    """
    qs = Product.objects.all()
    feature_list = []
    asin_list = []
    product_index = 0
    for position, prod in enumerate(qs):
        feature_list.append(prod.get_features())
        asin_list.append(prod.asin)
        if prod.asin == product.asin:
            product_index = position

    nparray = np.asarray(feature_list)
    tree = BallTree(nparray)
    # Slice instead of indexing so the query stays 2-D (1, n_features):
    # current scikit-learn rejects 1-D query input.
    _, ind = tree.query(nparray[product_index:product_index + 1], k=5)
    # Call-form print with one argument behaves the same on Python 2 and 3.
    print(ind)

    recommended_asins = [asin_list[i] for i in ind[0][1:]]
    return Product.objects.filter(asin__in=recommended_asins)
コード例 #7
0
class ClusterModel:
    """Nearest-neighbour lookup of question/answer pairs by sentence vector.

    The corpus is read from a CSV file and its precomputed vectors are
    loaded with ``load_qa_corpus_vec``. Vectors without any truthy component
    are left out of the ball tree; ``_indices`` maps each tree position back
    to the corresponding position in the vector list.
    """

    def __init__(self, sen2vec, corpus_path, corpus_vec_path):
        self.sen2vec = sen2vec

        self._corpus = pd.read_csv(corpus_path)
        self._vectors = load_qa_corpus_vec(corpus_vec_path)

        # Keep (position, vector) pairs for vectors with a non-zero entry.
        kept = [(pos, vec) for pos, vec in enumerate(self._vectors) if any(vec)]
        self._indices = [pos for pos, _ in kept]

        # Build the ball tree over the retained vectors.
        self.tree = BallTree(np.array([vec for _, vec in kept]))

    def __call__(self, sentence, k=1):
        """Find the top-k questions most similar to the given sentence.

        Returns a list of ``(question, answer, distance)`` tuples in the
        order the tree reports the neighbours.
        """
        query_vec = self.sen2vec.sentence2vec([sentence]).reshape(1, -1)
        dist, ind = self.tree.query(query_vec, k=k)

        matches = []
        for tree_pos, distance in zip(ind[0], dist[0]):
            # Translate the tree position back to a corpus row label.
            qa = self._corpus.loc[self._indices[tree_pos]]
            matches.append((qa['question'], qa['answer'], distance))
        return matches
コード例 #8
0
def src_nearest_gst_distance(src_pos, gst_pos, nn=1):
    """Find the ``nn`` nearest ``gst_pos`` entries for every ``src_pos`` entry.

    Both inputs are converted from degrees to radians and matched with a
    haversine-metric ``BallTree`` (presumably (lat, lon) columns -- verify
    against callers). The tree distances are converted with
    ``haversine_to_km`` and then multiplied by ``FIBER_PATH_STRETCH``, so
    the returned distances INCLUDE PATH STRETCH.

    Returns
    -------
    tuple
        ``(indices, stretched_distances)`` -- note the indices come first.
    """
    stations_rad = np.deg2rad(gst_pos)
    haversine = DistanceMetric.get_metric("haversine")
    station_tree = BallTree(stations_rad, metric=haversine)

    raw_dist, nearest_idx = station_tree.query(np.deg2rad(src_pos), k=nn)
    stretched_km = haversine_to_km(raw_dist) * FIBER_PATH_STRETCH
    return nearest_idx, stretched_km
コード例 #9
0
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Assert that BallTree and brute-force k-neighbour distances agree.

        ``X`` (tree data) and ``Y`` (query points) come from the enclosing
        scope; ``kwargs`` holds extra metric parameters.
        """
        tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
        tree_dist, _ = tree.query(Y, k, dualtree=dualtree, breadth_first=breadth_first)
        brute_dist, _ = brute_force_neighbors(X, Y, k, metric, **kwargs)

        # Only distances are compared: with tied distances the two methods
        # may legitimately report different indices.
        assert_array_almost_equal(tree_dist, brute_dist)
コード例 #10
0
def test_query_haversine():
    """Compare haversine BallTree neighbours with the brute-force reference.

    Both the distances and the indices of the 5 nearest neighbours must
    match.
    """
    rng = check_random_state(0)
    full_turn = 2 * np.pi
    X = full_turn * rng.random_sample((40, 2))

    tree = BallTree(X, leaf_size=1, metric='haversine')
    tree_dist, tree_ind = tree.query(X, k=5)
    brute_dist, brute_ind = brute_force_neighbors(X, X, k=5, metric='haversine')

    assert_array_almost_equal(tree_dist, brute_dist)
    assert_array_almost_equal(tree_ind, brute_ind)
コード例 #11
0
    def check_neighbors(dualtree, breadth_first, k, metric, kwargs):
        """Check BallTree k-neighbour distances against brute force.

        Uses ``X`` and ``Y`` from the enclosing scope as tree data and query
        points; ``kwargs`` carries the metric's extra parameters.
        """
        query_options = dict(dualtree=dualtree, breadth_first=breadth_first)

        ball_tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
        expected, _ = ball_tree.query(Y, k, **query_options)
        reference, _ = brute_force_neighbors(X, Y, k, metric, **kwargs)

        # Indices are deliberately ignored: duplicate distances can make
        # them differ, whereas the distances themselves must agree.
        assert_array_almost_equal(expected, reference)
コード例 #12
0
def test_query_haversine():
    """Check haversine BallTree queries against brute-force neighbours.

    The global NumPy random generator is seeded with 0 before sampling.
    """
    np.random.seed(0)
    angles = np.random.random((40, 2))
    X = 2 * np.pi * angles

    ball_tree = BallTree(X, leaf_size=1, metric='haversine')
    from_tree = ball_tree.query(X, k=5)
    from_brute = brute_force_neighbors(X, X, k=5, metric='haversine')

    # Each result is a (distances, indices) pair; both parts must agree.
    dist1, ind1 = from_tree
    dist2, ind2 = from_brute
    assert_array_almost_equal(dist1, dist2)
    assert_array_almost_equal(ind1, ind2)
コード例 #13
0
def test_query_haversine():
    """Haversine BallTree results must equal the brute-force results.

    Forty random 2-column points are used both as tree data and as query
    points, with ``k=5``.
    """
    random_state = check_random_state(0)
    samples = random_state.random_sample((40, 2))
    X = 2 * np.pi * samples

    neighbors_tree = BallTree(X, leaf_size=1, metric='haversine')
    dist_tree, ind_tree = neighbors_tree.query(X, k=5)
    dist_brute, ind_brute = brute_force_neighbors(
        X, X, k=5, metric='haversine')

    # Distances first, then indices.
    assert_array_almost_equal(dist_tree, dist_brute)
    assert_array_almost_equal(ind_tree, ind_brute)
コード例 #14
0
def test_ball_tree_query_metrics(metric):
    """Compare BallTree distances with brute force for a given metric.

    Boolean metrics get rounded uniform samples; discrete metrics get
    rounded samples scaled by 4. ``metric`` is expected to belong to one of
    those two groups.
    """
    rng = check_random_state(0)
    if metric in BOOLEAN_METRICS:
        X = rng.random_sample((40, 10)).round(0)
        Y = rng.random_sample((10, 10)).round(0)
    elif metric in DISCRETE_METRICS:
        X = (4 * rng.random_sample((40, 10))).round(0)
        Y = (4 * rng.random_sample((10, 10))).round(0)

    n_neighbors = 5

    tree = BallTree(X, leaf_size=1, metric=metric)
    tree_dist, _ = tree.query(Y, n_neighbors)
    brute_dist, _ = brute_force_neighbors(X, Y, n_neighbors, metric)
    assert_array_almost_equal(tree_dist, brute_dist)
コード例 #15
0
def test_ball_tree_query(metric, k, dualtree, breadth_first):
    """Compare BallTree k-neighbour distances with the brute-force reference.

    Extra metric parameters are looked up in ``METRICS[metric]`` and passed
    to both the tree and the brute-force helper.
    """
    rng = check_random_state(0)
    X = rng.random_sample((40, DIMENSION))
    Y = rng.random_sample((10, DIMENSION))

    metric_params = METRICS[metric]

    tree = BallTree(X, leaf_size=1, metric=metric, **metric_params)
    tree_dist, _ = tree.query(Y, k, dualtree=dualtree,
                              breadth_first=breadth_first)
    brute_dist, _ = brute_force_neighbors(X, Y, k, metric, **metric_params)

    # Indices are not compared: tied distances can yield different but
    # equally valid indices, while the distances must still match.
    assert_array_almost_equal(tree_dist, brute_dist)
コード例 #16
0
def test_ball_tree_query_metrics(metric):
    """Check BallTree query distances for boolean and discrete metrics.

    The data is drawn from a generator seeded with 0: rounded uniform
    samples for boolean metrics, rounded samples scaled by 4 for discrete
    metrics. Distances must match the brute-force computation.
    """
    random_state = check_random_state(0)
    if metric in BOOLEAN_METRICS:
        X = random_state.random_sample((40, 10)).round(0)
        Y = random_state.random_sample((10, 10)).round(0)
    elif metric in DISCRETE_METRICS:
        X = (4 * random_state.random_sample((40, 10))).round(0)
        Y = (4 * random_state.random_sample((10, 10))).round(0)

    k = 5

    actual = BallTree(X, leaf_size=1, metric=metric).query(Y, k)
    expected = brute_force_neighbors(X, Y, k, metric)

    # Unpack the (distances, indices) pairs and compare the distances only.
    actual_dist, _ = actual
    expected_dist, _ = expected
    assert_array_almost_equal(actual_dist, expected_dist)
コード例 #17
0
def test_ball_tree_pickle():
    """Check that a pickled and restored BallTree answers queries identically.

    The tree is round-tripped through pickle protocols 0, 1 and 2 and its
    query results are compared with those of the original tree.
    """
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    bt1 = BallTree(X, leaf_size=1)
    # BallTree.query returns (distances, indices).
    dist1, ind1 = bt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(bt1, protocol=protocol)
        bt2 = pickle.loads(s)
        dist2, ind2 = bt2.query(X)
        assert_array_almost_equal(dist1, dist2)
        assert_array_almost_equal(ind1, ind2)

    # Call the check directly: yield-style generator tests are not executed
    # by pytest 4 and later, so yielding here would skip the assertions.
    for protocol in (0, 1, 2):
        check_pickle_protocol(protocol)
コード例 #18
0
def test_ball_tree_pickle():
    """Check that BallTree query results are unchanged by pickling.

    The tree is serialized with pickle protocols 0, 1 and 2, restored, and
    queried again; results are compared with ``assert_allclose``.
    """
    import pickle
    np.random.seed(0)
    X = np.random.random((10, 3))
    bt1 = BallTree(X, leaf_size=1)
    # BallTree.query returns (distances, indices).
    dist1, ind1 = bt1.query(X)

    def check_pickle_protocol(protocol):
        s = pickle.dumps(bt1, protocol=protocol)
        bt2 = pickle.loads(s)
        dist2, ind2 = bt2.query(X)
        assert_allclose(dist1, dist2)
        assert_allclose(ind1, ind2)

    # Call the check directly: yield-style generator tests are not executed
    # by pytest 4 and later, so yielding here would skip the assertions.
    for protocol in (0, 1, 2):
        check_pickle_protocol(protocol)
コード例 #19
0
def test_ball_tree_query(metric, k, dualtree, breadth_first):
    """Check BallTree query distances against brute-force neighbours.

    ``METRICS[metric]`` supplies the metric's keyword parameters; the query
    is run with the requested ``dualtree`` / ``breadth_first`` options.
    """
    random_state = check_random_state(0)
    X = random_state.random_sample((40, DIMENSION))
    Y = random_state.random_sample((10, DIMENSION))

    kwargs = METRICS[metric]
    traversal = dict(dualtree=dualtree, breadth_first=breadth_first)

    ball_tree = BallTree(X, leaf_size=1, metric=metric, **kwargs)
    found_dist, _ = ball_tree.query(Y, k, **traversal)
    reference_dist, _ = brute_force_neighbors(X, Y, k, metric, **kwargs)

    # Skip the index comparison: when several neighbours are equidistant
    # the indices may differ, but the distances cannot.
    assert_array_almost_equal(found_dist, reference_dist)
コード例 #20
0
    def query(self, X: np.ndarray, k: Optional[int] = None) -> np.ndarray:
        """
        Looks up the nearest stored nodes for every sample.

        A Euclidean ``BallTree`` is built over ``self.nodes`` on each call.

        Parameters:
            X: An array of shape (num_samples, num_features).
            k: The number of neighbors to return; ``self._k`` is used when
                it is ``None``.

        Returns:
            An array of shape (num_samples, k) and of type int containing the
            indices of the k nearest nodes.
        """
        num_neighbors = self._k if k is None else k
        tree = BallTree(self.nodes, metric="euclidean")
        # Only the indices are needed; the distances are discarded.
        _, neighbor_indices = tree.query(X, num_neighbors)
        return neighbor_indices
コード例 #21
0
    def rank(self, cs, yc, ls, lss):
        """Rank all rows of ``yc`` for each query in ``cs`` and mark the hits.

        A ``BallTree`` over ``yc`` returns, for every row of ``cs``, the
        indices of all ``len(yc)`` entries ordered by the tree's query.

        Parameters:
            cs: Query points passed to ``BallTree.query``.
            yc: Points indexed by the tree; one per entry of ``ls``
                (presumably -- positions in ``ls`` are used as ``yc`` row
                indices).
            ls: Labels; the position of a label is its target index.
            lss: For each query, the collection of relevant labels.

        Returns:
            A list with one float array per query: 1.0 where the ranked
            index belongs to that query's relevant labels, 0.0 elsewhere.

        Raises:
            KeyError: If a label in ``lss`` does not occur in ``ls``.
        """
        targets = {label: position for position, label in enumerate(ls)}

        # Number of results (lemmas) ranked
        n_results = len(yc)

        # Build ball tree model
        ball_tree = BallTree(yc)

        rs = ball_tree.query(cs, k=n_results, return_distance=False)

        rankings = list()

        for ranking, relevant_labels in zip(rs, lss):
            # A set gives constant-time membership tests for each ranked index.
            relevant_ids = {targets[label] for label in relevant_labels}
            ranking_array = np.array(
                [(1.0 if idx in relevant_ids else 0.0) for idx in ranking])
            rankings.append(ranking_array)

        return rankings
コード例 #22
0
ファイル: nlm.py プロジェクト: dsvision/nlm
def _nonlocalmeans_clustered(img, n_small=5, n_components=9, n_neighbors=30, h=10):
    """Denoise a 2-D image by averaging pixels whose patches look alike.

    Every pixel's ``(2 * n_small + 1)``-square patch (reflect-padded at the
    borders) is projected with PCA, the projections are indexed in a
    ``BallTree``, and each output pixel is a weighted mean of the pixels
    whose patches are its nearest neighbours in that projected space.

    Parameters
    ----------
    img : 2-D array
        Input image; ``img.shape`` must unpack into rows and columns.
    n_small : int
        Patch half-width.
    n_components : int
        Number of PCA components kept per patch.
    n_neighbors : int
        Neighbours requested per pixel; the nearest one is skipped, so
        ``n_neighbors - 1`` pixels contribute to each average.
    h : number
        Filtering strength; weights are ``exp(-distance / h**2)``.

    Returns
    -------
    Array created with ``np.zeros_like(img)`` holding the denoised image.
    NOTE(review): for an integer ``img`` the weighted means are truncated on
    assignment -- confirm whether callers always pass floats.
    """
    patch_side = 2 * n_small + 1
    Nw = patch_side ** 2
    h2 = h * h
    n_rows, n_cols = img.shape

    # Row/column offsets of every patch element relative to the patch centre.
    small_rows, small_cols = np.indices((patch_side, patch_side)) - n_small

    # Collect one flattened patch per pixel so they can be projected together.
    n_padded = np.pad(img, n_small, mode='reflect')
    patches = np.zeros((n_rows * n_cols, Nw))

    n = 0
    for r in range(n_small, n_small + n_rows):
        for c in range(n_small, n_small + n_cols):
            patches[n, :] = n_padded[r + small_rows, c + small_cols].flatten()
            n += 1

    transformed = PCA(n_components=n_components).fit_transform(patches)
    # index the patches into a tree
    tree = BallTree(transformed, leaf_size=2)

    print("Denoising")
    new_img = np.zeros_like(img)
    for r in range(n_rows):
        for c in range(n_cols):
            idx = r * n_cols + c
            # Slice (not index) so the query is 2-D (1, n_components):
            # current scikit-learn rejects 1-D query input, and the code
            # below indexes the results as 2-D arrays.
            dist, ind = tree.query(transformed[idx:idx + 1], k=n_neighbors)
            # Skip column 0 (the nearest match) and map the flat patch
            # indices back to image coordinates with integer arithmetic.
            neighbors = ind[0, 1:]
            colors = img[neighbors // n_cols, neighbors % n_cols]
            w = np.exp(-dist[0, 1:] / h2)
            new_img[r, c] = np.sum(w * colors) / np.sum(w)

    return new_img
コード例 #23
0
 def check_neighbors(metric):
     """Assert BallTree and brute-force distances agree for ``metric``.

     ``X``, ``Y`` and ``k`` are taken from the enclosing scope.
     """
     tree = BallTree(X, leaf_size=1, metric=metric)
     tree_dist, _ = tree.query(Y, k)
     brute_dist, _ = brute_force_neighbors(X, Y, k, metric)
     assert_array_almost_equal(tree_dist, brute_dist)
コード例 #24
0
# Benchmark: time k-nearest-neighbour searches on a faiss GPU flat L2 index
# and on a scikit-learn BallTree, for 2**3..2**9 query rows and 2**3..2**9
# neighbours. Each line printed is "<n_queries> <k> <elapsed milliseconds>".
batch_size = 512
X = np.random.random(size=(n_points, d)).astype(np.float32)

# Flat L2 index placed on GPU device 0.
res = faiss.StandardGpuResources()
flat_config = faiss.GpuIndexFlatConfig()
flat_config.device = 0
index = faiss.GpuIndexFlatL2(res, d, flat_config)
index.add(X)

for bi in range(3, 10):
    for ki in range(3, 10):
        t = time.time()
        D, I = index.search(X[0:2**bi, :], 2**ki)
        # %-formatting inside a single-argument print gives the same output
        # on Python 2 and Python 3.
        print("%d %d %d" % (2**bi, 2**ki, int((time.time() - t) * 1000)))

t = time.time()
cpu_index = BallTree(X)
print("BallTree build time (mins) %d" % int((time.time() - t) / 60))

for bi in range(3, 10):
    for ki in range(3, 10):
        t = time.time()
        D, I = cpu_index.query(X[0:2**bi, :], 2**ki)
        print("%d %d %d" % (2**bi, 2**ki, int((time.time() - t) * 1000)))


コード例 #25
0
    df = df.append(pd.Series(i), ignore_index=True)
#print(df.shape)
# Keep only the rows in which every entry is non-zero.
# NOTE(review): if the goal is to drop all-zero rows only, this would need
# ``.any()`` instead of ``.all()`` -- confirm the intent.
df = df[(df.T != 0).all()]

print('Training model KNN .........')
# Import from the public package path: the private
# ``sklearn.neighbors.ball_tree`` module no longer exists in current
# scikit-learn releases.
from sklearn.neighbors import BallTree

tree = BallTree(df, leaf_size=2)

# Query with row 69, expanded to a single-row 2-D array.
index = np.expand_dims(df.iloc[69,:], axis =0)

dist, ind = tree.query(index, k=3)
print(ind)  # indices of 3 closest neighbors
print(dist)  # distances to 3 closest neighbors

# Rows of the first, second and third nearest neighbours.
v1 = df.iloc[ind[:,0],:]
v2 = df.iloc[ind[:,1],:]
v3 = df.iloc[ind[:,2],:]

V1 = np.array(v1)
V2 = np.array(v2)
V3 = np.array(v3)


for k,v in WandV.items():
コード例 #26
0
    #print(len(WandV.values()))
    #print(WandV.values())
    import pandas as pd
    # One row per vector in WandV. Built in a single step because
    # ``DataFrame.append`` was removed in pandas 2.0 (and copied the whole
    # frame on every iteration).
    df = pd.DataFrame([pd.Series(i) for i in WandV.values()])

    # Import from the public package path: the private
    # ``sklearn.neighbors.ball_tree`` module no longer exists in current
    # scikit-learn releases.
    from sklearn.neighbors import BallTree
    print("KNN ...........")
    tree = BallTree(df, leaf_size=2)
    print("finding neighbor words .....")
    # Query with the first row only.
    dist, ind = tree.query(df[:1], k=3)
    print(ind)  # indices of 3 closest neighbors
    print(dist)  # distances to 3 closest neighbors

    # NOTE(review): rows 363 and 3774 are hard-coded rather than read from
    # ``ind`` -- presumably copied from an earlier run's output; confirm
    # they still match the query result for the current data.
    v1 = df.iloc[0, :]
    v2 = df.iloc[363, :]
    v3 = df.iloc[3774, :]

    V1 = np.array(v1)
    V2 = np.array(v2)
    V3 = np.array(v3)

    for k, v in WandV.items():
        comparison = v == V1