Example #1
class KDBasedKNearestNeighbor(object):
    """
        KDTree-based KNN classifier with L2 distance
    """

    def __init__(self, k=1):
        self.k = k

    def fit(self, X_train, y_train):
        """
            Build KDtree using
            http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KDTree.html
        """
        self.X_train = X_train
        self.y_train = y_train

        return self

    def calc_dist(self, X_test, metric, k=None):
        if k is None:
            k = self.k

        # Build the KD-tree over the training data (leaf_size=self.k keeps
        # the original author's choice; any positive leaf size works).
        self.kd_tree = KDTree(self.X_train, metric=metric, leaf_size=self.k)

        return self

    def get_neighbors(self, X_test, k=None):
        if k is None:
            k = self.k

        neighbors = self.kd_tree.query(X_test, k)

        return neighbors[1]

    def predict_labels(self, X_test, k=None):
        """
            Make prediction using kdtree
            Return array of predicted labels
        """
        if k is None:
            k = self.k

        neighbors = self.kd_tree.query(X_test, k)

        num_test = X_test.shape[0]

        y_pred = numpy.zeros(num_test)

        for i in range(num_test):
            closest_y = self.y_train[neighbors[1][i]]
            count = Counter(closest_y)
            # print(count.most_common(1))
            y_pred[i] = count.most_common(1)[0][0]

        return y_pred
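A minimal usage sketch for the class above, on synthetic data; it assumes `numpy`, `collections.Counter` and sklearn's KDTree are imported as in the example:

import numpy
from collections import Counter
from sklearn.neighbors import KDTree

X_train = numpy.random.rand(100, 4)
y_train = numpy.random.randint(0, 3, size=100)
X_test = numpy.random.rand(10, 4)

clf = KDBasedKNearestNeighbor(k=5)
clf.fit(X_train, y_train)
clf.calc_dist(X_test, metric='euclidean')  # builds the KD-tree
y_pred = clf.predict_labels(X_test)        # majority vote over the 5 nearest neighbours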
def patch_classify():
    """
        patch可视化:观察patch在。
        PCA空间,训练数据和实际数据的关系。
        构造了kd-tree
    """
    with open('training_data_full.pickle') as f:
        # 读取对应的原始patch
        kk = open("raw_data_full.pickle", 'rb')
        raw_lib = cPickle.load(kk)
        raw_lib = np.asarray(raw_lib, dtype='float32')

        # 读取数据转换特征
        training_data = cPickle.load(f)
        patch_lib, feature_lib = training_data
        feature_lib, patch_lib = (np.asarray(feature_lib, dtype='float32'), np.asarray(patch_lib, dtype='float32'))
        feature_lib = feature_lib.reshape((-1, 4 * 9 * 9))

        # build the KD-tree
        tree = KDTree(feature_lib, leaf_size=len(feature_lib) // 100)

        # query the KD-tree for the 100 nearest neighbours of three sample patches
        dist, ind1 = tree.query(feature_lib[[5678]], k=100)
        nn1 = feature_lib[ind1][0]

        dist, ind2 = tree.query(feature_lib[[10000]], k=100)
        nn2 = feature_lib[ind2][0]

        dist, ind3 = tree.query(feature_lib[[1233]], k=100)
        nn3 = feature_lib[ind3][0]

        # fit PCA and project the data into a 2-D PCA space
        pca = PCA(n_components=2)
        d2_data = pca.fit_transform(feature_lib).T

        # project the high-dimensional neighbour points into the low-dimensional PCA space
        r1 = pca.transform(nn1).T
        r2 = pca.transform(nn2).T
        r3 = pca.transform(nn3).T

        # set up the plotting area
        ax = plt.axes([0.1, 0.1, 0.8, 0.8])

        # scatter plot of all the data
        ax.scatter(d2_data[0], d2_data[1], c='g')
        # scatter plots of the three neighbourhoods
        ax.scatter(r1[0], r1[1], c='r')
        ax.scatter(r2[0], r2[1], c='b')
        ax.scatter(r3[0], r3[1], c='y')

        # patch_lib and raw_lib hold the residual patches and the raw patches, respectively
        patch_show(raw_lib[ind1][0], [0.05, 0.05, 0.4, 0.4], 'red')
        patch_show(raw_lib[ind2][0], [0.05, 0.55, 0.4, 0.4], 'blue')
        patch_show(raw_lib[ind3][0], [0.55, 0.05, 0.4, 0.4], 'yellow')

        plt.show()
def neighbour3dpoints(seqno, f1, f2, no_sets, pointsperset):
    pcl1name = 'seq' + seqno + 'frame' + str(f1)
    pcl2name = 'seq' + seqno + 'frame' + str(f2)
    path1 = '/home/manish/Awesomestuff/Subjects/IVP/Project_stereo/gen_data/coordinates/' + str(pcl1name) + '.npy'
    path2 = '/home/manish/Awesomestuff/Subjects/IVP/Project_stereo/gen_data/coordinates/' + str(pcl2name) + '.npy'
    cords1 = np.load(path1)
    cords2 = np.load(path2)
    i1 = hp.loadimage_kitti(seqno, 'l', f1, 0)
    i2 = hp.loadimage_kitti(seqno, 'l', f2, 0)
    (h, l) = i1.shape
    # the original called getfeatures with undefined names (img, template);
    # the two loaded frames are what it presumably matches between
    (pts_1, pts_2) = getfeatures(i1, i2, no_sets, 0)
    pts3d_1 = featurepoint_toworldtransform(pts_1, (h, l), cords1)
    pts3d_2 = featurepoint_toworldtransform(pts_2, (h, l), cords2)

    # keep only points with 0 < z < 50 in both frames
    mask1_1 = np.abs(pts3d_1[:, 2]) < 50
    mask1_2 = pts3d_1[:, 2] > 0
    mask1 = np.logical_and(mask1_1, mask1_2)

    mask2_1 = np.abs(pts3d_2[:, 2]) < 50
    mask2_2 = pts3d_2[:, 2] > 0
    mask2 = np.logical_and(mask2_1, mask2_2)
    
    mask = np.logical_and(mask1,mask2)
    
    pts3d_1 = pts3d_1[mask]
    pts3d_2 = pts3d_2[mask]
    
    n_keypoints = len(pts3d_1)
    print('Total of ' + str(n_keypoints) + ' keypoints are found')
    
    kdt1 = KDTree(cords1, leaf_size=30, metric='euclidean')
    dist1, idx1 = kdt1.query(pts3d_1, k=pointsperset, return_distance=True)  # results come back sorted by distance
    
    pset1 = []
    
    n_sets = min(n_keypoints, no_sets)  # cap the number of sets at the available keypoint matches
    print('Total of ' + str(n_sets) + ' sets are found')
    for i in range(n_sets):
        pset1.append(pts3d_1[i])
        for j in range(pointsperset):
            pset1.append(cords1[idx1[i][j]])
    pset1 = np.array(pset1)
    
    kdt2 = KDTree(cords2, leaf_size=30, metric='euclidean')
    dist2, idx2 = kdt2.query(pts3d_2, k=pointsperset, return_distance=True)
    
    pset2 = []
    
    for i in range(n_sets):
        pset2.append(pts3d_2[i])
        for j in range(pointsperset):
            pset2.append(cords2[idx2[i][j]])
    pset2 = np.array(pset2)    
    
    return (pset1, pset2)
Example #4
def _hdbscan_prims_kdtree(X, min_samples=5, alpha=1.0,
                          metric='minkowski', p=2, leaf_size=40, gen_min_span_tree=False):
    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
    elif p is None:
        p = 2  # Unused, but needs to be integer; assume euclidean

    size = X.shape[0]
    min_samples = min(size - 1, min_samples)

    tree = KDTree(X, metric=metric, leaf_size=leaf_size)

    dist_metric = DistanceMetric.get_metric(metric)

    core_distances = tree.query(X, k=min_samples,
                                dualtree=True,
                                breadth_first=True)[0][:, -1]
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric, alpha)

    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    single_linkage_tree = label(min_spanning_tree)

    return single_linkage_tree, None
Example #5
class Document:

	def __init__(self, embeddings=None, doc_file_name=None, word_index=None, model=None, use_lemma=False):
		# Normal case: build the KD-tree directly from embeddings.
		# Note: `is None` checks are required here; `== None` is ambiguous for arrays.
		if embeddings is None and word_index is not None and model is not None:
			(idx, embeddings) = Word2VecExecuter.Word2VecLoadWordsHashTable(model, word_index)
			embeddings = np.array(embeddings)
		elif doc_file_name is not None and model is not None:
			Features.USE_LEMMA = use_lemma
			Features.REMOVE_FEATURES_ONLY_APPEARING_ONE_TIME = False
			Features.REMOVE_FEATURES_APPEARING_IN_ONLY_ONE_DOCUMENT = False
			words = Features.ReadDependencyParseFile(doc_file_name, funit=Features.FeatureUnits.WORD, remove=False)
			(word_index, embeddings) = Word2VecExecuter.Word2VecLoadWordsHashTable(model, words)
			embeddings = np.array(embeddings)
			del word_index

		self.kd_tree = KDTree(normalize(embeddings), leaf_size=30, metric='euclidean')


	def distance(self, other, theta=0.5):
		if other.__class__ == Document:
			(d_self_to_other, i_self_to_other) = self.kd_tree.query(other.kd_tree.data, k=1, return_distance=True) 
			del i_self_to_other
			(d_other_to_self, i_other_to_self) = other.kd_tree.query(self.kd_tree.data, k=1, return_distance=True) 
			del i_other_to_self
			return np.mean(d_self_to_other)*theta + np.mean(d_other_to_self)*(1-theta)
def match(x,y,mytab):
    """Routine that matches the truth catalog
    with the input table
    
    Args:
    ----
        x: `float` RA of the truth objects to match (in degrees)
        y: `float` dec of the truth objects to match (in degrees)
        mytab: `astropy.table.Table` table containing the L2
            input catalog.

    Returns:
    -------
        ind: `int` array of indices to select the truth objects
            that match the detected objects
    """
    X = np.zeros((len(x), 2))
    X[:, 0] = x
    X[:, 1] = y
    tree = KDTree(X, leaf_size=40)
    Y = np.zeros((len(mytab), 2))
    Y[:, 0] = mytab['coord_ra'] * 180 / np.pi
    Y[:, 1] = mytab['coord_dec'] * 180 / np.pi
    dist, ind = tree.query(Y, k=1)
    print('Matches with distance > 1 px:', np.count_nonzero(dist > 1))
    return ind
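A hedged usage sketch with a synthetic catalog; the `mytab` column names follow the docstring, with values in radians as the degree conversion above implies:

import numpy as np
from astropy.table import Table

ra_truth = np.random.uniform(10.0, 10.5, 100)    # degrees
dec_truth = np.random.uniform(-5.0, -4.5, 100)   # degrees
mytab = Table({'coord_ra': np.deg2rad(ra_truth),
               'coord_dec': np.deg2rad(dec_truth)})
ind = match(ra_truth, dec_truth, mytab)  # truth index matched to each table row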
Example #7
def compute_centroids(X, C):
    """Compute the centroids for dataset X given centers C. Note: centers
    C may not belong to X.
    """
    tree = KDTree(X)
    centroids = tree.query(C, k=1, return_distance=False).squeeze()
    return centroids
Example #8
def compute_labels(X, C):
    """Compute the cluster labels for dataset X given centers C.
    """
    # labels = np.argmin(pairwise_distances(C, X), axis=0) # THIS REQUIRES TOO MUCH MEMORY FOR LARGE X
    tree = KDTree(C)
    labels = tree.query(X, k=1, return_distance=False).squeeze()
    return labels
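A small sketch exercising both helpers on random data; it assumes NumPy and sklearn's KDTree are imported as above:

import numpy as np

X = np.random.rand(200, 2)   # dataset
C = np.random.rand(5, 2)     # candidate centers; they need not belong to X
labels = compute_labels(X, C)        # index of the nearest center for each point
centroids = compute_centroids(X, C)  # index of the nearest dataset point for each center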
Example #9
    def buildDistanceMap (self, X, Y):
        classes = np.unique(Y)
        nClasses = len(classes)
        tree = KDTree(X)
        nRows = X.shape[0]

        TSOri = np.array([]).reshape(0,self.k)

        distanceMap = np.array([]).reshape(0,self.k)
        labels = np.array([]).reshape(0,self.k)

        for row in range(nRows):
            distances, indicesOfNeighbors = tree.query(X[row].reshape(1,-1), k = self.k+1)

            distances = distances[0][1:]
            indicesOfNeighbors = indicesOfNeighbors[0][1:]

            distanceMap = np.append(distanceMap, np.array(distances).reshape(1,self.k), axis=0)
            labels = np.append(labels, np.array(Y[indicesOfNeighbors]).reshape(1,self.k),axis=0)

        for c in classes:
            nTraining = np.sum(Y == c)
            labelTmp = labels[Y.ravel() == c,:]

            tmpKNNClass = labelTmp.ravel()
            TSOri = np.append(TSOri, len(tmpKNNClass[tmpKNNClass == c]) / (nTraining*float(self.k)))

        return distanceMap, labels, TSOri    
def kdtree(data, lake_matrix, k_neighbors=10, leaf_size=20):
    # training
    kdtree = KDTree(data, leaf_size=leaf_size, metric='euclidean')

    # testing
    distances, indices = kdtree.query(lake_matrix, k=k_neighbors)
    return np.array(indices), distances
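A quick usage sketch for the helper above, on synthetic arrays; the names are illustrative only:

import numpy as np
from sklearn.neighbors import KDTree

data = np.random.rand(500, 3)         # reference points
lake_matrix = np.random.rand(20, 3)   # query points
indices, distances = kdtree(data, lake_matrix, k_neighbors=5)
print(indices.shape, distances.shape)  # (20, 5) (20, 5)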
Example #11
def match(x1, y1, x2=None, y2=None, k=5, kdt=None):
    X1 = np.vstack([x1, y1]).T
    if kdt is None:
        # only build the reference tree when one is not supplied
        X2 = np.vstack([x2, y2]).T
        kdt = KDTree(X2, leaf_size=30, metric='euclidean')
    dists, inds = kdt.query(X1, k=k, return_distance=True)
    return dists, inds, kdt
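A hedged usage sketch; the returned tree can be reused for further queries against the same reference set:

import numpy as np

x1, y1 = np.random.rand(50), np.random.rand(50)   # query coordinates
x2, y2 = np.random.rand(80), np.random.rand(80)   # reference coordinates
dists, inds, kdt = match(x1, y1, x2, y2, k=3)
# reuse the tree for a second query set without rebuilding it:
dists2, inds2, _ = match(y1, x1, kdt=kdt)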
Example #12
def margin(indices, k, X, y):
    margins = []
    kd_tree = KDTree(X)
    for img_index in indices:
        margin = 0
        in_class = 0
        current_class = y[img_index]
        dists, neighbour_indices = kd_tree.query(X[img_index].reshape((1, X[img_index].shape[0])),
                                                 k)
        for index in neighbour_indices[0]:
            if y[index] == current_class:
                in_class += 1
        neighbour_dict = {}
        for index in neighbour_indices[0]:
            if y[index] in neighbour_dict:
                neighbour_dict[y[index]] += 1
            else:
                neighbour_dict[y[index]] = 1
        # drop the query's own class; guard against it being absent and
        # against a neighbourhood containing no other class
        neighbour_dict.pop(current_class, None)
        most_frequent = 0
        if neighbour_dict:
            most_frequent = max(neighbour_dict.items(), key=lambda x: x[1])[1]
        margin = in_class - most_frequent
        margins.append(margin)
    return margins
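A sketch of how `margin` might be called, with synthetic features and integer labels; it assumes NumPy and sklearn's KDTree imports:

import numpy as np

X = np.random.rand(100, 8)
y = np.random.randint(0, 3, size=100)
margins = margin(indices=[0, 5, 10], k=7, X=X, y=y)  # one margin per queried index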
Example #13
def _hdbscan_large_kdtree_cdist(X, min_cluster_size=5, min_samples=None, alpha=1.0,
                                metric='minkowski', p=2, gen_min_span_tree=False):

    if p is None:
        p = 2

    size = X.shape[0]
    min_samples = min(size - 1, min_samples)

    if metric == 'minkowski':
        tree = KDTree(X, metric=metric, p=p)
    else:
        tree = KDTree(X, metric=metric)

    core_distances = tree.query(X, k=min_samples)[0][:,-1]

    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, metric, p)
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    single_linkage_tree = label(min_spanning_tree)
    condensed_tree = condense_tree(single_linkage_tree,
                                   min_cluster_size)
    stability_dict = compute_stability(condensed_tree)
    cluster_list = get_clusters(condensed_tree, stability_dict)

    labels = -1 * np.ones(X.shape[0], dtype=int)
    probabilities = np.zeros(X.shape[0], dtype=float)
    for index, (cluster, prob) in enumerate(cluster_list):
        labels[cluster] = index
        probabilities[cluster] = prob
    return labels, probabilities, condensed_tree, single_linkage_tree, None
Example #14
def _hdbscan_prims_kdtree(X, min_samples=5, alpha=1.0,
                          metric='minkowski', p=2, leaf_size=40, gen_min_span_tree=False):
    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
    elif p is None:
        p = 2  # Unused, but needs to be integer; assume euclidean

    size = X.shape[0]
    min_samples = min(size - 1, min_samples)

    tree = KDTree(X, metric=metric, leaf_size=leaf_size)

    # TODO: deal with p for minkowski appropriately
    dist_metric = DistanceMetric.get_metric(metric)

    # Get the distance to the kth nearest neighbour of every point
    core_distances = tree.query(X, k=min_samples,
                                dualtree=True,
                                breadth_first=True)[0][:, -1]
    # Mutual reachability distance is implicit in mst_linkage_core_cdist
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric, alpha)

    # Sort the edges of the min_spanning_tree by weight
    min_spanning_tree = min_spanning_tree[np.argsort(min_spanning_tree.T[2]), :]

    # Convert the edge list into standard hierarchical clustering format
    single_linkage_tree = label(min_spanning_tree)

    return single_linkage_tree, None
def _rsl_prims_kdtree(X, cut, k=5, alpha=1.4142135623730951, gamma=5, metric='minkowski', p=2):

    if metric == 'minkowski':
        if p is None:
            raise TypeError('Minkowski metric given but no p value supplied!')
        if p < 0:
            raise ValueError('Minkowski metric with negative p value is not defined!')
    elif p is None:
        p = 2 # Unused, but needs to be integer; assume euclidean

    size = X.shape[0]
    k = min(size - 1, k)

    tree = KDTree(X, metric=metric)

    dist_metric = DistanceMetric.get_metric(metric)

    core_distances = tree.query(X, k=k)[0][:,-1]
    min_spanning_tree = mst_linkage_core_cdist(X, core_distances, dist_metric)

    single_linkage_tree = label(min_spanning_tree)
    single_linkage_tree = SingleLinkageTree(single_linkage_tree)

    labels = single_linkage_tree.get_clusters(cut, gamma)

    return labels, single_linkage_tree
Example #16
def margin_new(indices, k, X, y):
    margins = []
    kd_tree = KDTree(X)
    for img_index in indices:
        margin = 0
        dist_to_class = 0
        dist_to_others = 0
        current_class = y[img_index]
        dists, neighbour_indices = kd_tree.query(X[img_index].reshape((1, X[img_index].shape[0])),
                                                 k)
        classes = {}
        for i in range(neighbour_indices[0].shape[0]):
            index = neighbour_indices[0][i]
            if y[index] in classes:
                classes[y[index]] += dists[0][i]
            else:
                classes[y[index]] = dists[0][i]
        dist_to_class = classes[current_class]
        classes.pop(current_class)
        # print classes.items()
        if classes:
            dist_to_others = min(classes.items(), key=lambda x: x[1])[1]
        margin = dist_to_class - dist_to_others
        margins.append(margin)
    return margins
def test_kdtree_projection(datas):

    from sklearn.neighbors import KDTree
    from sklearn import random_projection


    # datas = parse()
    Fs = fingerprints(datas)

    # The random projection
    transformer = random_projection.GaussianRandomProjection(n_components = 128)
    Fs_new = transformer.fit_transform(Fs)
    print(Fs_new.shape)

    tree = KDTree(Fs_new, leaf_size=20)

    # Select a random target
    target_i = random.choice(range(len( datas )))
    target = datas[target_i]
    Tf = np.vstack([fingerprint(target)])
    Tf_new = transformer.transform(Tf)

    # Match it
    with timer(10):
        for _ in range(10):
            dist, ind = tree.query(Tf_new, k=3)
    assert datas[ind[0][0]] == datas[target_i]
Example #18
	def constructLMap(self):
		self.obstacleArray = []
		self.allPositions = []	
		#build your obstacle array 
		for i in range( len(self.map.grid) ):	
			for j in range( len(self.map.grid[0])):	
				[x, y] = self.map.cell_position(i, j) 
				if self.map.get_cell(x,y) == 1.0:
					self.obstacleArray.append(np.array(self.map.cell_position(i, j))) 
					#print self.map.cell_position(i, j)	
				self.allPositions.append(np.array(self.map.cell_position(i, j)))  
		# pass the obstacle positions into a KD-tree and compute, for every
		# cell, the distance to the nearest obstacle
		kdt = KDTree(self.obstacleArray)
		dists = kdt.query(self.allPositions, k=1)[0]
		self.laserStdDev = self.config["laser_sigma_hit"]
		constant = 1.0 / (m.sqrt(2 * m.pi) * self.laserStdDev)
		eExp = np.exp(-0.5 * (dists ** 2) / (self.laserStdDev ** 2))
		probObsGivenLaser = eExp
		self.lMap.grid = probObsGivenLaser.reshape(self.lMap.grid.shape) 

		self.occupancyGridMsg = self.lMap.to_message()		
		
		self.lMapPublisher.publish(self.occupancyGridMsg) 
Example #19
def match_regions(polygons, regionlocs, n_dim=2):
    """

    Parameters
    ----------
    polygons: list or array_like
        the polygons information.
    regionlocs: array_like
        the location information of the regions.
    n_dim: integer
        the number of dimensions.

    Returns
    -------
    assign_r: array_like
        the assigned regions.
    """
    n = len(polygons)
    centroids = np.zeros((n, n_dim))
    for i in range(n):
        centroids[i, :] = np.array(polygons[i])
    ret = KDTree(regionlocs)
    assign_r = np.zeros(n).astype(int)
    for i in range(n):
        # query returns (distances, indices); take the scalar index
        assign_r[i] = ret.query(centroids[[i]])[1][0][0]
    return assign_r
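A hedged usage sketch, taking each "polygon" to be its centroid coordinates as the loop above assumes:

import numpy as np

regionlocs = np.random.rand(50, 2)                 # region locations
polygons = [np.random.rand(2) for _ in range(10)]  # one centroid per polygon
assign_r = match_regions(polygons, regionlocs)     # nearest region per polygon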
def uniform_points_points_sampling(limits, points, n):
    """Select the spatial uniform points in the sample by sampling uniform
    spatial points and getting the nearest ones in the available ones.

    Parameters
    ----------
    limits: numpy.ndarray, shape (2, 2)
        the limits of the space: the four corners of the square that define
        the whole retrievable region.
    points: numpy.ndarray
        the points in the space selected.
    n: int
        the number of samples we want.

    Returns
    -------
    indices: numpy.ndarray, shape(n)
        the indices of the samples.

    """

    ## 0. Initialize retriever
    retriever = KDTree(points)
    ## 1. Compute spatial uniform points
    points_s = uniform_points_sampling(limits, n)
    ## 2. Get the nearest points in the sample
    result = retriever.query(points_s, k=1)
    indices = result[1]
    indices = indices.astype(int)
    return indices
Example #21
def estimatenormals(points, npoints=40, method='pca'):
    """
    estimate the normals of points

    :param points: an array of [x, y, z]
    :param npoints: number of neighbouring points used to estimate each normal
    :param method: 'pca' or 'ransac'; theoretically ransac is more precise when there are more points
    :return: a list of normal vectors

    author: weiwei
    date: 20170714
    """

    pointsnormals = []
    camerapos = np.array([0.0,0.0,0.0])
    kdt = KDTree(points)

    if method == 'pca':
        regionpntidlist = kdt.query(points, k=npoints, return_distance=False)
        for i, pntidlist in enumerate(regionpntidlist):
            regionpnts = points[pntidlist]
            covmat = np.cov(regionpnts.T)
            eigvalues, eigmat = np.linalg.eig(covmat)
            idx = np.argmin(eigvalues)
            eigvec = eigmat[:, idx]
            if np.dot(eigvec, camerapos-points[i]) < 0:
                eigvec = -eigvec
            pointsnormals.append(eigvec)
    elif method == 'ransac':
        # NOTE: this part is not usable due to small npoints
        ransacer = linear_model.RANSACRegressor(linear_model.LinearRegression())
        regionpntidlist = kdt.query(points, k=npoints, return_distance=False)
        for i, pntidlist in enumerate(regionpntidlist):
            XYZ = points[pntidlist]
            ransacer.fit(XYZ[:, 0:2], XYZ[:, 2])
            inlier_mask = ransacer.inlier_mask_

            regionpnts = XYZ[inlier_mask]
            covmat = np.cov(regionpnts.T)
            eigvalues, eigmat = np.linalg.eig(covmat)
            idx = np.argmin(eigvalues)
            eigvec = eigmat[:, idx]
            if np.dot(eigvec, camerapos-points[i]) < 0:
                eigvec = -eigvec
            pointsnormals.append(eigvec)

    return pointsnormals
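A sketch on a random point cloud (the 'pca' path only); it assumes NumPy and sklearn's KDTree are imported as in the example:

import numpy as np
from sklearn.neighbors import KDTree

points = np.random.rand(500, 3)
normals = estimatenormals(points, npoints=20, method='pca')
print(len(normals), normals[0].shape)  # 500 (3,)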
Example #22
def get_hip_rank(points, sub):
    sub_coords = sub[['lat', 'lng']].values
    if not sub_coords.size:  # .shape is never falsy for a 2-D array; test for emptiness instead
        return []
    sub_scores = sub.checkinsCount.apply(int).values
    kdt = KDTree(sub_coords, metric='euclidean')
    d, i = kdt.query(np.array(points), k=10)
    return (sub_scores[i] / d**2 * 1e-11).sum(axis=1)
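A hedged sketch of the expected inputs: a pandas frame with lat, lng and checkinsCount columns, names following the code above:

import numpy as np
import pandas as pd

sub = pd.DataFrame({'lat': np.random.uniform(40.0, 41.0, 200),
                    'lng': np.random.uniform(-74.0, -73.0, 200),
                    'checkinsCount': np.random.randint(1, 1000, 200).astype(str)})
points = np.column_stack([np.random.uniform(40.0, 41.0, 5),
                          np.random.uniform(-74.0, -73.0, 5)])
scores = get_hip_rank(points, sub)  # one hipness score per query point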
Example #23
def get_median_neighbors(df, n_neighbors, adj_r):
    '''
    INPUT: Pandas dataframe, and the number of comparable neighbors
    of each listing we'll take the median price of in adding the
    median_neighbor_prices feature.
    OUTPUT: Pandas dataframe with the median prices of the n_neighbors
    closest comparables added as a feature. This is accomplished using a
    KD-tree to search for nearest neighbors.
    '''
    kd_df = df[['latitude', 'longitude']]
    kdvals = kd_df.values
    kd = KDTree(kdvals, leaf_size=1000)
    pickle.dump(kd, open('../models/kd_tree.pkl', 'wb'))
    neighbors = kd.query(kdvals, k=100)

    median_neighbor_prices = []

    for i in range(len(df)):
        listing_neighbors = neighbors[1][i]
        listing_id = df.loc[i, 'id']
        n_beds = df.loc[i, 'beds']
        sale_y = df.loc[i, 'sale_y']

        sub_df = df[df.index.isin(listing_neighbors)]
        sub_df = sub_df[
            (sub_df['beds'] == n_beds) &
            (sub_df['id'] != listing_id)
        ]

        comp_listings = [item for item in listing_neighbors if item in sub_df.index]
        df_filtered = pd.DataFrame()
        df_filtered['last sale price'] = df['last sale price'][comp_listings][:n_neighbors]
        df_filtered['sale_y'] = df['sale_y'][comp_listings][:n_neighbors]

        df_filtered['price adjusted'] = df_filtered['last sale price'] * (1.0 + (sale_y - df_filtered['sale_y']) * adj_r)
        med_price = df_filtered['price adjusted'].median()
        if med_price > 0:
            median_neighbor_prices.append(med_price)
        else:
            # widen the comparable set by ten listings and try again
            df_filtered = pd.DataFrame()
            df_filtered['last sale price'] = df['last sale price'][comp_listings][:n_neighbors + 10]
            df_filtered['sale_y'] = df['sale_y'][comp_listings][:n_neighbors + 10]

            df_filtered['price adjusted'] = df_filtered['last sale price'] * (1.0 + (sale_y - df_filtered['sale_y']) * adj_r)
            med_price = df_filtered['price adjusted'].median()

            if med_price > 0:
                median_neighbor_prices.append(med_price)
            else:
                # fall back to the adjusted median over all comparables
                df['price adjusted'] = df['last sale price'] * (1.0 + (sale_y - df['sale_y']) * adj_r)
                med_price = df['price adjusted'][comp_listings].median()
                median_neighbor_prices.append(med_price)

    df['med_neighbor_price'] = median_neighbor_prices

    rmse = np.mean((df['med_neighbor_price'] - df['last sale price'])**2)**0.5
    print('RMSE is', rmse)
    return df
def environment(x_h, y_h, z_h, x, y, z, D3):
    DD = np.array([x, y, z])
    DD = DD.T
    tree = KDTree(DD, leaf_size=20000)
    index = np.where(x_h == x)[0]
    dist, ind = tree.query(DD[index], k=4)
    r3 = max(dist[0])
    delta3 = D3**3.0 * (1.0/(r3**3.0) - 1.0/(D3**3.0))
    return delta3
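A sketch of a call, feeding the coordinates of one point back in as the query; it assumes NumPy and sklearn's KDTree imports:

import numpy as np

x = np.random.rand(1000)
y = np.random.rand(1000)
z = np.random.rand(1000)
delta3 = environment(x[0], y[0], z[0], x, y, z, D3=1.0)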
Example #25
def retrieve_7major_cp(locs, raw_locs, raw_cps):
    raw_cps = np.array(raw_cps).astype(int)
    ret = KDTree(raw_locs)
    new_cps = []
    for i in range(len(locs)):
        neighs = ret.query(locs[[i]], 7)[1].ravel()
        c = Counter([raw_cps[nei] for nei in neighs])
        new_cps.append(c.most_common(1)[0][0])  # indexing Counter.keys()/.values() breaks on Python 3
    return new_cps
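A usage sketch with synthetic locations and integer class labels; it assumes NumPy, `collections.Counter` and sklearn's KDTree are imported as in the example:

import numpy as np

raw_locs = np.random.rand(100, 2)
raw_cps = np.random.randint(0, 5, size=100)
locs = np.random.rand(10, 2)
new_cps = retrieve_7major_cp(locs, raw_locs, raw_cps)  # majority label of the 7 nearest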
def negativeLabels(features, positiveLabels):
    # [[]] * n would alias one shared inner list n times; build independent lists
    neg_lab = [[] for _ in range(len(features))]
    for i in range(1, len(features)):
        kdt = KDTree(features[i]['RegionCenter'], metric='euclidean')
        neighb = kdt.query(features[i-1]['RegionCenter'], k=3, return_distance=False)
        for j in range(1, len(features[i])):
            for m in range(0, neighb.shape[1]):
                neg_lab[i].append([j,neighb[j][m]])
    return neg_lab
Example #27
def find_knn(pts0, eval_pts, k=15):
    '''
    find the points within `pts0` closest to `eval_pts`
    '''
    pts0range = (pts0.max(axis=0) - pts0.min(axis=0))
    neigh = KDTree(pts0 / pts0range)

    nni = neigh.query(eval_pts / pts0range, k=k, return_distance=False)
    return nni
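A quick sketch; the per-dimension rescaling above makes the L2 metric comparable across axes with different ranges:

import numpy as np

pts0 = np.random.rand(1000, 3)
eval_pts = np.random.rand(5, 3)
nni = find_knn(pts0, eval_pts, k=10)  # indices into pts0, shape (5, 10)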
Example #28
def main():

    digits = load_digits()

    X = digits.data
    y = digits.target

    num_classes = np.unique(y).shape[0]

    plot_digits(X)

    # TSNE
    # Barnes-Hut: O(d NlogN) where d is dim and N is the number of samples
    # Exact: O(d N^2)
    t0 = time()
    tsne = manifold.TSNE(n_components=2, init="pca", method="barnes_hut", verbose=1)
    X_tsne = tsne.fit_transform(X)
    t1 = time()
    print "t-SNE: %.2f sec" % (t1 - t0)
    tsne.get_params()

    plt.figure(2)
    for k in range(num_classes):
        plt.plot(X_tsne[y == k, 0], X_tsne[y == k, 1], "o")
    plt.title("t-SNE embedding of digits dataset")
    plt.xlabel("X1")
    plt.ylabel("X2")
    axes = plt.gca()
    axes.set_xlim([X_tsne[:, 0].min() - 1, X_tsne[:, 0].max() + 1])
    axes.set_ylim([X_tsne[:, 1].min() - 1, X_tsne[:, 1].max() + 1])
    plt.show()

    # ISOMAP
    # 1. Nearest neighbors search: O(d log k N log N)
    # 2. Shortest path graph search: O(N^2(k+log(N))
    # 3. Partial eigenvalue decomposition: O(dN^2)

    t0 = time()
    isomap = manifold.Isomap(n_neighbors=5, n_components=2)
    X_isomap = isomap.fit_transform(X)
    t1 = time()
    print "Isomap: %.2f sec" % (t1 - t0)
    isomap.get_params()

    plt.figure(3)
    for k in range(num_classes):
        plt.plot(X_isomap[y == k, 0], X_isomap[y == k, 1], "o", label=str(k), linewidth=2)
    plt.title("Isomap embedding of the digits dataset")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.show()

    # Use KD-tree to find k-nearest neighbors to a query image
    kdt = KDTree(X_isomap)
    Q = np.array([[-160, -30], [-102, 14]])
    kdt_dist, kdt_idx = kdt.query(Q, k=20)
    plot_digits(X[kdt_idx.ravel(), :])
def test_distance(datas):
    from sklearn.neighbors import KDTree
    from sklearn import random_projection

    Fs = fingerprints(datas)

    # The random projection
    transformer = random_projection.GaussianRandomProjection(n_components = 7)
    Fs_new = transformer.fit_transform(Fs)
    print(Fs_new.shape)

    tree = KDTree(Fs_new, leaf_size=20)

    # Select a random target
    correct = []
    wrong = []

    for _ in range(100):
        target_i = random.choice(range(len( datas )))
        target_j = random.choice(range(len( datas )))

        # target i
        target = datas[target_i]
        Tf = np.vstack([fingerprint(target)])
        Tf_new = transformer.transform(Tf)


        # target j
        target2 = datas[target_j]
        Tf2 = np.vstack([fingerprint(target2)])
        Tf_new2 = transformer.transform(Tf2)


        # Match it
        start = clock()
        dist, ind = tree.query(Tf_new.astype(int), k=1)
        dist2, ind2 = tree.query(Tf_new2.astype(int), k=1)

        correct.append(match(Fs[ind[0][0]], Tf[0]))
        wrong.append(match(Fs[ind2[0][0]], Tf[0]))
        end = clock()


    print "Correct: %2.5f (%2.5f), Random: %2.5f (%2.5f)" % (np.mean(correct), np.std(correct), np.mean(wrong), np.std(wrong))
Example #30
def knn_cond_mutual_information(x, y, z, k, standardize = True, dualtree = False):
    """
    Computes conditional mutual information between two time series x and y 
    conditioned on a third z (which can be multi-dimensional) as
        I(x; y | z) = sum( p(x,y,z) * log( p(z)*p(x,y,z) / p(x,z)*p(y,z) ),
        where p(z), p(x,z), p(y,z) and p(x,y,z) are probability distributions.
    Performs k-nearest neighbours search using k-dimensional tree.
    Uses sklearn.neighbors for KDTree class.

    standardize - whether transform data to zero mean and unit variance
    dualtree - whether to use dualtree formalism in k-d tree for the k-NN search
      could lead to better performance with large N

    According to Frenzel S. and Pompe B., Phys. Rev. Lett., 99, 2007.
    """

    from sklearn.neighbors import KDTree

    # prepare data
    if standardize:
        x = _center_ts(x)
        y = _center_ts(y)
        if isinstance(z, np.ndarray):
            z = _center_ts(z)
        elif isinstance(z, list):
            # rebind each series; the original loop assigned to the loop
            # variable, which has no effect
            z = [_center_ts(cond_ts) for cond_ts in z]
    z = np.atleast_2d(z)
    data = np.vstack([x, y, z]).T

    # build k-d tree using the maximum (Chebyshev) norm
    tree = KDTree(data, leaf_size = 15, metric = "chebyshev")
    # find distance to k-nearest neighbour per point
    dist, _ = tree.query(data, k = k + 1, return_distance = True, dualtree = dualtree)

    sum_ = 0
    # prepare marginal vectors xz, yz and z
    n_x_z_data = np.delete(data, 1, axis = 1)
    n_y_z_data = np.delete(data, 0, axis = 1)
    n_z_data = np.delete(data, [0, 1], axis = 1)

    # build and query k-d trees in marginal spaces for number of points in a given dist from a point
    tree_x_z = KDTree(n_x_z_data, leaf_size = 15, metric = "chebyshev")
    n_x_z = tree_x_z.query_radius(n_x_z_data, r = dist[:, -1], count_only = True) - 2
    tree_y_z = KDTree(n_y_z_data, leaf_size = 15, metric = "chebyshev")
    n_y_z = tree_y_z.query_radius(n_y_z_data, r = dist[:, -1], count_only = True) - 2
    tree_z = KDTree(n_z_data, leaf_size = 15, metric = "chebyshev")
    n_z = tree_z.query_radius(n_z_data, r = dist[:, -1], count_only = True) - 2

    # count points
    for n in range(data.shape[0]):
        sum_ += _neg_harmonic(n_x_z[n]) + _neg_harmonic(n_y_z[n]) - _neg_harmonic(n_z[n])

    sum_ /= data.shape[0]

    return sum_ - _neg_harmonic(k-1)
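A hedged sketch of a call on synthetic coupled series; `_center_ts` and `_neg_harmonic` are assumed to be available from the enclosing module:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(1000)
z = rng.standard_normal(1000)
y = x + 0.5 * z + 0.1 * rng.standard_normal(1000)
cmi = knn_cond_mutual_information(x, y, z, k=8)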
Example #31
class KDTreeUtil:
    def __init__(self, data, leaf_size=40):
        self.core = KDTree(np.concatenate(data, axis=0), leaf_size)

    def query(self, dot: np.ndarray, k=1, return_dist=False):
        return self.core.query(dot, k=k, return_distance=return_dist)
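A minimal usage sketch; the constructor concatenates a list of coordinate chunks into one tree:

import numpy as np

chunks = [np.random.rand(10, 2), np.random.rand(15, 2)]
util = KDTreeUtil(chunks, leaf_size=20)
idx = util.query(np.random.rand(3, 2), k=4)  # indices only, per the default return_dist=False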
Example #32
    "AGE", "YRS_CLIMBING", "HEIGHT", "APEINDEX", "WEIGHT", "BMI", "B_AVG",
    "S_AVG"
]
knnData = normData[knnProfileVars]
knnData.describe()

# The reason I propose these variables is that they capture each climber's current level together with descriptive variables they cannot change: they simply describe their current physique and experience. This lets us group climbers with similar physiques, experience and current performance, regardless of how they train, how often they climb, how they approach improvement, what they eat, etc., because those are the variables a climber can actually change in order to produce a change in their performance.
#
# Now let's use a KDTree (the algorithm inside the KNN algorithm) to find the nearest neighbors of a random climber, say the 10th climber in the list.

# In[22]:

climberID = 10
randomClimber = knnData.loc[climberID, :]
tree = KDTree(knnData)
dist, ids = tree.query([randomClimber], k=int(len(knnData.index) / 3))

closestClimbers = knnData.loc[ids[0], :]

comparison = pd.DataFrame()
comparison["SUBJECT_CLIMBER"] = randomClimber
comparison["AVERAGE_CLIMBER"] = knnData.mean()
comparison["KNN_CLOSEST_AVG"] = closestClimbers.mean()

print("After finding the", str(int(len(knnData.index) / 3)),
      "nearest neighbors we see this behavior in the data distribution")
#display(comparison)

# As you can see, the new group consists of climbers whose profiles are more similar to the climber we care about. Learning the effect of particular actions on performance now makes more sense, since climbers with very similar bodies and experience would intuitively benefit from similar actions. So if a climber with a similar profile saw a certain benefit from an action, that suggests you probably would too.
#
# So let's get back to predictive modeling using only these subsets.
Example #33
class ChessBoardCornerDetector:
    def __init__(self):
        self.distance_threshold = 0.06
        self.calibration_points = None
        self.centers = None
        self.centers_kdtree = None
        self.points_to_examine_queue = None

    def detect_chess_board_corners(self,
                                   img,
                                   debug=False,
                                   *,
                                   path_to_image=None,
                                   path_to_output_folder=None):
        # Calculate corner responses
        response = self.calculate_corner_responses(img)
        # print("%8.2f, convolution" % (time.time() - t_start))
        # Localized normalization of responses
        response_relative_to_neighbourhood = self.local_normalization(
            response, 511)
        # print("%8.2f, relative response" % (time.time() - t_start))
        # Threshold responses
        relative_responses_thresholded = self.threshold_responses(
            response_relative_to_neighbourhood)
        # Locate centers of peaks
        centers = self.locate_centers_of_peaks(relative_responses_thresholded)
        # Select central center of mass
        selected_center = self.select_central_peak_location(centers)
        # Enumerate detected peaks
        calibration_points = self.enumerate_peaks(centers, selected_center)
        # print("%8.2f, grid mapping" % (time.time() - t_start))
        # write output images if debug is True
        if debug:
            # making the output folders
            path_to_output_local_maxima_folder = path_to_output_folder / '4_local_maxima'
            path_to_output_local_maxima_folder.mkdir(parents=False,
                                                     exist_ok=True)
            path_to_output_response_folder = path_to_output_folder / '1_response'
            path_to_output_response_folder.mkdir(parents=False, exist_ok=True)
            path_to_output_response_neighbourhood_folder = path_to_output_folder / '2_respond_relative_to_neighbourhood'
            path_to_output_response_neighbourhood_folder.mkdir(parents=False,
                                                               exist_ok=True)
            path_to_output_response_threshold_folder = path_to_output_folder / '3_relative_response_thresholded'
            path_to_output_response_threshold_folder.mkdir(parents=False,
                                                           exist_ok=True)
            path_response_1 = path_to_output_response_folder / (
                path_to_image.stem + '_response.png')
            cv2.imwrite(str(path_response_1), response)
            path_response_2 = path_to_output_response_neighbourhood_folder / (
                path_to_image.stem + '_response_relative_to_neighbourhood.png')
            cv2.imwrite(str(path_response_2),
                        response_relative_to_neighbourhood * 255)
            path_response_3 = path_to_output_response_threshold_folder / (
                path_to_image.stem + '_relative_responses_thresholded.png')
            cv2.imwrite(str(path_response_3), relative_responses_thresholded)
            canvas = self.show_detected_calibration_points(
                img, self.calibration_points)
            cv2.circle(canvas, tuple(selected_center.astype(int)), 10,
                       (0, 0, 255), -1)
            path_local_max = path_to_output_local_maxima_folder / (
                path_to_image.stem + '_local_maxima.png')
            cv2.imwrite(str(path_local_max), canvas)
        # Detect image covered
        percentage_image_covered = self.image_coverage(calibration_points, img)
        # How straight are the points?
        stats = self.statistics(calibration_points)
        return self.calibration_points, percentage_image_covered, stats

    # It is not necessary to output the images when we just want the
    # statistics after undistorting.
    def make_statistics(self, img):
        # Calculate corner responses
        response = self.calculate_corner_responses(img)
        # Localized normalization of responses
        response_relative_to_neighbourhood = self.local_normalization(
            response, 511)
        # Threshold responses
        relative_responses_thresholded = self.threshold_responses(
            response_relative_to_neighbourhood)
        # Locate centers of peaks
        centers = self.locate_centers_of_peaks(relative_responses_thresholded)
        # Select central center of mass
        selected_center = self.select_central_peak_location(centers)
        # Enumerate detected peaks
        calibration_points = self.enumerate_peaks(centers, selected_center)
        # How straight are the points?
        stats = self.statistics(calibration_points)
        return stats

    @staticmethod
    def calculate_corner_responses(img):
        locator = MarkerTracker(order=2, kernel_size=45, scale_factor=40)
        greyscale_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        response = locator.apply_convolution_with_complex_kernel(
            greyscale_image)
        return response

    def local_normalization(self, response, neighbourhoodsize):
        _, max_val, _, _ = cv2.minMaxLoc(response)
        response_relative_to_neighbourhood = self.peaks_relative_to_neighbourhood(
            response, neighbourhoodsize, 0.05 * max_val)
        return response_relative_to_neighbourhood

    @staticmethod
    def threshold_responses(response_relative_to_neighbourhood):
        _, relative_responses_thresholded = cv2.threshold(
            response_relative_to_neighbourhood, 0.5, 255, cv2.THRESH_BINARY)
        return relative_responses_thresholded

    def locate_centers_of_peaks(self, relative_responses_thresholded):
        contours, _ = cv2.findContours(
            np.uint8(relative_responses_thresholded), cv2.RETR_TREE,
            cv2.CHAIN_APPROX_SIMPLE)
        centers = list(map(self.get_center_of_mass, contours))
        return centers

    @staticmethod
    def select_central_peak_location(centers):
        mean_position_of_centers = np.mean(centers, axis=0)
        central_center = np.array(
            sorted(list(centers),
                   key=lambda c: np.sqrt(
                       (c[0] - mean_position_of_centers[0])**2 +
                       (c[1] - mean_position_of_centers[1])**2)))
        return central_center[0]

    def enumerate_peaks(self, centers, selected_center):
        self.centers = centers
        self.centers_kdtree = KDTree(np.array(self.centers))
        self.calibration_points = self.initialize_calibration_points(
            selected_center)
        self.points_to_examine_queue = [(0, 0), (1, 0), (0, 1)]
        for x_index, y_index in self.points_to_examine_queue:
            self.apply_all_rules_to_add_calibration_points(x_index, y_index)
        return self.calibration_points

    @staticmethod
    def show_detected_calibration_points(img, calibration_points):
        canvas = img.copy()
        for x_index, temp in calibration_points.items():
            for y_index, cal_point in temp.items():
                cv2.circle(canvas, tuple(cal_point.astype(int)), 20,
                           (0, 255 * (y_index % 2), 255 * (x_index % 2)), 2)
        return canvas

    def initialize_calibration_points(self, selected_center):
        closest_neighbour, _ = self.locate_nearest_neighbour(selected_center)
        direction = selected_center - closest_neighbour
        rotation_matrix = np.array([[0, 1], [-1, 0]])
        hat_vector = np.matmul(direction, rotation_matrix)
        direction_b_neighbour, _ = self.locate_nearest_neighbour(
            selected_center + hat_vector,
            minimum_distance_from_selected_center=-1)
        calibration_points = collections.defaultdict(dict)
        calibration_points[0][0] = selected_center
        calibration_points[1][0] = closest_neighbour
        calibration_points[0][1] = direction_b_neighbour

        return calibration_points

    def apply_all_rules_to_add_calibration_points(self, x_index, y_index):
        self.rule_one(x_index, y_index)
        self.rule_two(x_index, y_index)
        self.rule_three(x_index, y_index)
        self.rule_four(x_index, y_index)
        self.rule_five(x_index, y_index)

    def rule_three(self, x_index, y_index):
        try:
            # Ensure that we don't overwrite already located
            # points.
            if y_index + 1 in self.calibration_points[x_index]:
                return
            position_one = self.calibration_points[x_index - 1][y_index]
            position_two = self.calibration_points[x_index - 1][y_index + 1]
            position_three = self.calibration_points[x_index][y_index]
            predicted_location = position_two + position_three - position_one
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_three - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index][y_index + 1] = location
                self.points_to_examine_queue.append((x_index, y_index + 1))
        except KeyError:
            pass

    def rule_two(self, x_index, y_index):
        try:
            if y_index in self.calibration_points[x_index + 1]:
                return
            position_one = self.calibration_points[x_index - 1][y_index]
            position_two = self.calibration_points[x_index][y_index]
            predicted_location = 2 * position_two - position_one
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index + 1][y_index] = location
                self.points_to_examine_queue.append((x_index + 1, y_index))
        except KeyError:
            pass

    def rule_one(self, x_index, y_index):
        try:
            # Ensure that we don't overwrite already located
            # points.
            if y_index + 1 in self.calibration_points[x_index]:
                return
            position_one = self.calibration_points[x_index][y_index]
            position_two = self.calibration_points[x_index][y_index - 1]
            predicted_location = 2 * position_one - position_two
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index][y_index + 1] = location
                self.points_to_examine_queue.append((x_index, y_index + 1))
        except KeyError:
            pass

    def rule_four(self, x_index, y_index):
        try:
            # Ensure that we don't overwrite already located
            # points.
            if y_index - 1 in self.calibration_points[x_index]:
                return
            position_one = self.calibration_points[x_index][y_index]
            position_two = self.calibration_points[x_index][y_index + 1]
            predicted_location = 2 * position_one - position_two
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index][y_index - 1] = location
                self.points_to_examine_queue.append((x_index, y_index - 1))
        except KeyError:
            pass

    def rule_five(self, x_index, y_index):
        try:
            if y_index in self.calibration_points[x_index - 1]:
                return

            position_one = self.calibration_points[x_index + 1][y_index]
            position_two = self.calibration_points[x_index][y_index]
            predicted_location = 2 * position_two - position_one
            location, distance = self.locate_nearest_neighbour(
                predicted_location, minimum_distance_from_selected_center=-1)
            reference_distance = np.linalg.norm(position_two - position_one)
            if distance / reference_distance < self.distance_threshold:
                self.calibration_points[x_index - 1][y_index] = location
                self.points_to_examine_queue.append((x_index - 1, y_index))
        except KeyError:
            pass

    def locate_nearest_neighbour(self,
                                 selected_center,
                                 minimum_distance_from_selected_center=0):
        reshaped_query_array = np.array(selected_center).reshape(1, -1)
        (distances,
         indices) = self.centers_kdtree.query(reshaped_query_array, 2)
        if distances[0][0] <= minimum_distance_from_selected_center:
            return self.centers[indices[0][1]], distances[0][1]
        else:
            return self.centers[indices[0][0]], distances[0][0]

    @staticmethod
    def distance_to_ref(ref_point):
        return lambda c: ((c[0] - ref_point[0])**2 +
                          (c[1] - ref_point[1])**2)**0.5

    @staticmethod
    def get_center_of_mass(contour):
        m = cv2.moments(contour)
        if m["m00"] > 0:
            cx = m["m10"] / m["m00"]
            cy = m["m01"] / m["m00"]
            result = np.array([cx, cy])
        else:
            result = np.array([contour[0][0][0], contour[0][0][1]])
        return result

    def peaks_relative_to_neighbourhood(self, response, neighbourhoodsize,
                                        value_to_add):
        local_min_image = self.minimum_image_value_in_neighbourhood(
            response, neighbourhoodsize)
        local_max_image = self.maximum_image_value_in_neighbourhood(
            response, neighbourhoodsize)
        response_relative_to_neighbourhood = (response - local_min_image) / (
            value_to_add + local_max_image - local_min_image)
        return response_relative_to_neighbourhood

    @staticmethod
    def minimum_image_value_in_neighbourhood(response, neighbourhood_size):
        """
        A fast method for determining the local minimum value in
        a neighbourhood for an entire image.
        """
        kernel_1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        orig_size = response.shape
        for x in range(int(math.log(neighbourhood_size, 2))):
            eroded_response = cv2.morphologyEx(response, cv2.MORPH_ERODE,
                                               kernel_1)
            response = cv2.resize(eroded_response, None, fx=0.5, fy=0.5)
        local_min_image_temp = cv2.resize(response,
                                          (orig_size[1], orig_size[0]))
        return local_min_image_temp

    @staticmethod
    def maximum_image_value_in_neighbourhood(response, neighbourhood_size):
        """
        A fast method for determining the local maximum value in
        a neighbourhood for an entire image.
        """
        kernel_1 = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        orig_size = response.shape
        for x in range(int(math.log(neighbourhood_size, 2))):
            eroded_response = cv2.morphologyEx(response, cv2.MORPH_DILATE,
                                               kernel_1)
            response = cv2.resize(eroded_response, None, fx=0.5, fy=0.5)
        local_max_image_temp = cv2.resize(response,
                                          (orig_size[1], orig_size[0]))
        return local_max_image_temp

    @staticmethod
    def image_coverage(calibration_points, img):
        h = img.shape[0]
        w = img.shape[1]
        score = np.zeros((10, 10))
        for calibration_point_dict in calibration_points.values():
            for x, y in calibration_point_dict.values():
                (x_bin, x_rem) = divmod(x, w / 10)
                (y_bin, y_rem) = divmod(y, h / 10)
                if x_bin == 10:
                    x_bin = 9
                if y_bin == 10:
                    y_bin = 9
                score[int(x_bin)][int(y_bin)] += 1
        return np.count_nonzero(score)

    @staticmethod
    def shortest_distance(x1, y1, a, b, c):
        d = abs((a * x1 + b * y1 + c)) / (math.sqrt(a * a + b * b))
        return d

    def statistics(self, points):
        # Make a list in which we will return the statistics. This list will contain two elements, each a tuple.
        # The first tuple is the number of tested points and the average pixel deviation from straight lines for the
        # horizontal points; the second tuple is the same for the vertical points.
        return_list = []
        # Check if the outer key defines the rows or the columns, this is not always the same.
        horizontal = 1 if points[0][0][0] - points[0][1][0] < points[0][0][1] - points[0][1][1] else 0
        # Flip the dictionary so we can do this statistic for horizontal and vertical points.
        flipped = collections.defaultdict(dict)
        for key, val in points.items():
            for subkey, subval in val.items():
                flipped[subkey][key] = subval
        # Make sure that we always have the same order, horizontal first in this case.
        horiz_first = (points, flipped) if horizontal else (flipped, points)
        for index, points_list in enumerate(horiz_first):
            count, som = 0, 0
            for k in points_list.values():
                single_col_x, single_col_y = [], []
                if len(k) > 2:
                    for l in k.values():
                        # for the vertical points, X and Y values are switched because polyfit
                        # does not work (well) for (almost) vertical points
                        if index == 0:
                            single_col_x.append(l[0])
                            single_col_y.append(l[1])
                        else:
                            single_col_x.append(l[1])
                            single_col_y.append(l[0])
                    # Fit a line through the horizontal or vertical points
                    z = np.polynomial.polynomial.polyfit(
                        single_col_x, single_col_y, 1)
                    # Calculate the distance for each point to the line
                    for x, y in zip(single_col_x, single_col_y):
                        d = self.shortest_distance(x, y, z[1], -1, z[0])
                        count += 1
                        som += d
            if count != 0:
                return_list.append([count, som / count])
            else:
                return_list.append([count, 0])
        return return_list
def execute(context):
    # Load income distribution
    df = pd.read_excel(
        "%s/filosofi_2015/FILO_DISP_COM.xls" % context.config("data_path"),
        sheet_name="ENSEMBLE",
        skiprows=5)[["CODGEO"] +
                    ["D%d15" % q if q != 5 else "Q215" for q in range(1, 10)]]
    df.columns = [
        "commune_id", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9"
    ]
    df["reference_median"] = df["q5"].values

    # Restrict to the municipalities known to the spatial data
    df_municipalities = context.stage("data.spatial.municipalities")
    requested_communes = set(df_municipalities["commune_id"].unique())
    df = df[df["commune_id"].isin(requested_communes)]

    # Find communes without data
    df["commune_id"] = df["commune_id"].astype("category")
    missing_communes = set(df_municipalities["commune_id"].unique()) - set(
        df["commune_id"].unique())
    print("Found %d/%d municipalities that are missing" %
          (len(missing_communes), len(requested_communes)))

    # Find communes without full distribution
    df["is_imputed"] = df["q2"].isna()
    df["is_missing"] = False
    print("Found %d/%d municipalities which do not have full distribution" %
          (sum(df["is_imputed"]), len(requested_communes)))

    # First, find suitable distribution for incomplete cases by finding the one with the most similar median
    incomplete_medians = df[df["is_imputed"]]["q5"].values

    df_complete = df[~df["is_imputed"]]
    complete_medians = df_complete["q5"].values

    indices = np.argmin(np.abs(complete_medians[:, np.newaxis] -
                               incomplete_medians[np.newaxis, :]),
                        axis=0)

    for k in range(1, 10):
        df.loc[df["is_imputed"],
               "q%d" % k] = df_complete.iloc[indices]["q%d" % k].values

    # Second, add missing municipalities by nearest neighbour
    # ... build tree of existing communes
    df_existing = df_municipalities[df_municipalities["commune_id"].astype(
        str).isin(df["commune_id"])]  # pandas Bug
    coordinates = np.vstack([
        df_existing["geometry"].centroid.x, df_existing["geometry"].centroid.y
    ]).T
    kd_tree = KDTree(coordinates)

    # ... query tree for missing communes
    df_missing = df_municipalities[df_municipalities["commune_id"].astype(
        str).isin(missing_communes)]  # pandas Bug
    coordinates = np.vstack(
        [df_missing["geometry"].centroid.x,
         df_missing["geometry"].centroid.y]).T
    indices = kd_tree.query(coordinates)[1].flatten()

    # ... build data frame of imputed communes
    df_reconstructed = pd.concat([
        df[df["commune_id"] == df_existing.iloc[index]["commune_id"]]
        for index in indices
    ])
    df_reconstructed["commune_id"] = df_missing["commune_id"].values
    df_reconstructed["is_imputed"] = True
    df_reconstructed["is_missing"] = True

    # ... merge the data frames
    df = pd.concat([df, df_reconstructed])
    assert len(df) == len(df["commune_id"].unique())
    assert len(requested_communes - set(df["commune_id"].unique())) == 0

    return df[[
        "commune_id", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9",
        "is_imputed", "is_missing", "reference_median"
    ]]
Example #35
def slice_file(resolution,
               f=None,
               scale_model=None,
               width_px=None,
               height_px=None,
               width_printer=None,
               height_printer=None):

    print("Status: Loading File.")

    width_multiplier = calculateMultiplier(
        width_px, width_printer)  # conversion from mm to pixels
    height_multiplier = calculateMultiplier(
        height_px, height_printer)  # conversion from mm to pixels

    model = STLModel(f)
    stats = model.stats()

    # Note: these are in inches, not mm
    sub_vertex = Vector3(stats['extents']['x']['lower'],
                         stats['extents']['y']['lower'],
                         stats['extents']['z']['lower'])

    center_image = [int(width_px / 2), int(height_px / 2)]  #pixels

    model.xmin = model.xmax = None
    model.ymin = model.ymax = None
    model.zmin = model.zmax = None

    print("Status: Scaling Triangles.")

    for triangle in model.triangles:
        triangle.vertices[0] -= sub_vertex
        triangle.vertices[1] -= sub_vertex
        triangle.vertices[2] -= sub_vertex

        # The lines above have no effect on the normal.

        triangle.vertices[0] = (triangle.vertices[0] * scale_model)  #in inches
        triangle.vertices[1] = (triangle.vertices[1] * scale_model)  #in inches
        triangle.vertices[2] = (triangle.vertices[2] * scale_model)  #in inches

        # Recalculate the triangle normal from this triangle's scaled vertices
        # (the original mistakenly always used model.triangles[0])

        u = triangle.vertices[1] - triangle.vertices[0]
        v = triangle.vertices[2] - triangle.vertices[0]

        triangle.n = Normal((u.y * v.z) - (u.z * v.y),
                            (u.z * v.x) - (u.x * v.z),
                            (u.x * v.y) - (u.y * v.x))
        model.update_extents(triangle)

    print("Status: Calculating Slices")

    stats = model.stats()

    #This is after scaling the object
    sub_vertex = Vector3(stats['extents']['x']['lower'],
                         stats['extents']['y']['lower'],
                         stats['extents']['z']['lower'])
    sup_vertex = Vector3(stats['extents']['x']['upper'],
                         stats['extents']['y']['upper'],
                         stats['extents']['z']['upper'])
    obj_center_xyz = [(sup_vertex.x + sub_vertex.x) / 2,
                      (sup_vertex.y + sub_vertex.y) / 2,
                      (sup_vertex.z + sub_vertex.z) / 2]  #in inches

    slices = np.linspace(
        0.001, stats['extents']['z']['upper'] - 0.001,
        int(stats['extents']['z']['upper'] / (mmToinch(resolution))) + 1)

    tic = time.time()

    for slice in range(len(
            slices)):  #1, int(stats['extents']['z']['upper']), int(interval)):
        dwg = Drawing('outputs/svg/' + str(slice) + '.svg', profile='full')
        pairs = model.slice_at_z(slices[slice])
        #for pair in pairs:
        #	dwg.add(dwg.line(pair[0], pair[1], stroke=rgb(0, 0, 0, "%")))
        #dwg.attribs['viewBox']= str(model.xmin)+" "+str(model.ymin)+" "+ str(model.xmax)+" "+str(model.ymax)
        #dwg.save()
        #cairosvg.svg2png(url = 'outputs/svg/'+str(targetz)+'.svg' , write_to='outputs/png/'+str(targetz)+'.png')

        #Now process vertices
        a = np.asarray(pairs)
        b = a.flatten()
        vert_array = b.reshape(
            int(b.shape[0] / 2), 2
        )  #now one (x, y) row per segment endpoint: twice the rows, two columns
        tree = KDTree(vert_array, leaf_size=3)
        current_index = 1
        vertices = []
        vertice_sets = []
        visited_vertices = [current_index]
        vertices.append(tuple(vert_array[current_index]))
        for i in range(int(vert_array.shape[0] / 2)):
            to_query = np.reshape(vert_array[current_index], (1, 2))
            dist, ind = tree.query(to_query, k=2)
            for id in list(ind[0]):  #there should only ever be two
                if id != current_index:
                    #if len(visited_vertices) >= vert_array.shape[0]/2:
                    #	print 'GOT INTO HERE'
                    #	break
                    #if we have found a loop,
                    if id in visited_vertices:
                        vertices.append(tuple(vert_array[id]))
                        vertice_sets.append(vertices)
                        vertices = []
                        for next_vert in range(vert_array.shape[0]):
                            if next_vert not in visited_vertices:
                                current_index = next_vert
                    #Now that we have found the match, find the corresponding vertex, remember that they are in pairs of two
                    elif id % 2 == 1:
                        current_index = id - 1
                        break
                    else:
                        current_index = id + 1
                        break
            visited_vertices.append(id)
            vertices.append(tuple(vert_array[current_index]))
            visited_vertices.append(current_index)

        #Draw the percentage done
        sys.stdout.write("\r%d%%" % int(slice / len(slices) * 100))
        sys.stdout.flush()

        #Save the last one to the vertice set
        vertice_sets.append(vertices)
        img = Image.new(
            'RGB',
            (height_px, width_px))  # Use RGB, these may be backwards TODO
        draw = ImageDraw.Draw(img)
        for i in range(len(vertice_sets)):
            if len(vertice_sets[i]) > 2:
                pixel_poly = convertToPixels(vertice_sets[i], width_multiplier,
                                             height_multiplier, obj_center_xyz,
                                             center_image)
                draw.polygon(pixel_poly, fill=(255, 255, 255))
        img.save('outputs/png_filled/' + str(slice) + '.png', 'PNG')

    print("Status: Finished Outputting Slices")
    print('Time: ', time.time() - tic)
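The segment-chaining loop is the interesting use of the tree here: slice_at_z returns unordered endpoint pairs, every endpoint also appears (as a duplicate) on the adjacent segment, so a k=2 query finds that duplicate, and jumping to its pair partner walks the contour. A stripped-down sketch of the same idea on a toy square, with made-up data:

import numpy as np
from sklearn.neighbors import KDTree

# Four segments of a unit square; each endpoint becomes its own row,
# so rows 2i and 2i+1 belong to the same segment.
pairs = [((0, 0), (1, 0)), ((1, 0), (1, 1)), ((1, 1), (0, 1)), ((0, 1), (0, 0))]
verts = np.asarray(pairs, dtype=float).reshape(-1, 2)

tree = KDTree(verts, leaf_size=3)
loop, current = [0], 0
for _ in range(len(verts) // 2):
    # k=2 returns the point itself plus its duplicate on the adjacent segment
    _, ind = tree.query(verts[current].reshape(1, -1), k=2)
    match = ind[0][1] if ind[0][0] == current else ind[0][0]
    # endpoints are stored in pairs of two, so the segment partner is +/- 1
    current = match - 1 if match % 2 == 1 else match + 1
    loop.append(current)
print([tuple(verts[i]) for i in loop])  # closed contour around the square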
Exemple #36
0
def generate_label_views(kzip_path,
                         ssd_version,
                         gt_type,
                         n_voting=40,
                         nb_views=2,
                         ws=(256, 128),
                         comp_window=8e3,
                         initial_run=False,
                         out_path=None,
                         verbose=False):
    """

    Parameters
    ----------
    kzip_path : str
    gt_type :  str
    ssd_version : str
    n_voting : int
        Number of collected nodes during BFS for majority vote (label smoothing)
    nb_views : int
    ws: Tuple[int]
    comp_window : float
    initial_run : bool
        if True, will copy SSV from default SSD to SSD with version=gt_type
    out_path : str
        If given, export mesh colored according to GT labels
    verbose : bool
        Print additional information

    Returns
    -------
    Tuple[np.array]
        raw, label and index views
    """
    _render_mesh_coords = load_rendering_func('_render_mesh_coords')
    assert gt_type in ["axgt",
                       "spgt"], "Currently only spine and axon GT is supported"
    n_labels = 5 if gt_type == "axgt" else 4
    palette = generate_palette(n_labels)
    sso_id = int(re.findall(r"/(\d+).", kzip_path)[0])
    sso = SuperSegmentationObject(sso_id, version=ssd_version)
    if initial_run:  # use default SSD version
        orig_sso = SuperSegmentationObject(sso_id)
        orig_sso.copy2dir(dest_dir=sso.ssv_dir, safe=False)
    if not sso.attr_dict_exists:
        msg = 'Attribute dict of original SSV was not copied successfully ' \
              'to target SSD.'
        raise ValueError(msg)
    sso.load_attr_dict()
    indices, vertices, normals = sso.mesh

    # # Load mesh
    vertices = vertices.reshape((-1, 3))

    # load skeleton
    skel = load_skeleton(kzip_path)
    if len(skel) == 1:
        skel = list(skel.values())[0]
    else:
        skel = skel["skeleton"]
    skel_nodes = list(skel.getNodes())

    node_coords = np.array(
        [n.getCoordinate() * sso.scaling for n in skel_nodes])
    node_labels = np.array(
        [str2intconverter(n.getComment(), gt_type) for n in skel_nodes],
        dtype=int)
    node_coords = node_coords[(node_labels != -1)]
    node_labels = node_labels[(node_labels != -1)]

    # create KD tree from skeleton node coordinates
    tree = KDTree(node_coords)
    # transfer labels from skeleton to mesh
    dist, ind = tree.query(vertices, k=1)
    vertex_labels = node_labels[ind]  # retrieving labels of vertices
    if n_voting > 0:
        vertex_labels = bfs_smoothing(vertices,
                                      vertex_labels,
                                      n_voting=n_voting)
    color_array = palette[vertex_labels].astype(np.float32) / 255.

    if out_path is not None:
        if gt_type == 'spgt':
            colors = [[0.6, 0.6, 0.6, 1], [0.9, 0.2, 0.2, 1],
                      [0.1, 0.1, 0.1, 1], [0.05, 0.6, 0.6, 1],
                      [0.9, 0.9, 0.9, 1]]
        else:  # dendrite, axon, soma, bouton, terminal, background
            colors = [[0.6, 0.6, 0.6, 1], [0.9, 0.2, 0.2, 1],
                      [0.1, 0.1, 0.1, 1], [0.05, 0.6, 0.6, 1],
                      [0.6, 0.05, 0.05, 1], [0.9, 0.9, 0.9, 1]]
        colors = (np.array(colors) * 255).astype(np.uint8)
        color_array_mesh = colors[
            vertex_labels][:,
                           0]  # TODO: check why only first element, maybe colors introduces an additional axis
        write_mesh2kzip("{}/sso_{}_gtlabels.k.zip".format(out_path, sso.id),
                        sso.mesh[0],
                        sso.mesh[1],
                        sso.mesh[2],
                        color_array_mesh,
                        ply_fname="gtlabels.ply")

    # Initializing mesh object with ground truth coloring
    mo = MeshObject("neuron", indices, vertices, color=color_array)

    # use downsampled locations for view locations, only if they are close to a
    # labeled skeleton node
    locs = generate_rendering_locs(vertices,
                                   comp_window / 6)  # 6 rendering locations per comp. window
    dist, ind = tree.query(locs)
    locs = locs[dist[:, 0] < 2000]  #[::3][:5]  # TODO add as parameter

    # # # To get view locations
    # dest_folder = os.path.expanduser("~") + \
    #               "/spiness_skels/{}/view_imgs_{}/".format(sso_id, n_voting)
    # if not os.path.isdir(dest_folder):
    #     os.makedirs(dest_folder)
    # loc_text = ''
    # for i, c in enumerate(locs):
    #     loc_text += str(i) + "\t" + str((c / np.array([10, 10, 20])).astype(np.int)) +'\n' #rescalling to the voxel grid
    # with open("{}/viewcoords.txt".format(dest_folder), "w") as f:
    #     f.write(loc_text)
    # # # DEBUG PART END
    label_views, rot_mat = _render_mesh_coords(locs,
                                               mo,
                                               depth_map=False,
                                               return_rot_matrices=True,
                                               ws=ws,
                                               smooth_shade=False,
                                               nb_views=nb_views,
                                               comp_window=comp_window,
                                               verbose=verbose)
    label_views = remap_rgb_labelviews(label_views[..., :3], palette)[:, None]
    # TODO: the 3 neglects the alpha channel, i.e. remapping labels bigger than 256**3 becomes
    #  invalid
    index_views = render_sso_coords_index_views(sso,
                                                locs,
                                                rot_mat=rot_mat,
                                                verbose=verbose,
                                                nb_views=nb_views,
                                                ws=ws,
                                                comp_window=comp_window)
    raw_views = render_sso_coords(sso,
                                  locs,
                                  nb_views=nb_views,
                                  ws=ws,
                                  comp_window=comp_window,
                                  verbose=verbose,
                                  rot_mat=rot_mat)
    return raw_views, label_views, index_views
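The label transfer in the middle of this example reduces to a single query: build the tree over labelled skeleton nodes and give each mesh vertex the label of its nearest node. A self-contained sketch with made-up coordinates and labels:

import numpy as np
from sklearn.neighbors import KDTree

node_coords = np.array([[0.0, 0.0, 0.0], [10.0, 0.0, 0.0], [0.0, 10.0, 0.0]])
node_labels = np.array([0, 1, 2])           # e.g. dendrite / axon / soma
vertices = np.random.rand(100, 3) * 10.0    # stand-in mesh vertices

tree = KDTree(node_coords)
dist, ind = tree.query(vertices, k=1)       # nearest labelled node per vertex
vertex_labels = node_labels[ind[:, 0]]
print(vertex_labels[:10])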
Exemple #37
0
def kdtree_nn(points):
    tree = KDTree(points, leaf_size=2)
    dist, ind = tree.query(points[:], k=2)
    return dist[:, 1]
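Because the query points are the training points themselves, the first neighbour of every point is the point itself at distance zero; dist[:, 1] is therefore the distance to the true nearest neighbour. For instance:

import numpy as np

points = np.random.rand(50, 3)
nn_dist = kdtree_nn(points)          # reuses the function defined above
print(nn_dist.shape, nn_dist.min())  # (50,), positive unless points repeat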
Exemple #38
0
class ABOD(BaseDetector):
    """ABOD class for Angle-base Outlier Detection.
    For an observation, the variance of its weighted cosine scores to all
    neighbors could be viewed as the outlying score.
    See :cite:`kriegel2008angle` for details.

    Two versions of ABOD are supported:

    - Fast ABOD: use k nearest neighbors to approximate.
    - Original ABOD: consider all training points, with a high time complexity
      of O(n^3).

    Parameters
    ----------
    contamination : float in (0., 0.5), optional (default=0.1)
        The amount of contamination of the data set, i.e.
        the proportion of outliers in the data set. Used when fitting to
        define the threshold on the decision function.

    n_neighbors : int, optional (default=5)
        Number of neighbors to use by default for k neighbors queries.

    method: str, optional (default='fast')
        Valid values for ``method`` are:

        - 'fast': fast ABOD. Only consider n_neighbors of training points
        - 'default': original ABOD with all training points, which could be
          slow

    Attributes
    ----------
    decision_scores_ : numpy array of shape (n_samples,)
        The outlier scores of the training data.
        The higher, the more abnormal. Outliers tend to have higher
        scores. This value is available once the detector is
        fitted.

    threshold_ : float
        The threshold is based on ``contamination``. It is the
        ``n_samples * contamination`` most abnormal samples in
        ``decision_scores_``. The threshold is calculated for generating
        binary outlier labels.

    labels_ : int, either 0 or 1
        The binary labels of the training data. 0 stands for inliers
        and 1 for outliers/anomalies. It is generated by applying
        ``threshold_`` on ``decision_scores_``.
    """

    def __init__(self, contamination=0.1, n_neighbors=5, method='fast'):
        super(ABOD, self).__init__(contamination=contamination)
        self.method = method
        self.n_neighbors = n_neighbors

    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.X_train_ = X
        self.n_train_ = X.shape[0]
        self.decision_scores_ = np.zeros([self.n_train_, 1])

        if self.method == 'fast':
            self._fit_fast()
        elif self.method == 'default':
            self._fit_default()
        else:
            raise ValueError("%s is not a valid method" % self.method)

        # flip the scores
        self.decision_scores_ = self.decision_scores_.ravel() * -1
        self._process_decision_scores()
        return self

    def _fit_default(self):
        """Default ABOD method. Use all training points with high complexity
        O(n^3). For internal use only.
        """
        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]

            # use all other training points as neighbors, excluding the point itself
            X_ind = list(range(0, self.n_train_))
            X_ind.remove(i)

            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    def _fit_fast(self):
        """Fast ABOD method. Only use n_neighbors for angle calculation.
        Internal use only
        """

        # make sure the n_neighbors is in the range
        check_parameter(self.n_neighbors, 1, self.n_train_)

        self.tree_ = KDTree(self.X_train_)

        neigh = NearestNeighbors(n_neighbors=self.n_neighbors)
        neigh.fit(self.X_train_)
        ind_arr = neigh.kneighbors(n_neighbors=self.n_neighbors,
                                   return_distance=False)

        for i in range(self.n_train_):
            curr_pt = self.X_train_[i, :]
            X_ind = ind_arr[i, :]
            self.decision_scores_[i, 0] = _calculate_wocs(curr_pt,
                                                          self.X_train_,
                                                          X_ind)
        return self

    # noinspection PyPep8Naming
    def decision_function(self, X):
        """Predict raw anomaly score of X using the fitted detector.

        The anomaly score of an input sample is computed based on different
        detector algorithms. For consistency, outliers are assigned with
        larger anomaly scores.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only
            if they are supported by the base estimator.

        Returns
        -------
        anomaly_scores : numpy array of shape (n_samples,)
            The anomaly score of the input samples.
        """

        check_is_fitted(self, ['X_train_', 'n_train_', 'decision_scores_',
                               'threshold_', 'labels_'])
        X = check_array(X)

        if self.method == 'fast':  # fast ABOD
            # outliers have higher outlier scores
            return self._decision_function_fast(X) * -1
        else:  # default ABOD
            return self._decision_function_default(X) * -1

    def _decision_function_default(self, X):
        """Internal method for predicting outlier scores using default ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The anomaly score of the input samples.

        """
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            # get the index pairs of the neighbors
            X_ind = list(range(0, self.n_train_))
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()

    def _decision_function_fast(self, X):
        """Internal method for predicting outlier scores using Fast ABOD.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The training input samples.

        Returns
        -------
        pred_score : array, shape (n_samples,)
            The anomaly score of the input samples.

        """

        check_is_fitted(self, ['tree_'])
        # initialize the output score
        pred_score = np.zeros([X.shape[0], 1])

        # get the indexes of the X's k nearest training points
        _, ind_arr = self.tree_.query(X, k=self.n_neighbors)

        for i in range(X.shape[0]):
            curr_pt = X[i, :]
            X_ind = ind_arr[i, :]
            pred_score[i, :] = _calculate_wocs(curr_pt, self.X_train_, X_ind)

        return pred_score.ravel()
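Typical usage follows the scikit-learn-style fit/score convention the class implements. A sketch on synthetic data, assuming the detector and the pyod helpers it relies on (check_array, _calculate_wocs, etc.) are importable as in the example:

import numpy as np

X = np.random.randn(200, 2)
X[:10] += 6.0                        # plant a few far-away points

clf = ABOD(contamination=0.05, n_neighbors=10, method='fast')
clf.fit(X)
print(clf.labels_[:10])              # planted points should mostly be flagged 1
print(clf.decision_scores_[:5])      # higher score = more abnormal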
Exemple #39
0
# The snippet begins mid-call in the source; the parser setup below is a
# reconstruction (argument names assumed from their later use as
# args.input_file and args.output_prefix).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('input_file',
                    help='Input point cloud.')
parser.add_argument('output_prefix',
                    metavar='O',
                    help='Labelled ground truth cloud.')
parser.add_argument('-k',
                    type=int,
                    default=10,
                    metavar='k',
                    help='Number of neighbors.')
args = parser.parse_args()

cloud = read_ply(args.input_file)
points = np.vstack((cloud['x'], cloud['y'], cloud['z'])).T
labels = cloud['class'].astype(np.int32)

labeled = np.nonzero(labels)[0]
not_labeled = np.nonzero(labels == 0)[0]

tree = KDTree(points[labeled])

dist, neighbors = tree.query(points[not_labeled], k=args.k)
neighborslabels = labels[labeled][neighbors]

ind = stats.mode(neighborslabels, axis=1)[0]
labels[not_labeled] = ind.reshape(-1)

# Labelled cloud
write_ply('{}.ply'.format(args.output_prefix), (points, labels),
          ('x', 'y', 'z', 'scalar_class'))

# Label list for IoU computation and benchmark submission
np.savetxt('{}.txt'.format(args.output_prefix), labels, fmt='%d')
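Each unlabelled point receives the majority label among its k labelled neighbours; stats.mode along axis=1 performs that vote row by row. In isolation (the final reshape absorbs the shape differences between SciPy versions):

import numpy as np
from scipy import stats

votes = np.array([[1, 1, 2],
                  [3, 2, 3]])             # k=3 neighbour labels per point
majority = stats.mode(votes, axis=1)[0]   # row-wise majority vote
print(np.asarray(majority).reshape(-1))   # [1 3]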
Exemple #40
0
seed = (0, 0, 0)
# print("visitednodes",visited_nodes)
# print("all_nodes", all_nodes)
print("\n")

while not goalcheck_circle(goal_x, goal_y, goal_z, seed[0], seed[1],
                           seed[2]):
    seed = generate_seed()
    # print("generated_seed", seed)
    if ((seed not in visited_nodes)
            and not obstacle_check(seed[0], seed[1], seed[2])):

        all_nodes.insert(0, seed)
        X = np.array(all_nodes)
        tree = KDTree(X, leaf_size=2)
        dist, ind = tree.query(X[:1], k=2)
        p = ind[0][1]
        parent = all_nodes[p]

        par = seed
        s = parent
        a = 0
        # print(s)
        while (cost2go(par, s) >= 0.1):
            a = line_obstacle_check(s, par)
            # print(a)
            if obstacle_check(a[0], a[1], a[2]):
                #                 print("inside")
                #                 print("stop point", a)
                break
            s = a
Exemple #41
0
similarity_input_train = pd.concat([attributes1, dummied_features], axis=1)

# store the train and test files
train.to_csv('search/train')
test.to_csv('search/test')
similarity_input.to_csv('search/similarity_input')
similarity_input_train.to_csv('search/similarity_input_train')
user_20_sample.to_csv('search/user_20_sample')

# Use KD Tree from sklearn

kdt = KDTree(np.asarray(similarity_input), leaf_size=30, metric='euclidean')
split1, split2, split3, split4, split5 = np.array_split(
    similarity_input_train, 5)
distance, indices = kdt.query(split1, k=100, return_distance=True)
distance_t, indices_t = kdt.query(split2, k=100, return_distance=True)
distance = np.vstack((distance, distance_t))
indices = np.vstack((indices, indices_t))

distance_t, indices_t = kdt.query(split3, k=100, return_distance=True)
distance = np.vstack((distance, distance_t))
indices = np.vstack((indices, indices_t))

distance_t, indices_t = kdt.query(split4, k=100, return_distance=True)
distance = np.vstack((distance, distance_t))
indices = np.vstack((indices, indices_t))

distance_t, indices_t = kdt.query(split5, k=100, return_distance=True)
distance = np.vstack((distance, distance_t))
indices = np.vstack((indices, indices_t))
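Querying in five splits keeps each intermediate distance/index block at a fifth of the full n_queries x 100 size; the same pattern reads more compactly as a loop over np.array_split. A behaviour-preserving sketch with stand-in arrays:

import numpy as np
from sklearn.neighbors import KDTree

similarity_input = np.random.rand(1000, 8)       # stand-ins for the frames above
similarity_input_train = np.random.rand(500, 8)

kdt = KDTree(similarity_input, leaf_size=30, metric='euclidean')
dists, inds = [], []
for chunk in np.array_split(similarity_input_train, 5):
    d, i = kdt.query(chunk, k=100, return_distance=True)
    dists.append(d)
    inds.append(i)
distance, indices = np.vstack(dists), np.vstack(inds)
print(distance.shape, indices.shape)  # (500, 100) (500, 100)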
Exemple #42
0
class RFAttack(AttackModel):
    def __init__(self,
                 trnX: np.ndarray,
                 trny: np.ndarray,
                 clf: RandomForestClassifier,
                 ord,
                 method: str,
                 n_searches: int = -1,
                 random_state=None):
        """Attack on Random forest classifier
        
        Arguments:
            trnX {ndarray, shape=(n_samples, n_features)} -- Training data
            trny {ndarray, shape=(n_samples)} -- Training label
            clf {RandomForestClassifier} -- The Random Forest classifier
            ord {int} -- Order of the norm for perturbation distance, see numpy.linalg.norm for more information
            method {str} -- 'all' means optimal attack (RBA-Exact), 'rev' means RBA-Approx
        
        Keyword Arguments:
            n_searches {int} -- number of regions to search, only used when method=='rev' (default: {-1})
            random_state {[type]} -- random seed (default: {None})
        """
        super().__init__(ord=ord)
        paths, constraints = [], []
        self.clf = clf
        self.method = method
        self.n_searches = n_searches
        self.trnX = trnX
        self.trny = trny
        self.random_state = random_state
        if self.n_searches != -1:
            self.kd_tree = KDTree(self.trnX)
        else:
            self.kd_tree = None

        if self.method == 'all':
            for tree_clf in clf.estimators_:
                path, constraint = get_tree_constraints(tree_clf)
                paths.append(path)
                constraints.append(constraint)

            n_classes = clf.n_classes_
            n_estimators = len(clf.estimators_)
            self.regions = []
            self.region_preds = []
            vacuous_regions = 0

            for res in product(range(n_classes), repeat=n_estimators):
                perm_consts = [list() for _ in range(n_estimators)]

                for i in range(n_estimators):
                    value = clf.estimators_[i].tree_.value
                    path = paths[i]
                    constraint = constraints[i]

                    for p in range(len(path)):
                        if np.argmax(value[path[p][-1]]) == res[i]:
                            perm_consts[i].append(constraint[p])

                for pro in product(*perm_consts):
                    r = union_constraints(
                        np.vstack([j[0] for j in pro]),
                        np.concatenate([j[1] for j in pro]),
                    )
                    G, h = constraint_list_to_matrix(r)
                    status, _ = solve_lp(np.zeros((len(G[0]))), G,
                                         h.reshape(-1, 1), len(G[0]))
                    if status == 'optimal':
                        self.region_preds.append(np.argmax(np.bincount(res)))
                        #self.regions.append((G, h))
                        self.regions.append(r)
                    else:
                        vacuous_regions += 1

            print(f"number of regions: {len(self.regions)}")
            print(f"number of vacuan regions: {vacuan_regions}")

        elif self.method == 'rev':
            #Gss, hss = [list() for _ in trnX], [list() for _ in trnX]
            #for tree_clf in clf.estimators_:
            #    Gs, hs = tree_instance_constraint(tree_clf, trnX)
            #    #print(len(Gs[0]))
            #    for i, (G, h) in enumerate(zip(Gs, hs)):
            #        Gss[i].append(G)
            #        hss[i].append(h)
            #self.regions = []
            #for i, (Gs, hs) in enumerate(zip(Gss, hss)):
            #    t1, t2 = np.vstack(Gs), np.concatenate(hs)
            #    self.regions.append(union_constraints(t1, t2))

            r = tree_instance_constraint(clf.estimators_[0], trnX)
            for tree_clf in clf.estimators_[1:]:
                t = tree_instance_constraint(tree_clf, trnX)
                r = np.min(np.concatenate(
                    (r[np.newaxis, :], t[np.newaxis, :])),
                           axis=0)
            self.regions = r

            for i in range(len(trnX)):
                G, h = constraint_list_to_matrix(self.regions[i])
                assert np.all(np.dot(G, trnX[i]) <= (h + 1e-8))
                #assert np.all(np.dot(np.vstack(Gss[i]), trnX[i]) <= np.concatenate(hss[i])), i
                #assert np.all(np.dot(G, trnX[i]) <= h), i
        else:
            raise ValueError("Not supported method: %s", self.method)

    def perturb(self, X, y, eps=0.1):
        X = X.astype(np.float32)
        if self.ord == 2:
            get_sol_fn = rev_get_sol_l2
        elif self.ord == np.inf:
            get_sol_fn = rev_get_sol_linf
        else:
            raise ValueError("ord %s not supported", self.ord)

        pred_y = self.clf.predict(X)
        pred_trn_y = self.clf.predict(self.trnX)

        if self.method == 'all':

            def _helper(target_x, target_y, pred_yi):
                if pred_yi != target_y:
                    return np.zeros_like(target_x)
                temp_regions = [self.regions[i] for i in range(len(self.regions)) \
                            if self.region_preds[i] != target_y]
                return get_sol_fn(target_x, target_y, pred_trn_y, temp_regions,
                                  self.clf)

            pert_xs = Parallel(n_jobs=4, verbose=5)(
                delayed(_helper)(X[i], y[i], pred_y[i]) for i in range(len(X)))
            pert_X = np.array(pert_xs)

            assert np.all(self.clf.predict(X + pert_X) != y)

        elif self.method == 'rev':
            pert_X = np.zeros_like(X)
            for sample_id in tqdm(range(len(X)), ascii=True, desc="Perturb"):
                if pred_y[sample_id] != y[sample_id]:
                    continue
                target_x, target_y = X[sample_id], y[sample_id]

                if self.n_searches != -1:
                    ind = self.kd_tree.query(target_x.reshape((1, -1)),
                                             k=len(self.trnX),
                                             return_distance=False)[0]
                    ind = list(filter(lambda x: pred_trn_y[x] != target_y,
                                      ind))[:self.n_searches]
                else:
                    ind = list(
                        filter(lambda x: pred_trn_y[x] != target_y,
                               np.arange(len(self.trnX))))
                temp_regions = [self.regions[i] for i in ind]
                pert_x = get_sol_fn(target_x, y[sample_id], pred_trn_y,
                                    temp_regions, self.clf, self.trnX[ind])

                if np.linalg.norm(pert_x) != 0:
                    assert self.clf.predict([X[sample_id] + pert_x
                                             ])[0] != y[sample_id]
                    pert_X[sample_id, :] = pert_x
                else:
                    raise ValueError("shouldn't happen")
        else:
            raise ValueError("Not supported method %s", self.method)

        self.perts = pert_X
        return self._pert_with_eps_constraint(pert_X, eps)
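The kd-tree's role in perturb is ordering, not lookup: querying with k=len(trnX) returns every training index sorted by distance to the target, which is then filtered down to the n_searches closest points the forest classifies differently. That step in isolation, with hypothetical stand-ins for the classifier's predictions:

import numpy as np
from sklearn.neighbors import KDTree

trnX = np.random.rand(200, 4)
pred_trn_y = np.random.randint(0, 2, size=200)   # stand-in for clf.predict(trnX)
target_x, target_y, n_searches = np.random.rand(4), 1, 10

tree = KDTree(trnX)
ind = tree.query(target_x.reshape(1, -1), k=len(trnX), return_distance=False)[0]
candidates = [i for i in ind if pred_trn_y[i] != target_y][:n_searches]
print(candidates)  # the n_searches nearest points predicted differently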
Exemple #43
0
class SUNNY:
    def __init__(self, determine_best='min-par10'):
        self._name = 'sunny'
        self._imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
        self._scaler = StandardScaler()
        self._determine_best = determine_best
        self._k = 16

    def get_name(self):
        return self._name

    def fit(self, scenario: ASlibScenario, fold: int, num_instances: int):
        self._num_algorithms = len(scenario.algorithms)
        self._algorithm_cutoff_time = scenario.algorithm_cutoff_time

        # resample `num_instances` instances and preprocess them accordingly
        features, performances = self._resample_instances(
            scenario.feature_data,
            scenario.performance_data,
            num_instances,
            random_state=fold)
        features, performances = self._preprocess_scenario(
            scenario, features, performances)

        # build nearest neighbors index based on euclidean distance
        self._model = KDTree(features, leaf_size=30, metric='euclidean')
        self._performances = np.copy(performances)

    def predict(self, features, instance_id: int):
        assert (features.ndim == 1), '`features` must be one dimensional'
        features = np.expand_dims(features, axis=0)
        features = self._imputer.transform(features)
        features = self._scaler.transform(features)

        neighbour_idx = np.squeeze(
            self._model.query(features, k=self._k, return_distance=False))

        if self._determine_best == 'subportfolio':
            if np.isnan(self._performances).any():
                raise NotImplementedError()

            sub_portfolio = self._build_subportfolio(neighbour_idx)
            schedule = self._build_schedule(neighbour_idx, sub_portfolio)
            selection = schedule[0]

        elif self._determine_best == 'max-solved':
            if np.isnan(self._performances).any():
                raise NotImplementedError()

            # select the algorithm which solved the most instances (use min PAR10 as tie-breaker)
            sub_performances = self._performances[neighbour_idx, :]
            num_solved = np.sum(sub_performances < self._algorithm_cutoff_time,
                                axis=0)
            max_solved = np.max(num_solved)
            indices, = np.where(num_solved >= max_solved)
            sub_performances = sub_performances[:, indices]
            runtime = np.sum(sub_performances, axis=0)
            selection = indices[np.argmin(runtime)]

        elif self._determine_best == 'min-par10':
            # select the algorithm with the lowest mean PAR10 score (use max solved as tie-breaker)
            sub_performances = self._performances[neighbour_idx, :]
            runtime = np.nanmean(sub_performances, axis=0)

            if not np.isnan(runtime).all():
                min_runtime = np.nanmin(runtime)
                runtime = np.nan_to_num(runtime, nan=np.inf)

            else:
                return np.random.choice(self._num_algorithms)

            indices, = np.where(runtime <= min_runtime)
            sub_performances = sub_performances[:, indices]

            num_solved = np.sum(
                np.nan_to_num(sub_performances, nan=np.inf) <
                self._algorithm_cutoff_time)
            selection = indices[np.argmax(num_solved)]

        else:
            raise ValueError('`{}` is no valid selection strategy'.format(
                self._determine_best))

        # create ranking st. the selected algorithm has rank 0, any other algorithm has rank 1
        ranking = np.ones(self._num_algorithms)
        ranking[selection] = 0
        return ranking

    def _build_subportfolio(self, neighbour_idx):
        sub_performances = self._performances[neighbour_idx, :]

        # naive, inefficient computation
        algorithms = range(self._num_algorithms)
        num_solved, avg_time = -np.inf, -np.inf
        sub_portfolio = None
        for subset in chain.from_iterable(
                combinations(algorithms, n)
                for n in range(1, len(algorithms))):
            # compute number of solved instances and average solving time
            tmp_solved = np.count_nonzero(
                np.min(sub_performances[:, subset], axis=1) <
                self._algorithm_cutoff_time)

            # TODO: not entirely sure whether this is the correct way to compute the average runtime as mentioned in the paper
            tmp_avg_time = np.sum(
                sub_performances[:, subset]) / sub_performances[:, subset].size
            if tmp_solved > num_solved or (tmp_solved == num_solved
                                           and tmp_avg_time < avg_time):
                num_solved, avg_time = tmp_solved, tmp_avg_time
                sub_portfolio = subset

        return sub_portfolio

    def _build_schedule(self, neighbour_idx, sub_portfolio):
        # schedule algorithms by solved instances (desc.), breaking ties by average runtime (asc.)
        sub_performances = self._performances[neighbour_idx, :]
        alg_performances = {
            alg: (np.count_nonzero(
                sub_performances[:, alg] < self._algorithm_cutoff_time),
                  (-1) * np.sum(sub_performances[:, alg]))
            for alg in sub_portfolio
        }
        schedule = sorted(
            [(solved, avg_time, alg)
             for (alg, (solved, avg_time)) in alg_performances.items()],
            reverse=True)

        return [alg for (_, _, alg) in schedule]

    def _resample_instances(self, feature_data, performance_data,
                            num_instances, random_state):
        num_instances = min(num_instances, np.size(
            performance_data, axis=0)) if num_instances > 0 else np.size(
                performance_data, axis=0)
        return resample(feature_data,
                        performance_data,
                        n_samples=num_instances,
                        random_state=random_state)

    def _preprocess_scenario(self, scenario, features, performances):
        features = self._imputer.fit_transform(features)
        features = self._scaler.fit_transform(features)

        return features, performances
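Stripped of the imputation and scaling plumbing, SUNNY's default min-par10 strategy is: find the k nearest training instances, average each algorithm's runtime over them, and pick the minimiser. A sketch with synthetic performance data:

import numpy as np
from sklearn.neighbors import KDTree

features = np.random.rand(100, 6)              # training instances
performances = np.random.rand(100, 3) * 100.0  # PAR10 runtimes of 3 algorithms

tree = KDTree(features, leaf_size=30, metric='euclidean')
query = np.random.rand(1, 6)
neighbour_idx = tree.query(query, k=16, return_distance=False)[0]

runtime = performances[neighbour_idx].mean(axis=0)
print("selected algorithm:", np.argmin(runtime))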
Exemple #44
0
def kd_tree(p_ind, p_cloud):
    tree = KDTree(p_cloud, leaf_size=400)
    dist, ind = tree.query(p_cloud[p_ind].reshape(1, -1), k=5000)
    return ind[0]
Exemple #45
0
 def _near_neighbours(self, points):
     tree = KDTree(points, leaf_size=2)
     dist, ind = tree.query(points[:], k=2)
     return dist[:, 1]
Exemple #46
0
class RrtStar:
    def __init__(self,
                 start,
                 goal,
                 obstacle_list,
                 map_limits,
                 path_max=5,
                 path_elements=10,
                 max_iter=1000,
                 goal_sample_rate=0.1):
        """
        Args:
            start (list/tuple): Start coordinates (x, y)
            goal (list/tuple): Goal Coordinates (x, y)
            obstacle_list (list): List of obstacles [[x, y, dx, dy], ...]
            map_limits (list/tuple): Search space boundaries [x_min, m_max, y_min, y_max]
            path_max (number): Maximum path length
            path_elements (int): Resolution of path to check for collisions
            max_iter (int): Maximum number of iterations to search
            goal_sample_rate (number): Rate at which to randomly sample goal position as next node (0 -> 1).
                                        This creates a bias towards exploring in the goal direction.
        """
        self.obstacle_list = obstacle_list
        # Create kd-tree of obstacles using (x, y) coordinates
        self.obstacle_tree = KDTree([x[:2] for x in obstacle_list])

        if self.point_collision_free(start):
            self.start = PathNode(start, cost=0)
        else:
            raise ValueError("Start position in collision")

        if self.point_collision_free(goal):
            self.goal = PathNode(goal)
        else:
            raise ValueError("Goal position in collision")

        self.map_limits = map_limits
        self.path_max = path_max
        self.path_elements = path_elements
        self.max_iter = max_iter
        self.node_list = []
        self.goal_sample_rate = goal_sample_rate
        self.goal_node = False

    def point_collision_free(self, point):
        """
        Args:
            point (list/tuple): Coordinates (x, y)
        """
        obstacle_list = []
        i_obstacles = self.obstacle_tree.query([point],
                                               k=1,
                                               return_distance=False)[0]
        for i_obstacle in i_obstacles:
            obstacle_list.append(self.obstacle_list[i_obstacle])
        for (ox, oy, odx, ody) in obstacle_list:
            dx = abs(ox - point[0])
            dy = abs(oy - point[1])
            if dx <= odx and dy <= ody:
                return False  # Collision
        return True  # safe

    def path_collision_free(self, path):
        """
        Args:
            path (list/tuple): Coordinates ((x, y), ...)
        """
        obstacle_list = []

        for coordinates in path:
            i_obstacles = self.obstacle_tree.query([coordinates],
                                                   k=1,
                                                   return_distance=False)[0]
            for i_obstacle in i_obstacles:
                obstacle_list.append(self.obstacle_list[i_obstacle])

        for (ox, oy, odx, ody) in obstacle_list:
            dx_list = [abs(ox - x[0]) for x in path]
            dy_list = [abs(oy - x[1]) for x in path]

            for dx, dy in zip(dx_list, dy_list):
                if dx <= odx and dy <= ody:
                    return False  # Collision

        return True  # safe

    def get_random_node(self):
        if np.random.random() > self.goal_sample_rate or self.goal_node:
            node = PathNode([
                np.random.uniform(self.map_limits[0], self.map_limits[1]),
                np.random.uniform(self.map_limits[2], self.map_limits[3])
            ])
        else:  # goal point sampling
            node = PathNode(self.goal.coordinates)
        return node

    def create_valid_path(self,
                          node,
                          cost_fn=fn_straight_line,
                          limit_path=True):
        if node.parent is None:
            raise ValueError("Node requires a parent")

        # Move along subpath until distance limit is reached or obstacle is hit
        step_count = 1
        collision_free = True
        start = node.parent.coordinates
        distance_to_parent = node.cost_to_node(node.parent, cost_fn=cost_fn)
        if distance_to_parent == 0:
            return None

        updated_node = PathNode(start, parent=node.parent)
        updated_node.path = [start]

        if limit_path:
            path_length = min(self.path_max, distance_to_parent)
        else:
            path_length = distance_to_parent
        path_increment = path_length / self.path_elements

        while (step_count * path_increment <= path_length and collision_free):
            xy, _ = cost_fn(updated_node.path[-1],
                            node.coordinates,
                            distance=path_increment)
            # Check for collision
            if self.point_collision_free(xy):
                updated_node.path.append(xy)
                updated_node.coordinates = xy
            else:
                collision_free = False
                updated_node.collision_avoided = True
            step_count += 1
        # If no path found return None
        if len(updated_node.path) <= 1:
            return None
        else:
            updated_node.update_cost(cost_fn=cost_fn)
            # Check if original node has been adjusted due to obstacles or maximum length
            if not updated_node.path_same_as_parent(
                (step_count - 1) * path_increment):
                updated_node.truncated = True
            return updated_node

    def get_parent_node(self, node, node_tree=None, method=2):
        if node_tree is None:
            node_tree = KDTree([n.coordinates for n in self.node_list])

        # Method 1: Find the closest node, then look for the cheapest neighbour
        if method == 1:
            # Get closest nodes
            i_closest = node_tree.query([node.coordinates],
                                        k=1,
                                        return_distance=False)[0][0]
            # Look in radius around this for cheaper node
            i_nearby = node_tree.query_radius(
                [self.node_list[i_closest].coordinates],
                r=self.path_max * 0.5,
                return_distance=False)[0]
            costs = []
            for i_node in i_nearby:
                costs.append(self.node_list[i_node].cost)
            i_cheapest = i_nearby[np.argmin(costs)]

        # Method 2: Find the cheapest node within a radius
        if method == 2:
            n_neighbours = min(5, len(self.node_list))
            i_nearby = node_tree.query([node.coordinates],
                                       k=n_neighbours,
                                       return_distance=False)[0]
            costs = []
            for i_node in i_nearby:
                node_nearby = self.node_list[i_node]
                cost_to_neighbour = node_nearby.cost + node_nearby.cost_to_node(
                    node)
                costs.append(cost_to_neighbour)

            i_cheapest = i_nearby[np.argmin(costs)]

        return self.node_list[i_cheapest]

    def plan(self, animation=False):
        print("Starting planning ...")
        # Reinitialise node list
        self.node_list = [self.start]
        self.goal_found = False
        print_iters = self.max_iter / 10
        for i in range(self.max_iter):
            if i % print_iters == 0:
                print("Iteration {}".format(i))
            # Select random node
            random_node = self.get_random_node()
            # Create KD Tree of nodes for multiple queries
            node_tree = KDTree([n.coordinates for n in self.node_list])
            # Pick parent node based upon proximity and cost
            random_node.parent = self.get_parent_node(random_node,
                                                      node_tree=node_tree)
            # Update node to reflect path constraints
            valid_node = self.create_valid_path(random_node)
            # Assuming valid update graph
            if valid_node:
                if valid_node.close_to_node(self.goal):
                    self.goal_node = valid_node
                self.node_list.append(valid_node)
                valid_node.parent.children.add(valid_node)
                self.rewire(valid_node, node_tree)
            if animation:
                self.draw_map(animation=True)

        print("Reached max iterations")
        if self.goal_node:
            print("Goal found")
        else:
            print("Goal not found")

    def rewire(self, new_node, node_tree, path_fn=fn_straight_line):
        # Find nodes within radius equal to max path length
        i_nearby = node_tree.query_radius([new_node.coordinates],
                                          r=self.path_max,
                                          return_distance=False)[0]
        for i_node in i_nearby:
            # Look at each of these nodes in turn
            node = self.node_list[i_node]
            # Create a temporary node at each nearby node's position and,
            # as a test, make the new node its parent
            temp_node = PathNode(node.coordinates, parent=new_node)
            temp_node.update_cost()

            checked_node = self.create_valid_path(temp_node)
            if checked_node and not checked_node.truncated:  # This means path is complete and unobstructed
                # If this is better then update graph
                if checked_node.cost < self.node_list[i_node].cost:
                    # Update children
                    node.parent.children.remove(node)
                    new_node.children.add(checked_node)
                    # Replace node with new one
                    self.node_list[i_node] = checked_node
                    self.propagate_cost_to_leaves(checked_node)

    def propagate_cost_to_leaves(self, parent_node, check_all=True):
        """
        Update the cost of downstream nodes (leaves of the tree) after rewiring
        """
        if check_all:  # Check every node (old method)
            for node in self.node_list:
                if node.parent == parent_node:
                    node.update_cost()
                    self.propagate_cost_to_leaves(node)
        else:  # Check only recorded child nodes (intended to speed up)
            for child_node in parent_node.children:
                child_node.update_cost()
                self.propagate_cost_to_leaves(child_node)

    def get_path(self, optimise=True, return_type='points'):
        if self.goal_node:
            path = []
            node = self.goal_node
            while node.parent is not None:
                path.insert(0, node)
                node = node.parent
            path.insert(0, node)  # Goal

            if optimise:
                # Remove unnecessary nodes
                for i_a, node_a in enumerate(path):
                    # for i_b in range(len(path)-1, i_a, -1):
                    for j, node_b in reversed(list(enumerate(path[i_a + 1:]))):
                        i_b = j + i_a + 1
                        temp_node = PathNode(node_b.coordinates, parent=node_a)
                        valid_node = self.create_valid_path(temp_node,
                                                            limit_path=False)
                        if valid_node and not valid_node.collision_avoided:
                            del path[i_a + 1:i_b]
                            break

            if return_type == 'points':
                path = [node.coordinates for node in path]

            return path
        else:
            print("No path was found last time")
            return None

    @staticmethod
    def plot_rectangle(x, y, dx, dy, color="b"):
        rect = patches.Rectangle((x - dx, y - dy),
                                 2 * dx,
                                 2 * dy,
                                 facecolor=color)
        # Add the patch to the Axes
        ax = plt.gca()
        ax.add_patch(rect)

    def draw_map(self, animation=False):
        plt.clf()
        # for stopping simulation with the esc key.
        plt.gcf().canvas.mpl_connect(
            'key_release_event',
            lambda event: [exit(0) if event.key == 'escape' else None])

        for (ox, oy, odx, ody) in self.obstacle_list:
            self.plot_rectangle(ox, oy, odx, ody)

        for node in self.node_list:
            plt.scatter([node.coordinates[0]], [node.coordinates[1]],
                        marker="o",
                        c="m",
                        s=4)
            if node.parent:
                plt.plot([x[0] for x in node.path], [x[1] for x in node.path],
                         "-m",
                         linewidth=1,
                         alpha=0.3)

        if self.goal_node:
            # node = self.goal_node
            full_path = self.get_path(optimise=False, return_type='points')
            plt.plot([x[0] for x in full_path], [x[1] for x in full_path],
                     "--g",
                     linewidth=2,
                     alpha=1)
            # while node is not None:
            #     plt.plot([x[0] for x in node.path], [x[1] for x in node.path], "-g", linewidth=2, alpha=1)
            #     node = node.parent
            short_path = self.get_path(optimise=True, return_type='points')
            plt.plot([x[0] for x in short_path], [x[1] for x in short_path],
                     "-g",
                     linewidth=3,
                     alpha=1)

        plt.plot(self.start.coordinates[0],
                 self.start.coordinates[1],
                 "xr",
                 linewidth=3)
        plt.plot(self.goal.coordinates[0],
                 self.goal.coordinates[1],
                 "xg",
                 linewidth=3)
        plt.axis(self.map_limits)
        plt.grid(True)

        if animation:
            plt.pause(0.05)
        else:
            plt.show()
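Note that rewire uses query_radius rather than a fixed-k query: every node within path_max of the new node is a rewiring candidate, however many happen to fall inside. In isolation:

import numpy as np
from sklearn.neighbors import KDTree

node_xy = np.random.rand(50, 2) * 10.0
tree = KDTree(node_xy)
new_node = np.array([[5.0, 5.0]])

i_nearby = tree.query_radius(new_node, r=2.5, return_distance=False)[0]
print(i_nearby)  # all node indices within radius 2.5 -- a variable count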
Exemple #47
0
    def buildGraph(self, graph, numberSamples, knearest, tau, addState, eexy,
                   ee1Flag):
        tester = test_robot(self)
        points = self.grapple_points
        numberSamples_tmp = numberSamples
        D = [tau, 0.1]
        count = 0
        loop_count = 0

        diffAng = self.checkhv_ee(eexy)

        while count < numberSamples:
            samples = self.sampling_eexy(eexy, numberSamples, ee1Flag, diffAng)
            loop_count += 1
            addCount = 0
            for sp in range(numberSamples_tmp):
                addCount += 1
                rob = self.assign_config(samples, sp)
                if tester.self_collision_test(rob):
                    graph.addVertex(str(rob))
                    # points = rob.str2list()[:-1]
                    # points = rob.points
                    points = rob.get_position()
                    myarray = np.asarray(points)
                    r, c = myarray.shape
                    myarray = myarray.reshape(1, c * r)
                    if count == 0:
                        output = myarray
                        count += 1
                    else:
                        output = np.vstack([output, myarray])
                        count += 1
                    if count > numberSamples:
                        break
                else:
                    # print("add 20 obstacle samples")
                    self.obstacle_sampling_near_only(graph, rob, D, 1, tau)

        if addState == 1:
            init = self.get_init_state()
            graph.addVertex(str(init))
            # myarray = np.asarray(init.str2list()[:-1])
            myarray = np.asarray(init.get_position())
            r, c = myarray.shape
            myarray = myarray.reshape(1, c * r)
            output = np.vstack([output, myarray])
            goal = self.get_goal_state()

            graph.addVertex(str(goal))
            # myarray = np.asarray(goal.str2list()[:-1])
            myarray = np.asarray(goal.get_position())
            r, c = myarray.shape
            myarray = myarray.reshape(1, c * r)
            output = np.vstack([output, myarray])
            r, c = output.shape
            tree = KDTree(output, leaf_size=2)
            # print(output)
            for sp in range(r):
                if sp >= r - 2:
                    knearest = 2
                dist, ind = tree.query(output[sp:sp + 1], k=knearest)
                curNode = graph.getVerticeByInt(sp)
                m = self.str2robotConfig(curNode)
                for kn in range(1, knearest):
                    knNode = graph.getVerticeByInt(ind[0][kn])
                    q = self.str2robotConfig(knNode)
                    if sp < r - 2 and not tester.test_config_distance(
                            m, q, self, tau):
                        continue
                    # connect configurations that share the same ee flag and
                    # the same grapple point (first two tokens of the state)
                    if (curNode[-1] == knNode[-1]
                            and curNode.split(' ')[0] == knNode.split(' ')[0]
                            and curNode.split(' ')[1] == knNode.split(' ')[1]):
                        graph.addEdge(curNode, knNode)
Exemple #48
0
class ShapePCADataset(data.Dataset):
    def __init__(self,
                 arg,
                 dataset,
                 split,
                 pca_components=20,
                 trainset_sim=None):
        self.arg = arg
        self.dataset = dataset
        self.split = split
        self.pca_components = pca_components
        self.list = get_annotations_list(self.arg.dataset_route,
                                         dataset,
                                         split,
                                         arg.crop_size,
                                         ispdb=arg.PDB)  #[:1024]

        self.shapes = None
        self.pose_params = None
        self.aligned_shapes = None
        self.aligned_pose_params = None

        self.init_aligned_shapes(arg.crop_size)

        self.tree = None
        if trainset_sim is not None:
            self.trainset_sim = trainset_sim
            self.tree = KDTree(np.float32(self.trainset_sim.shapes))

    def init_aligned_shapes(self, crop_size):
        shapes = np.zeros((2 * kp_num[self.dataset], len(self.list)))
        for line_index, line in enumerate(self.list):
            coord_x = np.array(
                list(map(float, line[:2 * kp_num[self.dataset]:2])))
            coord_y = np.array(
                list(map(float, line[1:2 * kp_num[self.dataset]:2])))
            position_before = np.float32([[int(line[-7]),
                                           int(line[-6])],
                                          [int(line[-7]),
                                           int(line[-4])],
                                          [int(line[-5]),
                                           int(line[-4])]])
            position_after = np.float32([[0, 0], [0, crop_size - 1],
                                         [crop_size - 1, crop_size - 1]])
            crop_matrix = cv2.getAffineTransform(position_before,
                                                 position_after)
            coord_x_after_crop = crop_matrix[0][0] * coord_x + crop_matrix[0][
                1] * coord_y + crop_matrix[0][2]
            coord_y_after_crop = crop_matrix[1][0] * coord_x + crop_matrix[1][
                1] * coord_y + crop_matrix[1][2]
            shapes[0:2 * kp_num[self.dataset]:2,
                   line_index] = list(coord_x_after_crop)
            shapes[1:2 * kp_num[self.dataset]:2,
                   line_index] = list(coord_y_after_crop)

        aligned_shapes = shapes.copy()  # copy so the Procrustes loop leaves shapes untouched
        mean_shape = np.mean(aligned_shapes, 1)
        mean_shape_xy = coords_seq_to_xy(self.dataset, mean_shape)
        for i in range(len(aligned_shapes[0])):
            aligned_shape_xy = coords_seq_to_xy(self.dataset,
                                                aligned_shapes[:, i])
            tmp_error, tmp_shape, tmp_trans = procrustes(mean_shape_xy,
                                                         aligned_shape_xy,
                                                         reflection=False)
            aligned_shapes[:, i] = tmp_shape.reshape((1, -1), order='F')

        mean_shape = np.mean(aligned_shapes, 1)
        mean_shape = mean_shape.repeat(len(aligned_shapes[0])).reshape(
            -1, len(aligned_shapes[0]))
        aligned_shapes = aligned_shapes - mean_shape

        shapes = np.moveaxis(shapes, -1, 0)

        # img_show = np.zeros((crop_size, crop_size, 3), dtype=np.uint8)
        # idx = random.randint(0, shapes.shape[0] - 1)
        # for i in range(0, kp_num[self.dataset] - 1):
        #     draw_circle(img_show, (int(shapes[idx, 2*i]), int(shapes[idx, 2*i+1])))  # red
        #
        # show_img(img_show)

        pca = PCA(n_components=self.pca_components, svd_solver='full')
        pose_params = pca.fit_transform(shapes)

        aligned_shapes = np.moveaxis(aligned_shapes, -1, 0)
        pca_aligned = PCA(n_components=self.pca_components, svd_solver='full')
        aligned_pose_params = pca_aligned.fit_transform(aligned_shapes)

        self.shapes = shapes
        self.pose_params = pose_params

        self.aligned_shapes = aligned_shapes
        self.aligned_pose_params = aligned_pose_params

    def __len__(self):
        return len(self.list)

    def __getitem__(self, item):
        gt_coords_xy = np.float32(self.shapes[item])
        gt_heatmap = get_gt_heatmap(
            self.dataset, gt_coords_xy.reshape([2 * kp_num[self.arg.dataset]]),
            self.arg.crop_size, self.arg.sigma)
        pose_param = np.float32(self.pose_params[item])

        aligned_coords_xy = np.float32(self.aligned_shapes[item])
        aligned_pose_params = np.float32(self.aligned_pose_params[item])

        return gt_coords_xy, gt_heatmap, pose_param, aligned_coords_xy, aligned_pose_params

    def get_similars(self, shapes):
        if self.tree is not None:
            _, indexes = self.tree.query(coords_xy_to_seq(
                self.dataset, shapes))
            return tuple(
                map(torch.tensor,
                    zip(*[self.trainset_sim[i] for i in indexes])))
        return None
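
A side note on the lookup above: with the default k=1, sklearn's KDTree.query returns one (distance, index) pair per query row. A minimal, self-contained sketch on synthetic shape vectors (sizes are illustrative):

# Sketch of the nearest-shape lookup behind get_similars
# (synthetic data; 68 keypoints -> 136-dim flattened shape vectors).
import numpy as np
from sklearn.neighbors import KDTree

train_shapes = np.random.rand(100, 136).astype(np.float32)
tree = KDTree(train_shapes)

queries = np.random.rand(4, 136).astype(np.float32)
dist, ind = tree.query(queries)        # default k=1 -> shapes (4, 1)
nearest = train_shapes[ind[:, 0]]      # closest training shape per query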
Exemple #49
0
class KdSampler:
    '''
    Represent the KD-Tree Sampling 
    '''

    def __init__(self, data, num_samples, start=(), goal=(), safety_dist=1):
        if not isinstance(start, tuple) or not isinstance(goal, tuple):
            raise TypeError("Start and goal coordinates have to be tuples")
        self._num_samples = num_samples
        self._min_z = 5
        self._max_z = 15
        self._start_node = start
        self._goal_node = goal
        self._area = Area(
            np.min(data[:, 0] - data[:, 3]), np.max(data[:, 0] - data[:, 3]), np.min(data[:, 1] - data[:, 4]),
            np.max(data[:, 1] - data[:, 4]))
        self._kept_samples = []
        self._removed_samples = []
        # Take the center of the obstacle to the KDTree
        # KDtree input and the query have to be the same dimensions
        self._obstKD_Tree = KDTree(data[:, 0:3])
        dist = np.linalg.norm(np.array(goal[:2]) - np.array(start[:2]))
        center = (np.array(goal[:2]) + np.array(start[:2])) / 2
        rad_deviation = 5   # to deal with a dead end or possible redirections
        radius = (dist / 2) + rad_deviation
        print("Air distance: ", dist)
        # Generate samples in the circle with diameter from start and goal points
        xvals, yvals = gen_circular_random(center, radius, num_samples, self._area)
        zvals = np.random.uniform(self._min_z, self._max_z, num_samples).astype(int)

        rand_3dsamples = list(zip(xvals, yvals, zvals))
        rand_3dsamples.append(tuple(start))
        rand_3dsamples.append(tuple(goal))

        # check the nearest obstacle centers

        for point3d in rand_3dsamples:
            # get the nearest 3 obstacle centers
            data_indices = self._obstKD_Tree.query([point3d], k=3, return_distance=False)[0]
            # check for the collision using polygon
            collision = False
            for i in data_indices:
                north, east, alt, d_north, d_east, d_alt = data[i, :]

                # YW NOTE: incorporate the safety distance in the obstacle object
                obstacle = Obstacle(north - d_north - safety_dist, north + d_north + safety_dist,
                                    east - d_east - safety_dist, east + d_east + safety_dist)
                corners = [(obstacle.north_min, obstacle.east_min), (obstacle.north_min, obstacle.east_max),
                           (obstacle.north_max, obstacle.east_max), (obstacle.north_max, obstacle.east_min)]
                height = alt + d_alt
                p = Polygon(corners)
                if p.contains(Point(point3d)) and (height >= point3d[2]):
                    #print("Colission => obstacle height: %d, sample height: %d" %(height, point3d[2]))
                    self._removed_samples.append(point3d)

                    # compare plain tuples only, avoid numpy arrays!
                    if point3d == tuple(start) or point3d == tuple(goal):
                        print("WARNING: the start or goal node at {0} is removed!".format(point3d))
                    collision = True
                    break
            if not collision:
                self._kept_samples.append(point3d)

        # calculate the polygons of the obstacles
        self._polygons = []
        for i in range(data.shape[0]):
            north, east, alt, d_north, d_east, d_alt = data[i, :]

            #obstacle[north min, north max, east min, east max]
            obstacle = Obstacle(north - d_north - safety_dist, 
                                north + d_north + safety_dist, 
                                east - d_east - safety_dist, 
                                east + d_east + safety_dist)
            corners = [
                (obstacle.north_min, obstacle.east_min),
                (obstacle.north_min, obstacle.east_max),
                (obstacle.north_max, obstacle.east_max),
                (obstacle.north_max, obstacle.east_min),
            ]
            height = alt + d_alt
            p = Polygon(corners)
            self._polygons.append((p, height))
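
A hedged, standalone sketch of the per-sample collision test performed in the constructor above, assuming shapely for the polygon check; the obstacle data below is synthetic:

# Collision test for one candidate sample against the 3 nearest obstacle centers.
import numpy as np
from sklearn.neighbors import KDTree
from shapely.geometry import Polygon, Point

data = np.random.uniform(0, 100, (50, 6))   # north, east, alt, d_north, d_east, d_alt
obst_tree = KDTree(data[:, 0:3])            # query dims must match build dims

sample = np.array([50.0, 50.0, 10.0])
idx = obst_tree.query([sample], k=3, return_distance=False)[0]
collides = False
for i in idx:
    n, e, alt, dn, de, da = data[i]
    footprint = Polygon([(n - dn, e - de), (n - dn, e + de),
                         (n + dn, e + de), (n + dn, e - de)])
    if footprint.contains(Point(sample[:2])) and alt + da >= sample[2]:
        collides = True
        break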
    def fit(self, data):
        # begin solution region

        # data: m * dim array
        m = data.shape[0]
        # print("m", m)

        tree = KDTree(data)

        W = np.zeros((m, m))

        for di, datum in enumerate(data):
            # neighbors' index
            if self.use_radius_nn_:
                nis, ndists = tree.query_radius([datum],
                                                self.nnradius_,
                                                return_distance=True)
            else:
                # the order of return value is different from query_radius!
                ndists, nis = tree.query([datum],
                                         self.nnk_ + 1,
                                         return_distance=True)

            nis = nis[0]
            ndists = ndists[0]
            # print("indices", nis)
            # print("ndists", ndists)
            # print(nis.shape)

            # if len(nis.shape) == 0: continue
            # print(di, nis, ndists)
            # print("neighbors",nis.shape)
            for ni, ndist in zip(nis, ndists):
                # the point itself will be one of its knn, need to skip it
                if ni == di: continue
                if self.use_gauss_dist_:
                    W[di][ni] = W[ni][di] = self.gauss_(ndist)
                else:
                    W[di][ni] = W[ni][di] = 1 / ndist

        D = np.diag(W.sum(axis=1))

        # unnormalized Laplacian
        L = D - W

        # for debugging
        self.W = W
        self.D = D

        if self.normalized_:
            # two equivalent forms of the random-walk normalized Laplacian
            a = np.matmul(LA.inv(D), L)
            b = np.identity(m) - np.matmul(LA.inv(D), W)
            assert np.allclose(a, b)
            L = a

        # for debugging
        self.L = L

        eigvals, eigvecs = LA.eig(L)
        """
        From numpy.linalg.eig's doc:
        The eigenvalues are not necessarily ordered!!
        so we need to sort eigen values!!
        """
        sorted_idx = np.argsort(eigvals)
        # smallest self.k_ eigenvectors
        V = eigvecs[:, sorted_idx[:self.k_]]

        # for debugging
        self.eigvals = eigvals
        self.eigvecs = eigvecs
        self.V = V

        # run kmeans
        self.labels_ = KMeans(n_clusters=self.k_).fit_predict(V)
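
For context, a condensed, self-contained run of the same pipeline on toy data (kNN affinity with inverse-distance weights, unnormalized Laplacian, smallest-k eigenvectors, then k-means; nnk and k are illustrative):

import numpy as np
from numpy import linalg as LA
from sklearn.neighbors import KDTree
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
data = np.vstack([rng.normal(0, .3, (30, 2)), rng.normal(3, .3, (30, 2))])
m, k, nnk = len(data), 2, 5

tree = KDTree(data)
W = np.zeros((m, m))
ndists, nis = tree.query(data, nnk + 1)      # row i includes point i itself
for di in range(m):
    for ni, nd in zip(nis[di], ndists[di]):
        if ni == di:
            continue                         # skip the point itself
        W[di][ni] = W[ni][di] = 1 / nd

L = np.diag(W.sum(axis=1)) - W               # unnormalized Laplacian
eigvals, eigvecs = LA.eig(L)
V = eigvecs[:, np.argsort(eigvals)[:k]].real # eig() does not sort eigenvalues
labels = KMeans(n_clusters=k, n_init=10).fit_predict(V)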
Exemple #51
0
        # save sub_cloud and KDTree file
        sub_xyz, sub_colors, sub_labels = DP.grid_sub_sampling(
            sub_points, sub_colors, sub_labels, grid_size)
        sub_colors = sub_colors / 255.0
        sub_labels = np.squeeze(sub_labels)
        sub_ply_file = join(sub_pc_folder, file_name + '.ply')
        write_ply(sub_ply_file, [sub_xyz, sub_colors, sub_labels],
                  ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])

        search_tree = KDTree(sub_xyz, leaf_size=50)
        kd_tree_file = join(sub_pc_folder, file_name + '_KDTree.pkl')
        with open(kd_tree_file, 'wb') as f:
            pickle.dump(search_tree, f)

        proj_idx = np.squeeze(
            search_tree.query(sub_points, return_distance=False))
        proj_idx = proj_idx.astype(np.int32)
        proj_save = join(sub_pc_folder, file_name + '_proj.pkl')
        with open(proj_save, 'wb') as f:
            pickle.dump([proj_idx, labels], f)

    else:
        full_ply_path = join(original_pc_folder, file_name + '.ply')
        write_ply(full_ply_path,
                  (pc[:, :3].astype(np.float32), pc[:, 4:7].astype(np.uint8)),
                  ['x', 'y', 'z', 'red', 'green', 'blue'])

        # save sub_cloud and KDTree file
        sub_xyz, sub_colors = DP.grid_sub_sampling(pc[:, :3].astype(
            np.float32),
                                                   pc[:, 4:7].astype(np.uint8),
Exemple #52
0
    tree = trees[i]
    if tree is None:
        continue
    for j in range(len(segment_pts_list)):
        pts = segment_pts_list[j]
        if segment_type_list[j] != 0 or i >= j or (isinstance(pts, str) and pts == 'None'):
            continue
        neighborFlag = False
        neighborCount = 0
        for k in range(pts.shape[0]):
            sample_prob = 0.2
            if random.random() > sample_prob:
                continue
            if k > pts.shape[0] * 0.5 * sample_prob and neighborCount == 0:
                break
            dist, ind = tree.query(pts[k:k + 1, :3], k=1)
            if dist[0, 0] < 0.3:
                neighborCount += 1
            if neighborCount >= 5:
                neighborFlag = True
                break
        if neighborFlag:
            adj_matrix[segment_id_list[i], segment_id_list[j]] = 1
            adj_matrix[segment_id_list[j], segment_id_list[i]] = 1
            count_adj += 1
np.savetxt(os.path.join(files_dir, 'segment_adjacent_matrix.txt'), adj_matrix)

##########################################################################
## compute affinity matrix
affinity_matrix_dir = os.path.join(files_dir, 'segment_affinity_matrix.txt')
Exemple #53
0
def nearest_neighbor(df1, df2):
    from sklearn.neighbors import KDTree
    # DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy() instead
    kdt = KDTree(df1.to_numpy())
    indexes = kdt.query(df2.to_numpy(), k=1, return_distance=False)
    return df1.index.values[indexes]
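
A toy run of the helper above; for each row of df2 it returns the index label of the closest row in df1:

import pandas as pd

df1 = pd.DataFrame({'x': [0.0, 1.0, 5.0], 'y': [0.0, 1.0, 5.0]},
                   index=['a', 'b', 'c'])
df2 = pd.DataFrame({'x': [0.9, 4.8], 'y': [1.1, 5.2]})
print(nearest_neighbor(df1, df2))   # -> [['b'] ['c']]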
Exemple #54
0
print(q_path)

import ImageSearch_Algo_SIFT
# get the feature
q_kp, q_des = ImageSearch_Algo_SIFT.FEATURE(q_path)

predict_kmeans = kmeans.predict(q_des)
#calculates the histogram
hist1, bin_edges1 = np.histogram(predict_kmeans, bins=n_bins)
#histogram is the feature vector
q_feature_vector = hist1

# ------- Using KD TREE
# reshape to (1, n_features): KDTree.query expects a 2D array
F = q_feature_vector.reshape(1, -1)
dist, result = SIFTtree.query(F, k=50)
print(result)
flist = list(mydataSIFT.iloc[result[0].tolist()]['file'])
slist = list(dist[0])
matches = tuple(zip(slist, flist))  # create a list of tuples from 2 lists

a, q, pos, cnt = accuracy.accuracy_matches(q_path, matches, 20)
print('Accuracy =', a, '%', '| Quality:', q)
print('Count', cnt, ' | position', pos)

# # using nearest neighbor
# dist, result = neighbor.kneighbors([q_feature_vector])
# print (result)

# flist = list (mydataSIFT.iloc[ result[0].tolist()]['file'])
# slist = list (dist[0])
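
The query above assumes kmeans, SIFTtree, mydataSIFT and n_bins were prepared earlier. One plausible construction, sketched here with stand-in data, is a bag-of-visual-words index over per-image descriptor histograms:

# Hypothetical construction of the objects the query assumes.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.neighbors import KDTree

n_bins = 64                              # vocabulary size (assumption)
all_des = np.random.rand(2000, 128)      # stand-in for stacked SIFT descriptors
kmeans = KMeans(n_clusters=n_bins, n_init=10).fit(all_des)

features = []
for des in np.array_split(all_des, 50):  # stand-in for per-image descriptors
    words = kmeans.predict(des)
    hist, _ = np.histogram(words, bins=n_bins)
    features.append(hist)
SIFTtree = KDTree(np.array(features))    # one histogram row per image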
Exemple #55
0
class Loop:
    def __init__(self,
                 data,
                 k=4,
                 leaf_size=40,
                 lambdav=.5,
                 lap_vec=None,
                 bins=None):

        self.tree = KDTree(data, leaf_size=leaf_size)
        self.data = data

        self.is_binned = bins is not None
        if self.is_binned:
            self.discretizer = preprocessing.KBinsDiscretizer(
                n_bins=bins, encode='ordinal').fit(data)
            self.data_binned = self.discretizer.inverse_transform(
                self.discretizer.transform(data))
            self.bin_tree = KDTree(self.data_binned, leaf_size=leaf_size)

        self.k = k
        self.lambdav = lambdav

        self.knn_dist_arr, self.knn_idx_arr = self.knn(data)
        self._pdist = self.pdist(data, self.knn_dist_arr, lap_vec)

        self._plof, self.nplof = self.plof(data, self.knn_idx_arr, self._pdist)
        self.loop_values = self.loop(self._plof)

    def knn(self, o):
        if self.is_binned:
            return self.bin_tree.query(o, k=self.k)
        else:
            return self.tree.query(o, k=self.k)

    def pdist(self, o, knn_dist, lap_vec=None):
        if lap_vec is None:
            lap_vec = np.zeros(len(o))
        else:
            assert len(lap_vec) == len(o), "lap_vec length does not match data length"

        stddist = LA.norm(knn_dist, axis=1) / self.k + lap_vec
        pdist = stddist * self.lambdav
        return pdist

    def plof(self, o, knn_idx_arr, pdist):
        expected_pdist = np.empty(len(o))

        for idx, element in enumerate(o):
            element_neighbors = knn_idx_arr[idx]
            expected_pdist[idx] = np.mean(self._pdist[element_neighbors])

        plof = pdist / expected_pdist - 1
        nplof = np.sqrt(self.lambdav * np.mean(plof**2))
        return plof, nplof

    def loop(self, plof):
        erf_input = plof / (np.sqrt(2) * self.nplof)
        erf = special.erf(erf_input)
        loop_values = np.max(np.vstack((np.zeros(len(plof)), erf)), axis=0)
        return loop_values

    def query_loop(self, o):
        knn_dist, knn_idx = self.knn(o)
        pdist = self.pdist(o, knn_dist)
        plof, _ = self.plof(o, knn_idx, pdist)
        loop_values = self.loop(plof)
        return loop_values

    @staticmethod
    def benchmark(train,
                  test,
                  test_labels,
                  ks,
                  lambdav=3,
                  normalize=True,
                  lap_vec=None,
                  bins=None):
        if normalize:
            max_abs_scaler = preprocessing.MaxAbsScaler()
            standard_scaler = preprocessing.StandardScaler(with_std=False)
            train = max_abs_scaler.fit_transform(
                standard_scaler.fit_transform(train))
            test = max_abs_scaler.transform(standard_scaler.transform(test))

        aurocs = np.zeros(len(ks))
        for i, k in enumerate(ks):
            loop_wdbc = Loop(train,
                             lambdav=lambdav,
                             k=k,
                             lap_vec=lap_vec,
                             bins=bins)
            aurocs[i] = sklearn.metrics.roc_auc_score(
                test_labels, loop_wdbc.query_loop(test))
        return aurocs

    @staticmethod
    def bin_benchmark(train,
                      test,
                      test_labels,
                      lambdav=3,
                      normalize=True,
                      lap_vec=None,
                      bins_size=range(2, 10),
                      k=20):
        if normalize:
            max_abs_scaler = preprocessing.MaxAbsScaler()
            standard_scaler = preprocessing.StandardScaler(with_std=False)
            train = max_abs_scaler.fit_transform(
                standard_scaler.fit_transform(train))
            test = max_abs_scaler.transform(standard_scaler.transform(test))

        aurocs = np.zeros(len(bins_size))
        data_dim = train.shape[1]
        for i, b in enumerate(bins_size):
            bins = [b] * data_dim
            loop_wdbc = Loop(train,
                             lambdav=lambdav,
                             k=k,
                             lap_vec=lap_vec,
                             bins=bins)
            aurocs[i] = sklearn.metrics.roc_auc_score(
                test_labels, loop_wdbc.query_loop(test))
        return aurocs

    @staticmethod
    def r_unif(train_data, k):
        no_features = train_data.shape[1]
        omega_volume = 2**no_features  # consider scaling data to [-.5, .5] so that this would be 1
        data_cardinality = len(train_data)
        return ((special.gamma(1 + no_features / 2) * k * omega_volume) /
                (data_cardinality * np.pi**(no_features / 2)))**(1 /
                                                                 no_features)
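
A minimal run of the class above on synthetic 2D data with a few planted outliers (parameter values are illustrative; the class's own module-level imports are assumed to be in scope):

import numpy as np

rng = np.random.default_rng(0)
inliers = rng.normal(0, 1, (200, 2))
outliers = rng.uniform(6, 8, (5, 2))
data = np.vstack([inliers, outliers])

model = Loop(data, k=10, lambdav=3)
print(model.loop_values[-5:])                    # scores for the planted outliers
print(model.query_loop(np.array([[7.0, 7.0]])))  # score an unseen point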
Exemple #56
0
def score_path(path):
    return score_chunk(0, path)

# 4. Precompute close cities pairs using KDTree.

# In[ ]:


kdt = KDTree(XY)

# In[ ]:


pairs = set()
for city_id in tqdm(cities.index):
    dists, neibs = kdt.query([XY[city_id]], 31)
    for neib_id in neibs[0][1:]:
        if city_id and neib_id:  # skip pairs that include starting city 
            pairs.add(tuple(sorted((city_id, neib_id))))
    neibs = kdt.query_radius([XY[city_id]], 31, count_only=False, return_distance=False)
    for neib_id in neibs[0]:
        if city_id and neib_id and city_id != neib_id:
            pairs.add(tuple(sorted((city_id, neib_id))))

print(f'{len(pairs)} city pairs are selected.')
# sort pairs by distance
pairs = np.array(list(pairs))
distances = np.sum((XY[pairs.T[0]] - XY[pairs.T[1]])**2, axis=1)
order = distances.argsort()
pairs = pairs[order]
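
Worth noting: query returns the k nearest neighbours in sorted order (the first is the point itself), while query_radius returns every index within a fixed radius as a ragged array. A small shape check:

import numpy as np
from sklearn.neighbors import KDTree

pts = np.random.rand(100, 2) * 100
tree = KDTree(pts)

dists, neibs = tree.query([pts[0]], 5)      # both have shape (1, 5); neibs[0][0] == 0
in_ball = tree.query_radius([pts[0]], 10)   # array of per-query index arrays
print(dists.shape, neibs[0][:3], len(in_ball[0]))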
Exemple #57
0
def get_recall(m, n, database_vectors, query_vectors, query_sets, database_sets, log=False):
    # Original PointNetVLAD code
    database_output = database_vectors[m]
    queries_output = query_vectors[n]

    # When embeddings are normalized, using Euclidean distance gives the same
    # nearest neighbour search results as using cosine distance
    database_nbrs = KDTree(database_output)

    num_neighbors = 25
    recall = [0] * num_neighbors

    top1_similarity_score = []
    one_percent_retrieved = 0
    threshold = max(int(round(len(database_output)/100.0)), 1)

    num_evaluated = 0
    for i in range(len(queries_output)):
        # i is query element ndx
        query_details = query_sets[n][i]    # {'query': path, 'northing': , 'easting': }
        true_neighbors = query_details[m]
        if len(true_neighbors) == 0:
            continue
        num_evaluated += 1
        distances, indices = database_nbrs.query(np.array([queries_output[i]]), k=num_neighbors)
        if log:
            # Log 10% of false positives (returned as the first element) for Oxford dataset
            # Check if there's a false positive returned as the first element
            if query_details['query'][:6] == 'oxford' and indices[0][0] not in true_neighbors and random.random() < 0.1:
                fp_ndx = indices[0][0]
                fp = database_sets[m][fp_ndx]  # Database element: {'query': path, 'northing': , 'easting': }
                fp_emb_dist = distances[0, 0]  # Distance in embedding space
                fp_world_dist = np.sqrt((query_details['northing'] - fp['northing']) ** 2 +
                                        (query_details['easting'] - fp['easting']) ** 2)
                # Find the first true positive
                tp = None
                for k in range(len(indices[0])):
                    if indices[0][k] in true_neighbors:
                        closest_pos_ndx = indices[0][k]
                        tp = database_sets[m][closest_pos_ndx]  # Database element: {'query': path, 'northing': , 'easting': }
                        tp_emb_dist = distances[0][k]
                        tp_world_dist = np.sqrt((query_details['northing'] - tp['northing']) ** 2 +
                                                (query_details['easting'] - tp['easting']) ** 2)
                        break

                with open("log_fp.txt", "a") as f:
                    s = "{}, {}, {:0.2f}, {:0.2f}".format(query_details['query'], fp['query'], fp_emb_dist, fp_world_dist)
                    if tp is None:
                        s += ', 0, 0, 0\n'
                    else:
                        s += ', {}, {:0.2f}, {:0.2f}\n'.format(tp['query'], tp_emb_dist, tp_world_dist)
                    f.write(s)

            if query_details['query'][:6] == 'oxford' and len(indices[0]) >= 5 and random.random() < 0.01:
                # For randomly selected 1% of queries save details of 5 best matches for later visualization
                s = "{}, ".format(query_details['query'])
                for k in range(min(len(indices[0]), 5)):
                    is_match = indices[0][k] in true_neighbors
                    e_ndx = indices[0][k]
                    e = database_sets[m][e_ndx]     # Database element: {'query': path, 'northing': , 'easting': }
                    e_emb_dist = distances[0][k]
                    s += ', {}, {:0.2f}, {}, '.format(e['query'], e_emb_dist, 1 if is_match else 0)
                s += '\n'
                out_file_name = "log_search_results.txt"
                with open(out_file_name, "a") as f:
                    f.write(s)

        for j in range(len(indices[0])):
            if indices[0][j] in true_neighbors:
                if j == 0:
                    similarity = np.dot(queries_output[i], database_output[indices[0][j]])
                    top1_similarity_score.append(similarity)
                recall[j] += 1
                break

        if len(list(set(indices[0][0:threshold]).intersection(set(true_neighbors)))) > 0:
            one_percent_retrieved += 1

    one_percent_recall = (one_percent_retrieved/float(num_evaluated))*100
    recall = (np.cumsum(recall)/float(num_evaluated))*100
    # print(recall)
    # print(np.mean(top1_similarity_score))
    # print(one_percent_recall)
    return recall, top1_similarity_score, one_percent_recall
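
The comment at the top of get_recall can be checked directly: for unit-norm vectors, ||a - b||^2 = 2 - 2 a.b, so ranking by Euclidean distance and by cosine similarity agree. A quick numerical verification:

import numpy as np
from sklearn.neighbors import KDTree

rng = np.random.default_rng(0)
db = rng.normal(size=(500, 32))
db /= np.linalg.norm(db, axis=1, keepdims=True)   # L2-normalize the database
q = db[0:1] + 0.01 * rng.normal(size=(1, 32))
q /= np.linalg.norm(q)

_, euclid_order = KDTree(db).query(q, k=10)
cosine_order = np.argsort(-db @ q[0])[:10]
assert (euclid_order[0] == cosine_order).all()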
Exemple #58
0
class Mesh:
    def __init__(self,
                 filename,
                 texmappath,
                 exclude=[],
                 epsilon=0.0,
                 verbose=False):
        self.faces = {}  #  Hold Face objects
        self.v = 0  #  Number of LISTED vertices in the whole mesh
        #  (This number may be inflated by duplicates)
        self.vt = 0  #  Number of LISTED texture vertices in the whole mesh
        #  (This number may be inflated by duplicates)
        self.barycenters2d = None  #  To become a query-able KDTree
        self.barycenters3d = None  #  To become a query-able KDTree
        self.filename = filename  #  Name of the mesh obj
        self.imgformats = ['png', 'jpg', 'jpeg']  #  Acceptable texture map file formats
        self.texmappath = texmappath  #  Path from script to matterport materials
        self.texmaporigin = 'ul'  #  Indication of which corner is texture map origin:
        #  {'ul', 'll', 'ur', 'lr'} respectively for
        #  upper-left, lower-left, upper-right, lower-right
        self.epsilon = epsilon  #  Acceptable discrepancies between points
        #  to be considered the same
        self.reconcile = True  #  Whether we should bother reconciling triangle soup
        #  with epsilon-distances
        self.verbose = verbose  #  Loading these and reconciling triangle soup can
        #  take a while; show signs of life
        self.filesizes = {}  #  Save time by looking these up once
        for imgfile in os.listdir(texmappath):  #  For every texmap...
            imgfilename = imgfile.split('.')  #  if it's a known format and not omitted...
            if imgfilename[-1].lower() in self.imgformats:
                if imgfile not in exclude:
                    texmap = cv2.imread(texmappath + '/' + imgfile,
                                        cv2.IMREAD_COLOR)
                    #  Save to lookup table of width and height
                    self.filesizes[imgfile] = (len(texmap[0]), len(texmap))
                    if self.verbose:  #  Show the texture map dimensions
                        print('  ' + imgfile + ': ' + str(len(texmap[0])) +
                              ' x ' + str(len(texmap)))
                elif self.verbose:  #  Show that we're omitting a file by request
                    print('  Excluding ' + imgfile)
        self.vertexLookup = {}  #  Look up a vertex index to find a list of all faces
        #  to which it contributes.
        self.sames = {}  #  Look up a vertex index to find a list of vertices
        #  we consider "equal to it within epsilon."

    #  Read the OBJ file line by line. Accumulate 3D vertex and texmap (2D) vertex information, and build
    #  an instance of the Face class once we have enough information for a face.
    def load(self):
        v = {}  #  Vertices
        vctr = 1  #  Vertex index counter

        vt = {}  #  Texture coordinates
        vtctr = 1  #  Texture coordinates' index counter

        fctr = 0  #  Face index counter is free to start with zero
        #  because it is never referred to by other data types
        #  in the OBJ format
        currentMaterial = None  #  Track which material is currently applied

        if self.verbose:
            print('\n  Loading mesh from ' + self.filename)

        fh = open(self.filename, 'r')  #  Read entire file
        lines = fh.readlines()
        fh.close()

        if self.verbose:  #########################################  3D VERTICES
            print('  Reading vertices...')
        for line in lines:  #  Make one initial pass during which we only
            arr = line.strip().split()  #  care about the vertices.
            if len(arr) > 0:
                if arr[0] == 'v':
                    x = float(arr[1])
                    y = float(arr[2])
                    z = float(arr[3])
                    v[vctr] = (x, y, z)  #  Add the vctr-th vertex to the hash table
                    self.vertexLookup[vctr] = []  #  Prepare a running list of every face
                    vctr += 1  #  that uses this vertex
        if self.verbose:
            print('    ' + str(vctr - 1) + ' vertices')

        allV = [v[x] for x in range(1, vctr)]  #  Build complete list by vertex index
        redundancyTree = KDTree(allV)  #  Turn it into a tree

        #  Find all vertices within epsilon of each other:
        #  we're going to call them "The Same," but only so
        #  we can use them to find neighbors in triangle soup.
        if self.reconcile:  #  So... IGNORE this step if our application doesn't
            if self.verbose:  #  care about triangle adjacency!
                print('  Reconciling triangle soup with epsilon ' +
                      str(self.epsilon) + '...')
            samectr = 0
            for vnum in range(1, vctr):  #  Perform test for every vertex.
                ind = redundancyTree.query_radius(np.array([v[vnum]]),
                                                  self.epsilon)
                ind = [x + 1 for x in ind[0] if x + 1 != vnum]
                if len(ind) > 0:
                    self.sames[vnum] = ind
                    samectr += 1
                    if self.verbose:
                        sys.stdout.write(
                            '    %d epsilon-equivalent vertices found\r' %
                            samectr)
                        sys.stdout.flush()

            if self.verbose:
                print('')

        if self.verbose:  #########################################  2D (TEXMAP) VERTEX
            print('  Reading texture map vertices...')
        for line in lines:
            arr = line.strip().split()
            if len(arr) > 0:  #  Make sure line actually had content
                if arr[0] == 'vt':
                    u = float(arr[1])
                    w = float(arr[2])
                    vt[vtctr] = (u, w)  #  Add the vtctr-th vertex to the hash table
                    vtctr += 1

        if self.verbose:  #########################################  FACE
            print('  Reading faces...')
        for line in lines:
            arr = line.strip().split()
            if len(arr) > 0:  #  Make sure line actually had content
                if arr[0] == 'f':

                    subarr = arr[1].split('/')  #  Split v/vt pair
                    a1 = int(subarr[0])  #  Save v index
                    a2 = int(subarr[1])  #  Save vt index

                    subarr = arr[2].split('/')  #  Split v/vt pair
                    b1 = int(subarr[0])  #  Save v index
                    b2 = int(subarr[1])  #  Save vt index

                    subarr = arr[3].split('/')  #  Split v/vt pair
                    c1 = int(subarr[0])  #  Save v index
                    c2 = int(subarr[1])  #  Save vt index

                    texmapW = self.filesizes[currentMaterial][0]  #  Retrieve actual dimensions of this texmap
                    texmapH = self.filesizes[currentMaterial][1]  #  so we can get actual pixel locations

                    self.faces[fctr] = Face()  #  New face...
                    #  made of these three 3D vertices...
                    self.faces[fctr].set3DTriangle(v[a1], v[b1], v[c1])
                    #  which have these three OBJ indices...
                    self.faces[fctr].set3DTriangleIndices(a1, b1, c1)
                    #  skinned with this 2D triangle...
                    if self.texmaporigin == 'ul':  #  (Origin in upper-left corner)
                        self.faces[fctr].set2DTriangle( (vt[a2][0] * texmapW, vt[a2][1] * texmapH), \
                                                        (vt[b2][0] * texmapW, vt[b2][1] * texmapH), \
                                                        (vt[c2][0] * texmapW, vt[c2][1] * texmapH) )
                    elif self.texmaporigin == 'll':  #  (Origin in lower-left corner)
                        self.faces[fctr].set2DTriangle( (vt[a2][0] * texmapW, texmapH - vt[a2][1] * texmapH), \
                                                        (vt[b2][0] * texmapW, texmapH - vt[b2][1] * texmapH), \
                                                        (vt[c2][0] * texmapW, texmapH - vt[c2][1] * texmapH) )
                    elif self.texmaporigin == 'lr':  #  (Origin in lower-right corner)
                        self.faces[fctr].set2DTriangle( (texmapW - vt[a2][0] * texmapW, texmapH - vt[a2][1] * texmapH), \
                                                        (texmapW - vt[b2][0] * texmapW, texmapH - vt[b2][1] * texmapH), \
                                                        (texmapW - vt[c2][0] * texmapW, texmapH - vt[c2][1] * texmapH) )
                    else:  #  (Origin in upper-right corner)
                        self.faces[fctr].set2DTriangle( (texmapW - vt[a2][0] * texmapW, vt[a2][1] * texmapH), \
                                                        (texmapW - vt[b2][0] * texmapW, vt[b2][1] * texmapH), \
                                                        (texmapW - vt[c2][0] * texmapW, vt[c2][1] * texmapH) )
                    self.faces[fctr].set2DTriangleIndices(a2, b2, c2)
                    #  ...which has these three OBJ indices...
                    self.faces[fctr].texmap = currentMaterial  #  ...and which comes from this texture map

                    #  Keep a running list of faces touching each vertex
                    self.vertexLookup[a1].append(fctr)
                    self.vertexLookup[b1].append(fctr)
                    self.vertexLookup[c1].append(fctr)

                    fctr += 1

                elif arr[0] == 'usemtl':  #  Change the currently applied material
                    currentMaterial = arr[1]
        if self.verbose:
            print('    ' + str(fctr) + ' faces')

        tree2d = [list(self.faces[x].barycenter2D) for x in range(0, fctr)]
        tree3d = [list(self.faces[x].barycenter3D) for x in range(0, fctr)]

        self.barycenters2d = KDTree(tree2d)
        self.barycenters3d = KDTree(tree3d)

        self.v = vctr - 1  #  Save for reference
        self.vt = vtctr - 1

        return

    def query2d(self, pt, a, b):
        dist, ind = self.barycenters2d.query(np.array([list(pt)]), k=b)
        return list(ind[0])[a:b + 1]

    def computeFaceNeighbors(self):
        for i in range(0, len(self.faces)):  #  For each face in the mesh
            n = []  #  prepare a list of all neighbor faces.
            for v in self.faces[i].t3Dindices:  #  Look up each vertex in each face
                n += [x for x in self.vertexLookup[v] if x != i]  #  add all other faces formed by this vertex

            s = []
            for v in self.faces[i].t3Dindices:
                if v in self.sames:
                    for same in self.sames[v]:
                        s += [x for x in self.vertexLookup[same] if x != i]

            n += s
            self.faces[i].neighbors = list(dict.fromkeys(n))  #  Remove duplicate entries and store in Face class

        return

def convert_txt2ply(save_path=None, sub_grid_size=0.06):
    """Convert the original text files to ply files (each line is XYZRGBL).

    Args:
        save_path (str, optional): Output path (unused in this snippet). Defaults to None.
        sub_grid_size (float, optional): Grid size used for sub-sampling. Defaults to 0.06.
    """
    make_dir(sub_grid_size)

    for pointcloud_path in glob.glob(
            os.path.join(semantic3d_data_path, '*.txt')):
        print(pointcloud_path)
        filename = pointcloud_path.split('/')[-1][:-4]

        if os.path.exists(
                os.path.join(sub_pointcloud_folder, filename + '_KDTree.pkl')):
            continue

        pointcloud = DataProcessing.load_pc_semantic3d(pointcloud_path)
        label_path = pointcloud_path[:-4] + '.labels'
        print(label_path)
        if os.path.exists(label_path):
            labels = DataProcessing.load_label_semantic3d(label_path)
            full_ply_path = os.path.join(original_pointcloud_folder,
                                         filename + '.ply')

            sub_points, sub_colors, sub_labels = DataProcessing.grid_sub_sampling(
                pointcloud[:, :3].astype(np.float32),
                pointcloud[:, 4:7].astype(np.uint8), labels, 0.01)
            sub_labels = np.squeeze(sub_labels)
            ply.write_ply(full_ply_path, (sub_points, sub_colors, sub_labels),
                          ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])

            sub_xyz, sub_colors, sub_labels = DataProcessing.grid_sub_sampling(
                sub_points, sub_colors, sub_labels, sub_grid_size)
            sub_colors = sub_colors / 255.0
            sub_labels = np.squeeze(sub_labels)
            sub_ply_file = os.path.join(sub_pointcloud_folder,
                                        filename + '.ply')
            ply.write_ply(sub_ply_file, [sub_xyz, sub_colors, sub_labels],
                          ['x', 'y', 'z', 'red', 'green', 'blue', 'class'])

            search_tree = KDTree(sub_xyz, leaf_size=50)
            kd_tree_file = os.path.join(sub_pointcloud_folder,
                                        filename + '_KDTree.pkl')
            with open(kd_tree_file, 'wb') as f:
                pickle.dump(search_tree, f)

            proj_idx = np.squeeze(
                search_tree.query(sub_points, return_distance=False))
            proj_idx = proj_idx.astype(np.int32)
            proj_save = os.path.join(sub_pointcloud_folder,
                                     filename + '_proj.pkl')
            with open(proj_save, 'wb') as f:
                pickle.dump([proj_idx, labels], f)

        else:
            fully_ply_path = os.path.join(original_pointcloud_folder,
                                          filename + '.ply')
            ply.write_ply(fully_ply_path, (pointcloud[:, :3].astype(
                np.float32), pointcloud[:, 4:7].astype(np.uint8)),
                          ['x', 'y', 'z', 'red', 'green', 'blue'])

            sub_xyz, sub_colors = DataProcessing.grid_sub_sampling(
                pointcloud[:, :3].astype(np.float32),
                pointcloud[:, 4:7].astype(np.uint8),
                grid_size=sub_grid_size)
            sub_colors = sub_colors / 255.0
            sub_ply_file = os.path.join(sub_pointcloud_folder,
                                        filename + '.ply')
            ply.write_ply(sub_ply_file, [sub_xyz, sub_colors],
                          ['x', 'y', 'z', 'red', 'green', 'blue'])
            labels = np.zeros(pointcloud.shape[0], dtype=np.uint8)

            search_tree = KDTree(sub_xyz, leaf_size=50)
            kd_tree_file = os.path.join(sub_pointcloud_folder,
                                        filename + '_KDTree.pkl')
            with open(kd_tree_file, 'wb') as f:
                pickle.dump(search_tree, f)

            proj_idx = np.squeeze(
                search_tree.query(pointcloud[:, :3].astype(np.float32),
                                  return_distance=False))
            proj_idx = proj_idx.astype(np.int32)
            proj_save = os.path.join(sub_pointcloud_folder,
                                     filename + '_proj.pkl')
            with open(proj_save, 'wb') as f:
                pickle.dump([proj_idx, labels], f)
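
The _proj.pkl files written above map every original point to its nearest sub-sampled point; at evaluation time they are typically used to project per-sub-point predictions back onto the full cloud. A hedged sketch (the file name and class count are hypothetical):

import pickle
import numpy as np

with open('some_cloud_proj.pkl', 'rb') as f:             # hypothetical path
    proj_idx, full_labels = pickle.load(f)

sub_preds = np.random.randint(0, 8, proj_idx.max() + 1)  # stand-in per-sub-point predictions
full_preds = sub_preds[proj_idx]                         # one prediction per original point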
Exemple #60
0
class LRU_KNN_PS(object):
    def __init__(self,
                 capacity,
                 obs_shape,
                 z_dim,
                 env_name,
                 action,
                 num_actions=6,
                 knn=4,
                 debug=True,
                 gamma=0.99,
                 alpha=0.1,
                 beta=0.01):
        self.obs = np.empty((capacity, ) + obs_shape, dtype=np.uint8)
        self.action = action
        self.alpha = alpha
        self.beta = beta
        self.z_dim = z_dim
        self.env_name = env_name
        self.capacity = capacity
        self.num_actions = num_actions
        self.rmax = 100000
        self.states = np.empty((capacity, z_dim), dtype=np.float32)
        self.external_value = np.full((capacity, num_actions), np.nan)
        self.state_value_v = np.full((capacity, ), np.nan)
        self.state_value_u = np.full((capacity, ), np.nan)
        self.reward = np.zeros((capacity, num_actions))
        self.done = np.zeros((capacity, num_actions), dtype=bool)
        self.newly_added = np.ones((capacity, num_actions), dtype=bool)
        self.internal_value = self.rmax * np.ones((capacity, num_actions))
        self.prev_id = [[] for _ in range(capacity)]
        self.next_id = [[{} for __ in range(num_actions)]
                        for _ in range(capacity)]
        self.pseudo_count = [[{} for __ in range(num_actions)]
                             for _ in range(capacity)]
        self.pseudo_reward = np.zeros((capacity, num_actions))
        self.pseudo_prev = [{} for _ in range(capacity)]
        self.debug = debug
        self.count = np.zeros((capacity, num_actions))
        self.lru = np.zeros(capacity)
        # self.best_action = np.zeros((capacity, num_actions), dtype=np.int)
        self.curr_capacity = 0
        self.tm = 0.0
        self.threshold = 1e-7
        self.knn = knn
        self.gamma = gamma
        self.b = 0.01
        # self.beta = beta
        self.tree = None
        self.logger = logging.getLogger("ecbp")

    def log(self, *args, logtype='debug', sep=' '):
        getattr(self.logger, logtype)(sep.join(str(a) for a in args))

    def build_tree(self):
        if self.curr_capacity == 0:
            return False
        self.tree = KDTree(self.states[:self.curr_capacity], leaf_size=10)
        return True

    def peek(self, key):
        if self.curr_capacity == 0 or self.tree is None:
            return -1, [], []
        # print(np.array(key).shape)
        key = np.array(key, copy=True)
        if len(key.shape) == 1:
            key = key[np.newaxis, ...]
        dist, ind = self.tree.query(key, k=min(self.knn, self.curr_capacity))
        # dist, ind = knn_cuda_fixmem.knn(self.address, key, 1, self.curr_capacity)
        # dist, ind = np.transpose(dist), np.transpose(ind - 1)
        ind_n = ind[0][0]
        if dist[0][0] < self.threshold:
            return ind_n, dist, ind
        return -1, dist, ind

    def act_value(self, key, knn):
        knn = min(self.curr_capacity, knn)

        internal_values = []
        external_values = []
        exact_refer = []
        if knn < 1 or self.tree is None:
            for i in range(len(key)):
                internal_values.append(self.rmax * np.ones(self.num_actions))
                external_values.append(np.zeros(self.num_actions))
                exact_refer.append(False)
            return external_values, internal_values, np.array(exact_refer)

        key = np.array(key, copy=True)

        if len(key.shape) == 1:
            key = key[np.newaxis, ...]
        assert key.shape[0] == 1
        dist, ind = self.tree.query(key, k=min(knn + 1, self.curr_capacity))
        # dist, ind = knn_cuda_fixmem.knn(self.address, key, knn, self.curr_capacity)
        # dist, ind = np.transpose(dist), np.transpose(ind - 1)
        # print(dist.shape, ind.shape, len(key), key.shape)
        # print("nearest dist", dist[0][0])
        external_value = np.zeros(self.num_actions)
        external_nan_mask = np.full((self.num_actions, ), np.nan)
        internal_value = self.rmax * np.ones(self.num_actions)
        # keep only entries none of whose actions were newly added
        old_mask = (~self.newly_added[ind]).all(axis=-1)
        ind, dist = ind[old_mask].reshape(1, -1), dist[old_mask].reshape(1, -1)
        for i in range(len(dist)):
            coeff = -dist[i] / self.b
            coeff = coeff - np.max(coeff)
            coeff = np.exp(coeff)
            coeff = coeff / np.sum(coeff)
            if dist[i][0] < self.threshold and not np.isnan(
                    self.external_value[ind[i][0]]).all():

                self.log("peek in act ", ind[i][0])
                exact_refer.append(True)
                external_value = copy.deepcopy(self.external_value[ind[i][0]])
                internal_value = copy.deepcopy(self.internal_value[ind[i][0]])
                # external_value[np.isnan(external_value)] = 0
                self.lru[ind[i][0]] = self.tm
                self.tm += 0.01
            else:
                exact_refer.append(False)
                for j, index in enumerate(ind[i]):
                    tmp_external_value = copy.deepcopy(
                        self.external_value[index, :])
                    # mark actions that received at least one real estimate
                    external_nan_mask[~np.isnan(tmp_external_value)] = 0
                    tmp_external_value[np.isnan(tmp_external_value)] = 0
                    external_value += tmp_external_value * coeff[j]
                    self.lru[index] = self.tm
                    self.tm += 0.01
                external_value += external_nan_mask
            external_values.append(external_value)
            internal_values.append(internal_value)

        return external_values, internal_values, np.array(exact_refer)

    def add_edge(self, src, des, action, reward, done):
        if (src, action) not in self.prev_id[des]:
            self.prev_id[des].append((src, action))
            self.newly_added[src, action] = True
        try:
            self.next_id[src][action][des] += 1
        except KeyError:
            self.next_id[src][action][des] = 1
        if self.internal_value[src, action] > 0 and sum(
                self.next_id[src][action].values()) > 5:
            self.internal_value[src, action] = 0
        # note that we assume the reward function is deterministic
        self.reward[src, action] = reward
        self.done[src, action] = done
        return sum(self.next_id[src][action].values())

    def add_node(self, key, obs=None):
        # print(np.array(key).shape)
        if self.curr_capacity >= self.capacity:
            # find the LRU entry
            old_index = int(np.argmin(self.lru))
            for action in range(self.num_actions):
                for successor in self.next_id[old_index][action].keys():
                    # drop back-references to the evicted entry
                    self.prev_id[successor] = [(s, a)
                                               for s, a in self.prev_id[successor]
                                               if s != old_index]
                self.next_id[old_index][action] = dict()
            self.states[old_index] = key
            self.external_value[old_index] = np.full((self.num_actions, ),
                                                     np.nan)
            self.internal_value[old_index] = self.rmax * np.ones(
                self.num_actions)
            self.state_value_u[old_index] = np.nan
            self.state_value_v[old_index] = np.nan
            self.lru[old_index] = self.tm
            self.count[old_index] = 2
            if obs is not None:
                self.obs[old_index] = obs
            self.prev_id[old_index] = []
            # knn_cuda_fixmem.add(self.address, old_index, np.array(key))
            self.tm += 0.01
            # self.build_tree()
            return old_index, True

        else:
            self.states[self.curr_capacity] = key
            self.lru[self.curr_capacity] = self.tm
            self.count[self.curr_capacity] = 2
            if obs is not None:
                self.obs[self.curr_capacity] = obs
            # knn_cuda_fixmem.add(self.address, self.curr_capacity, np.array(key))
            self.curr_capacity += 1
            self.tm += 0.01
            # self.build_tree()
            return self.curr_capacity - 1, False

    @staticmethod
    def distance(a, b):
        return np.sqrt(np.sum(np.square(a - b)))

    def update_q_value(self, state, action, state_tp1, delta_u):
        successor_states = self.next_id[state][action].keys()
        weight = {s: self.next_id[state][action][s] for s in successor_states}
        trans_p = weight[state_tp1] / sum(weight.values())
        assert 0 <= trans_p <= 1
        if np.isnan(self.external_value[state, action]):
            self.external_value[state, action] = self.reward[state, action]
        self.external_value[state, action] += self.gamma * trans_p * delta_u

    def sample(self, sample_size):
        sample_size = min(self.curr_capacity, sample_size)
        if sample_size % 2 == 1:
            sample_size -= 1
        if sample_size < 2:
            return None
        indexes = []
        positives = []
        values = []
        actions = []
        while len(indexes) < sample_size:
            ind = int(np.random.randint(0, self.curr_capacity, 1))
            if ind in indexes:
                continue
            next_id_tmp = [[(a, ind_tp1)
                            for ind_tp1 in self.next_id[ind][a].keys()]
                           for a in range(self.num_actions)]
            next_id = []
            for x in next_id_tmp:
                next_id += x
            # next_id = np.array(next_id).reshape(-1)
            if len(next_id) == 0:
                continue
            # sample one transition so the action matches the successor
            action, positive = next_id[np.random.randint(0, len(next_id))]
            indexes.append(ind)
            positives.append(positive)
            actions.append(action)
            values.append(np.nanmax(self.external_value[ind, :]))

        negatives = [
            int((pos + sample_size // 2) % sample_size) for pos in positives
        ]
        z_target = [self.states[ind] for ind in indexes]
        z_pos = [self.states[pos] for pos in positives]
        z_neg = [self.states[neg] for neg in negatives]
        return indexes, positives, negatives, z_target, z_pos, z_neg, values, actions

    def update(self, indexes, z_new):
        self.log("update in buffer", self.curr_capacity)
        assert len(indexes) == len(z_new), "{}{}".format(
            len(indexes), len(z_new))
        assert z_new.shape[1] == self.z_dim
        for i, ind in enumerate(indexes):
            self.states[ind] = z_new[i]