Esempio n. 1
def _proximity_filter(point, data, total):
    """Return the ``total`` coordinate tuples of ``data`` nearest to ``point``.

    A KD-tree is built from ``data`` on every call and queried once.

    Args:
        point: Coordinate tuple to search around.
        data: List of coordinate tuples to search in.
        total: Number of tuples requested; forwarded to the tree as ``t``.

    Returns:
        The result of ``KDTree.query`` for ``point`` with ``t=total``.
    """
    tree = KDTree.construct_from_data(data)
    return tree.query(query_point=point, t=total)
Esempio n. 2
def autodiscover():
    """Index every stored ``Pixel`` in a KD-tree published as ``kd_tree``.

    Rebinds the module-level ``kd_tree`` name; returns nothing.
    """
    global kd_tree
    # Kept as a function-local import, exactly as the original does.
    from pixel.models import Pixel

    # Materialise the queryset into a list before handing it to the tree.
    kd_tree = KDTree.construct_from_data(list(Pixel.objects.all()))
Esempio n. 3
 def __init__(self, eps, MinPts, pointlist):
     """Store the parameters and index ``pointlist`` in a KD-tree.

     NOTE(review): ``eps`` / ``MinPts`` look like DBSCAN-style parameters
     (neighbourhood radius, minimum neighbour count) -- confirm against the
     rest of the class, which is not visible here.
     """
     self.eps = eps
     self.MinPts = MinPts
     self.points = pointlist
     # One entry per index into pointlist.
     self.unvisited = [i for i in range(len(pointlist))]
     # formatpoints() is defined elsewhere in the class; its result is what
     # the tree is built from.
     self.kdtree = KDTree.construct_from_data(self.formatpoints())
     # Dictionary keyed by each point's coordinate tuple.
     self.pointidmap = {}
     for point in pointlist:
         # NOTE(review): the right-hand side of this assignment is missing
         # in this excerpt, so the mapped value cannot be determined here.
         self.pointidmap[tuple(point.coordinates)] =
Esempio n. 4
 def createTrackpointTree(self, trackpoints):
     """Create a KD-tree out of the trackpoints.

     Every point is converted to an ``(x, y, z)`` tuple through its
     ``getX()`` / ``getY()`` / ``getZ()`` accessors. The tuples are kept on
     ``self.track_tupel_list`` and a shallow copy on ``self.list4tree``,
     which is what the tree is built from.

     NOTE(review): the ``trackpoints`` argument is not read; the loop
     iterates ``self.trackpoints`` as the original code does. Confirm which
     of the two is intended before changing it.

     Returns:
         The tree returned by ``KDTree.construct_from_data``.
     """
     self.track_tupel_list = []
     # Change from Vec3 to tuple
     for point in self.trackpoints:
         self.track_tupel_list.append(
             (point.getX(), point.getY(), point.getZ()))
     self.list4tree = self.track_tupel_list[:]
     return KDTree.construct_from_data(self.list4tree)
def knn():
	# Reads 'train.csv' twice: the first pass fills lookupTable and builds a
	# KD-tree, the second pass queries the tree for each row and prints
	# whether the row's label matches the most frequent neighbour label.
	# NOTE(review): several `if` statements below have no body in this
	# excerpt (lines appear to have been dropped), so the function does not
	# parse as shown. Neither file handle is closed within the excerpt.
	f = open('train.csv' , 'r')
	data = [] # all labeled data
	lookupTable = dict()
	num = 0
	for line in f:
		d = line.split(',')
		# NOTE(review): body missing -- presumably skips the header row; confirm.
		if d[0] == 'label':
		d = map(int , d)
		# Feature tuple (every column after the first) -> label (first column).
		lookupTable[tuple(d[1:])] = d[0]
		# NOTE(review): body missing -- presumably stops reading; confirm.
		if num > 40000:
		num += 1


	# NOTE(review): nothing in this excerpt appends to `data`, so as shown
	# the tree would be built from an empty sequence -- an append was
	# presumably dropped from the loop above; confirm.
	points1 = map(lambda x : tuple(x[1:]) , data)
	tree = KDTree.construct_from_data(points1)

	num = 0

	# Only referenced by the commented-out nearestNeighbours call below.
	points = map(lambda x : x[1:] , data)

	f = open('train.csv' , 'r')
	for line in f:
		num += 1
		# NOTE(review): both bodies missing -- presumably restrict evaluation
		# to rows 32000..32100 (skip before, stop after); confirm.
		if num < 32000:
		if num > 32100:

		d = line.split(',')
		# NOTE(review): body missing -- presumably skips the header row; confirm.
		if d[0] == 'label':
		d = map(int , d)
		# start/end are only consumed by the commented-out timing print.
		start = time.mktime(time.localtime())
		# Second positional argument is 10 -- presumably the neighbour count
		# (`t` in the keyword form of query); confirm.
		nn = tree.query(tuple(d[1:]) , 10)

		end = time.mktime(time.localtime())
		#print str(end - start) + ' secs to get distances'
		start = time.mktime(time.localtime())
		#nn = nearestNeighbours(points , d[1:] , 10)
		# Tally how often each label occurs among the returned neighbours.
		counts = defaultdict(int)
		for x in nn:
			counts[lookupTable[x]] += 1

		# Prints 'True' when the row's own label equals the label with the
		# highest tally, 'False' otherwise.
		print str(d[0] == sorted(counts , key = lambda x : counts[x] , reverse = True)[0])

Esempio n. 6
def nearest_filtered(Primary_Technology, Role):
    # Loads the predictions CSV, keeps the rows matching Primary_Technology
    # and Role, builds a KD-tree over selected columns of those rows, and
    # returns selected columns of the rows nearest to a fixed query point.
    # NOTE(review): four statements below are cut off in this excerpt (each
    # is marked), so the function does not parse as shown.
    #Separating out the indices
    #Opening the files

    # NOTE(review): the path literal contains unescaped backslashes; it only
    # works because "\D" and "\I" are not recognised escape sequences.
    # 'rb' is the Python 2 convention for csv.reader.
    with open("C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        reader = csv.reader(f)
        data = map(tuple, reader)

    # Filtering the list
    def f1(t):
        # Column 2 must equal the requested primary technology.
        return t[2] == Primary_Technology  # Primary Skill is Informatica

    def f2(t):
        # Column 11 must equal the requested role.
        return t[11] == Role  # Role

    filters = [f1, f2]

    # filter_lambda is defined elsewhere; presumably keeps rows that satisfy
    # every predicate in `filters` -- confirm.
    filtered_data = filter_lambda(filters, data)

    # Project each row onto column 0 plus columns 57..65 (ten values).
    # NOTE(review): the call is cut off -- the iterable argument and the
    # closing parenthesis are missing (presumably `filtered_data)`).
    kd_filtered_data = map(
        operator.itemgetter(0, 57, 58, 59, 60, 61, 62, 63, 64, 65),

    # Creating the tree

    tree = KDTree.construct_from_data(kd_filtered_data)

    # Finding the nearest neighbours, t can be varied for the number of neighbours

    # NOTE(review): the call is cut off -- the `t=` argument and the closing
    # parenthesis are missing; the intended neighbour count is not visible.
    nearest = tree.query(query_point=(0, 10, 10, 10, 10, 10, 10, 10, 10, 10),

    # The serial number of the nearest neighbours
    nearest_index = [x[0] for x in nearest]

    # Using this to filter the original list
    # NOTE(review): the closing `]` of this comprehension is missing.
    kd_filtered_nearest = [
        tup for tup in filtered_data if tup[0] in nearest_index

    # Preparing the dataset to be printed

    # Keep columns 1, 2, 8, 9 and 10 of each selected row.
    # NOTE(review): the call is cut off -- the iterable argument and the
    # closing parenthesis are missing (presumably `kd_filtered_nearest)`).
    kd_filtered_nearest_printed = map(operator.itemgetter(1, 2, 8, 9, 10),

    return kd_filtered_nearest_printed
Esempio n. 8
def proximity_filter(point, data, total):
    """Return the points of ``data`` closest to ``point``, capped at ``total``.

    Given a point and a data set of points, the result is a list of points
    with a length of at most ``total``, sorted by proximity. A KD-tree is
    built from ``data`` on every call and queried once.

    Args:
        point: The query point.
        data: The data set of points to search.
        total: Cap on the number of points returned; forwarded as ``t``.

    Returns:
        The result of ``KDTree.query`` for ``point`` with ``t=total``.
    """
    tree = KDTree.construct_from_data(data)
    return tree.query(query_point=point, t=total)
Esempio n. 9
# declare a 2D array for confusion matrix
# 26 x 26 grid of zero counts. Below it is indexed as
# [letter of the queried element][letter of its nearest neighbour].
confusionMatrix = [[0 for x in xrange(26)] for x in xrange(26)]

listOfPoints = []

# NOTE(review): `counter`, `isKDTreeConstructed` and `properMatches` are not
# initialised anywhere in this excerpt -- presumably set up earlier; confirm.
with open('training_data.txt', 'r') as f:
    for line in f:
        # NOTE(review): nothing in this excerpt adds to listOfPoints, and
        # this branch does not skip the rest of the loop body -- lines were
        # presumably dropped here (append + continue); confirm.
        if counter < 15000:
            counter += 1
        # One-time setup: build the tree from listOfPoints and start timing.
        if isKDTreeConstructed == False:
            # NOTE(review): time.clock() was removed in Python 3.8.
            start = time.clock()
            kdTree = KDTree.construct_from_data(listOfPoints)
            elapsedForKDTreeConstruction = (time.clock() - start)
            isKDTreeConstructed = True
            print "KDTree constructed in %.2fs" % (elapsedForKDTreeConstruction)
            searchStartTime = time.clock()
            print "Evaluating input data..."
        # getDataElementFrom is defined elsewhere; its result exposes `.lettr`.
        currentLine = getDataElementFrom(line)
        nearest = kdTree.query(currentLine)
        # 65 == ord('A'); assumes `.lettr` is an uppercase letter A-Z -- confirm.
        confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1
        # Count the cases where the nearest neighbour carries the same letter.
        if currentLine.lettr == nearest[0].lettr:
            properMatches += 1
        counter += 1
        # NOTE(review): the body of this `if` is missing in this excerpt.
        if counter % 500 == 0:
Esempio n. 10
from kdtree import KDTree

# Six sample points with three coordinates each.
data = [(1, 2, 3), (4, 0, 1), (5, 3, 1), (10, 5, 4), (9, 8, 9), (4, 2, 4)]

tree = KDTree.construct_from_data(data)
# Query around (5, 4, 3) with t=2.
nearest = tree.query(query_point=(5, 4, 3), t=2)
# Parenthesised single-argument form: prints exactly what `print nearest`
# prints on Python 2 and also parses on Python 3.
print(nearest)
Esempio n. 11
def buildKDTree():
    """Build and return a KD-tree over the keys of the global ``topic_dict``.

    Per the original comment, the keys are the topics' coordinates and the
    result is a 2-D tree.
    """
    return KDTree.construct_from_data(topic_dict.keys())
Esempio n. 12
# For every entry of `files`: load its geo positions and, while more than
# 2000 points remain, pair each point with a neighbour found via a KD-tree.
# NOTE(review): the excerpt ends inside the `while` loop; nothing shown here
# shrinks `points`, so the rest of the loop body is presumably out of view.
for city in files:

	points = []
	# Text before the first '.' of the file name.
	city_name = city.split('.')[0]

	# 'rb' is the Python 2 convention for csv.reader.
	with open('geopositions/%s.txt'%city_name, 'rb') as csvfile:
	    georeader = csv.reader(csvfile, delimiter=',', quotechar='|')
	    for row in georeader:
	        # First two columns parsed as floats, plus a constant 1.0 as the
	        # third component.
	        points.append((float(row[0]), float(row[1]), 1.0))

	# block_size, start_time and s_time are not read within this excerpt.
	block_size = 100
	start_time = time.time()
	while len(points) > 2000:
		print len(points)
		s_time = time.time()
		# The tree is rebuilt from the current `points` on every pass.
		tree = KDTree.construct_from_data(points)
		# min_dist = 2000000000
		# f_point = None
		# s_point = None
		pairs = []
		for point in points:
			# t=2 and index 1: presumably index 0 is the query point itself,
			# so this picks its closest other point -- confirm.
			nearest = tree.query(query_point=point, t=2)
			found_point = nearest[1]
			# get_dist is defined elsewhere.
			dist = get_dist(found_point, point)
			pairs.append((point, found_point, dist))
			# if dist < min_dist:
			# 	f_point = found_point
			# 	s_point = point
			# 	min_dist = dist
		# Order the (point, found_point, dist) triples by ascending dist.
		pairs.sort(key=lambda x: x[2])
		all_pairs = []
Esempio n. 13
                print "\tEvaluated %d rows for condensed training set in %.2fs" %(counter, time.clock() - start)
        elif isPrinted == False:
            elapsed = (time.clock() - start)
            print ("Condensed Training data mapped to feature space in %.4fmin." % (elapsed/60))
            print ("Boundary points evaluated: %d" % getTrainingData().__len__())
            classificationTimeStart = time.clock()
            isPrinted = True
            # Implement the search of the next 5000 elements using a KDTree
            searchStartTime = time.clock()
            #test: Trying with KDTree
            kdTree = KDTree.construct_from_data(getTrainingData())
            currentLine = getDataElementFrom(line)
#             nearest = evaluateLine(line, 1)
            nearest = kdTree.query(currentLine)
            confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1
            if currentLine.lettr == nearest[0].lettr:
                properMatches += 1
#             confusionMatrix[ord(nearest[0]) - 65][ord(nearest[1]) - 65] += 1
#             if nearest[0] == nearest[1]:
#                 properMatches += 1
            counter += 1
Esempio n. 14
from kdtree import KDTree

# Six sample points with two coordinates each.
data = [(1,2),(4,0),(8,3),(10,5),(9,8),(4,2)]

# Build the tree once, then query around (10, 0) with t=1.
tree = KDTree.construct_from_data(data)
nearest = tree.query(query_point=(10,0), t=1)
Esempio n. 15
def fitting_obj_sample(param):
    """Compute the sampled ellipsoid-fitting objective for ``param``.

    Computes residuals based on distance from the ellipsoid; can be used
    with different loss functions on the residual. The ellipsoid surface is
    sampled via ``ellipsoid(...)``, the samples are indexed in a KD-tree, and
    every data point (module-level ``x``, ``y``, ``z``) contributes a loss on
    its Euclidean distance to the nearest sample. A regulariser on ``param``
    is added at the end.

    Side effects:
        Rebinds the module-level ``global_loss`` to an array holding the
        loss of each data point, and prints progress values.

    Args:
        param: Sequence whose first six entries are the centre
            ``(cx, cy, cz)`` followed by ``(rx, ry, rz)``.

    Returns:
        The sum of the per-point losses plus ``10 * regularizer(param)``.
    """
    obj = 0

    # centers
    cx = param[0]
    cy = param[1]
    cz = param[2]

    rx = param[3]
    ry = param[4]
    rz = param[5]
    # The trailing 20 is forwarded unchanged; presumably a sampling
    # resolution -- confirm against ellipsoid().
    sx, sy, sz = ellipsoid(cx, cy, cz, rx, ry, rz, 20)
    num_samples = len(sx)

    #plot_point_cloud(sx, sy, sz)

    # "%s" formatting prints the same text as the former Python 2 print
    # statements while also parsing on Python 3.
    print("num_samples %s" % (num_samples,))

    #data = numpy.array(zip(sx, sy, sz)).T
    #tree = kdt.kdtree( data, leafsize=1000 )

    # list(...) is a no-op on Python 2 and keeps the data a real list on
    # Python 3, where zip() is a one-shot iterator.
    data = list(zip(sx, sy, sz))
    tree = KDTree.construct_from_data(data)

    num_queries = len(x)

    print("num_queries %s" % (num_queries,))

    global global_loss
    global_loss = numpy.zeros(num_queries)

    for idx in range(num_queries):

        # Reference only (never executed): closed-form point-to-ellipsoid
        # distance kept from the original source.
        #
        #   Compute the unique root tbar of F(t) on (-e2*e2,+infinity);
        #
        #   x0 = e0*e0*y0/(tbar + e0*e0);
        #   x1 = e1*e1*y1/(tbar + e1*e1);
        #   x2 = e2*e2*y2/(tbar + e2*e2);
        #
        #   distance = sqrt((x0 - y0)*(x0 - y0) + (x1 - y1)*(x1 - y1) + (x2 - y2)*(x2 - y2))

        query = (x[idx], y[idx], z[idx])
        # t=1 yields a single result; the trailing comma unpacks it.
        nearest, = tree.query(query_point=query, t=1)
        residual = dist.euclidean(query, nearest)

        #obj += loss_functions.squared_loss(residual)
        #obj += loss_functions.abs_loss(residual)
        #obj += loss_functions.eps_loss(residual, 2)
        #obj += loss_functions.eps_loss_bounded(residual, 2)
        loss_xt = loss_functions.eps_loss_asym(residual, 2, 1.0, 0.2)
        obj += loss_xt
        # Record this point's own loss (the previous code stored
        # num_queries in every slot).
        global_loss[idx] = loss_xt

        #obj += eps_loss(residual, 2)*data_intensity[idx]

    # add regularizer to keep radii close
    reg = 10 * regularizer(param)

    print("loss %s" % (obj,))
    print("reg %s" % (reg,))

    obj += reg

    return obj
Esempio n. 16
    ]) + tuple([round(limitedListCA_ProtA[i][2], 3)]) + tuple([Va[i].T])
    #eigenvector Va.T is appended to each node

# listXb is the list of atoms in protein b
# NOTE(review): nothing in this excerpt appends `tupp` to listXb, so as shown
# the list stays empty -- an append was presumably dropped; confirm.
listXb = []
for i in range(len(limitedListCA_ProtB)):
    # 4-tuple: the first three components of entry i rounded to 3 decimals,
    # followed by Vb[i].
    tupp = tuple([round(limitedListCA_ProtB[i][0], 3)]) + tuple([
        round(limitedListCA_ProtB[i][1], 3)
    ]) + tuple([round(limitedListCA_ProtB[i][2], 3)]) + tuple([Vb[i]])
    # eigenvector Vb is appended to each node

# listXa is built before this excerpt.
data1 = listXa
data2 = listXb
Tree1 = KDTree.construct_from_data(data1)
Tree2 = KDTree.construct_from_data(data2)
score = 0
# Times for KD Tree approach
startT = time.time()
for i in range(len(data1)):
    # Range query around each entry of data1 against the tree of data2.
    # The call uses r=50 (the earlier comment said radius 30). The result is
    # added to `score`, so queryrange presumably returns a number -- confirm.
    score += Tree2.queryrange(query_point=data1[i], r=50)
solveTime = time.time() - startT

#Time for non-tree approach
#startT = time.time()
#total =0
#for i in range(len(data1)):
