Ejemplo n.º 1
0
def _proximity_filter(point, data, total):
    """
    Index ``data`` in a KD-tree and query it around ``point``.

    ``data`` is a list of coordinate tuples. The return value is whatever
    ``KDTree.query`` yields when asked for ``total`` results (``t=total``).
    """
    index = KDTree.construct_from_data(data)
    matches = index.query(query_point=point, t=total)
    return matches
Ejemplo n.º 2
0
def autodiscover():
    """
    (Re)build the module-level ``kd_tree`` from every stored ``Pixel``.

    All ``Pixel`` objects are fetched into a list and handed to
    ``KDTree.construct_from_data``; the resulting tree replaces the global.
    """
    global kd_tree

    # Imported at call time rather than at module import time.
    from pixel.models import Pixel

    every_pixel = list(Pixel.objects.all())
    kd_tree = KDTree.construct_from_data(every_pixel)
Ejemplo n.º 3
0
 def __init__(self, eps, MinPts, pointlist):
     """
     Store the parameters, index the points and build the id lookup.

     eps, MinPts -- kept unchanged on the instance.
     pointlist   -- sequence of points; each must expose ``coordinates``
                    and ``id``.
     """
     self.eps = eps
     self.MinPts = MinPts
     self.points = pointlist
     # Every point index starts out unvisited.
     self.unvisited = list(range(len(pointlist)))
     # formatpoints() is called after self.points is set.
     self.kdtree = KDTree.construct_from_data(self.formatpoints())
     # Map a point's coordinate tuple back to its id.
     self.pointidmap = {
         tuple(entry.coordinates): entry.id for entry in pointlist
     }
Ejemplo n.º 4
0
 def createTrackpointTree(self, trackpoints):
     '''
     Build a KD-tree from the trackpoints stored on this object.

     Every point in self.trackpoints is turned from a Vec3 into an
     (x, y, z) tuple via getX()/getY()/getZ(). The tuples are stored in
     self.track_tupel_list; a separate copy of that list is stored in
     self.list4tree and used as the tree's input.

     NOTE(review): the ``trackpoints`` argument is never read -- the
     points come from self.trackpoints. Confirm which one was intended.
     '''
     self.track_tupel_list = [
         (vec.getX(), vec.getY(), vec.getZ()) for vec in self.trackpoints
     ]
     # The tree gets its own list object, not the one kept above.
     self.list4tree = list(self.track_tupel_list)
     return KDTree.construct_from_data(self.list4tree)
Ejemplo n.º 5
0
def knn():
    """
    Run a 10-nearest-neighbour label vote over a slice of ``train.csv``.

    Pass 1 reads ``train.csv`` (first column = label, remaining columns =
    integer features), skips the ``label`` header row, and collects the
    feature tuples plus a feature-tuple -> label lookup. Loading stops
    shortly after 40000 data rows. A KD-tree is built from those tuples.

    Pass 2 re-reads the file and, for lines 32000 through 32100 (1-based,
    header included in the count), queries the 10 nearest stored tuples,
    takes the most frequent label among them and prints ``True``/``False``
    depending on whether it equals the row's own label.

    NOTE(review): the evaluated lines fall inside the range loaded in
    pass 1, so each query point is itself present in the tree.
    """
    label_of = {}      # feature tuple -> label
    train_points = []  # feature tuples, in file order

    with open('train.csv', 'r') as train_file:
        loaded = 0
        for line in train_file:
            fields = line.split(',')
            if fields[0] == 'label':
                continue  # header row
            row = [int(value) for value in fields]
            features = tuple(row[1:])
            train_points.append(features)
            label_of[features] = row[0]
            # The current row is stored before the limit is checked.
            if loaded > 40000:
                break
            loaded += 1

    tree = KDTree.construct_from_data(train_points)

    with open('train.csv', 'r') as eval_file:
        for line_no, line in enumerate(eval_file, 1):
            if line_no < 32000:
                continue
            if line_no > 32100:
                break

            fields = line.split(',')
            if fields[0] == 'label':
                continue
            row = [int(value) for value in fields]

            neighbours = tree.query(tuple(row[1:]), 10)

            votes = defaultdict(int)
            for neighbour in neighbours:
                votes[label_of[neighbour]] += 1

            # Most frequent label; ties resolve to the first one iterated,
            # the same as a stable descending sort followed by [0].
            predicted = max(votes, key=lambda label: votes[label])
            print(str(row[0] == predicted))
0
def nearest_filtered(Primary_Technology, Role):
    """
    Return selected columns of the rows nearest to a fixed reference point.

    Rows are read from the CSV file, narrowed with ``filter_lambda`` to
    those whose column 2 equals ``Primary_Technology`` and whose column 11
    equals ``Role``, and projected onto columns 0 and 57-65 to form the
    KD-tree points. The tree is queried with the fixed point
    (0, 10, 10, 10, 10, 10, 10, 10, 10, 10) for t=5 results. Column 0 of
    each result identifies the matching filtered rows, of which columns
    1, 2, 8, 9 and 10 are returned as a list of tuples.
    """
    # Raw string: "\D" and "\I" are not escape sequences, so the value is
    # unchanged, but the literal no longer relies on unrecognised escapes.
    with open(r"C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        # Read eagerly while the file is still open.
        data = [tuple(row) for row in csv.reader(f)]

    def f1(t):
        return t[2] == Primary_Technology

    def f2(t):
        return t[11] == Role

    # Materialised because the filtered rows are traversed twice below.
    filtered_data = list(filter_lambda([f1, f2], data))

    # Column 0 (row identifier) plus the columns used as coordinates.
    to_point = operator.itemgetter(0, 57, 58, 59, 60, 61, 62, 63, 64, 65)
    kd_filtered_data = [to_point(row) for row in filtered_data]

    tree = KDTree.construct_from_data(kd_filtered_data)

    # t can be varied to change the number of neighbours returned.
    nearest = tree.query(query_point=(0, 10, 10, 10, 10, 10, 10, 10, 10, 10),
                         t=5)

    # Identifiers (column 0) of the neighbours, as a set for fast lookup.
    nearest_index = set(x[0] for x in nearest)

    # Columns reported back to the caller.
    to_output = operator.itemgetter(1, 2, 8, 9, 10)
    return [to_output(tup) for tup in filtered_data
            if tup[0] in nearest_index]
Ejemplo n.º 7
0
def nearest_filtered(Primary_Technology, Role):
    """
    Return selected columns of the rows nearest to a fixed reference point.

    Rows are read from the CSV file, narrowed with ``filter_lambda`` to
    those whose stripped column 2 equals ``Primary_Technology`` and whose
    stripped column 11 equals ``Role``, and projected onto columns 0 and
    58-66 to form the KD-tree points. The tree is queried with the fixed
    point (85, 10, 10, 10, 10, 10, 10, 10, 10, 10) for t=10 results.
    Column 0 of each result identifies the matching filtered rows, of
    which columns 0, 1, 2, 26, 8, 23 and 10 are returned as a list of
    tuples.
    """
    # Raw string: "\D" and "\I" are not escape sequences, so the value is
    # unchanged, but the literal no longer relies on unrecognised escapes.
    with open(r"C:\DataMining\IndexPredictionsOutput.csv", 'rb') as f:
        # Read eagerly while the file is still open.
        data = [tuple(row) for row in csv.reader(f)]

    def f1(t):
        return t[2].strip() == Primary_Technology

    def f2(t):
        return t[11].strip() == Role

    # Materialised because the filtered rows are traversed twice below.
    filtered_data = list(filter_lambda([f1, f2], data))

    # Column 0 (row identifier) plus the columns used as coordinates.
    to_point = operator.itemgetter(0, 58, 59, 60, 61, 62, 63, 64, 65, 66)
    kd_filtered_data = [to_point(row) for row in filtered_data]

    tree = KDTree.construct_from_data(kd_filtered_data)

    # t can be varied to change the number of neighbours returned.
    nearest = tree.query(query_point=(85, 10, 10, 10, 10, 10, 10, 10, 10, 10),
                         t=10)

    # Identifiers (column 0) of the neighbours, as a set for fast lookup.
    nearest_index = set(x[0] for x in nearest)

    # Columns reported back to the caller.
    to_output = operator.itemgetter(0, 1, 2, 26, 8, 23, 10)
    return [to_output(tup) for tup in filtered_data
            if tup[0] in nearest_index]
Ejemplo n.º 8
0
def proximity_filter(point, data, total):
    """
    Query a KD-tree built from ``data`` around ``point``.

    The tree is asked for ``total`` results (``t=total``) and the query
    result is returned unchanged.
    """
    return KDTree.construct_from_data(data).query(query_point=point, t=total)
Ejemplo n.º 9
0
# Declare a 26x26 2-D array for the confusion matrix; it is indexed below
# with ord(letter) - 65, i.e. 'A' maps to index 0.
confusionMatrix = [[0 for x in xrange(26)] for x in xrange(26)]

listOfPoints = []

# NOTE(review): counter, isKDTreeConstructed and properMatches are used below
# but are not assigned anywhere in this excerpt -- presumably initialised
# earlier in the file.
with open('training_data.txt', 'r') as f:
    for line in f:
        # While counter is below 15000 the lines are only collected as
        # tree input.
        if counter < 15000:
            listOfPoints.append(getDataElementFrom(line))
            counter += 1
            continue
        
        # Build the tree once, on the first line that is not collected.
        if isKDTreeConstructed == False:
            start = time.clock()
            kdTree = KDTree.construct_from_data(listOfPoints)
            elapsedForKDTreeConstruction = (time.clock() - start)
            isKDTreeConstructed = True
            print "KDTree constructed in %.2fs" % (elapsedForKDTreeConstruction)
            searchStartTime = time.clock()
            print "Evaluating input data..."
            
        # Look the current line up in the tree and record the pair
        # (its own letter, letter of the first result) in the matrix.
        currentLine = getDataElementFrom(line)
        nearest = kdTree.query(currentLine)
        confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1
            
        if currentLine.lettr == nearest[0].lettr:
            properMatches += 1
            
        counter += 1
        if counter % 500 == 0:
Ejemplo n.º 10
0
from kdtree import KDTree

# Sample 3-D points to index.
data = [(1, 2, 3), (4, 0, 1), (5, 3, 1), (10, 5, 4), (9, 8, 9), (4, 2, 4)]

# Build the tree and ask for t=2 results around the point (5, 4, 3).
tree = KDTree.construct_from_data(data)
nearest = tree.query(query_point=(5, 4, 3), t=2)

# Call form of print: same output under Python 2 for a single argument,
# and valid syntax under Python 3 as well.
print(nearest)
Ejemplo n.º 11
0
def buildKDTree():
    """Return a KD-tree built from the keys of the global ``topic_dict``."""
    # The keys are the topics' coordinates.
    return KDTree.construct_from_data(topic_dict.keys())
Ejemplo n.º 12
0
# For every entry in files: load its points, then repeatedly pair each point
# with a neighbour found through a KD-tree while more than 2000 points remain.
# NOTE(review): the body of the while loop continues past this excerpt.
for city in files:

	points = []
	# Name without extension: everything before the first '.'.
	city_name = city.split('.')[0]

	# Each row contributes (row[0], row[1]) as floats plus a constant 1.0.
	with open('geopositions/%s.txt'%city_name, 'rb') as csvfile:
	    georeader = csv.reader(csvfile, delimiter=',', quotechar='|')
	    for row in georeader:
	        points.append((float(row[0]), float(row[1]), 1.0))

	block_size = 100
	start_time = time.time()
	while len(points) > 2000:
		print len(points)
		s_time = time.time()
		# The tree is rebuilt from the current points on every pass.
		tree = KDTree.construct_from_data(points)
		# min_dist = 2000000000
		# f_point = None
		# s_point = None
		pairs = []
		for point in points:
			# t=2 and index 1: presumably index 0 is the query point itself,
			# since it is part of the tree -- TODO confirm.
			nearest = tree.query(query_point=point, t=2)
			found_point = nearest[1]
			dist = get_dist(found_point, point)
			pairs.append((point, found_point, dist))
			# if dist < min_dist:
			# 	f_point = found_point
			# 	s_point = point
			# 	min_dist = dist
		# Order the (point, found_point, dist) triples by ascending dist.
		pairs.sort(key=lambda x: x[2])
		all_pairs = []
Ejemplo n.º 13
0
                # Progress report for the condensed-training-set phase.
                # NOTE(review): the enclosing loop and `if` are above this excerpt.
                print "\tEvaluated %d rows for condensed training set in %.2fs" %(counter, time.clock() - start)
                
            continue
        
        # Runs once (guarded by isPrinted): report the timing and the number
        # of training elements, then build the KD-tree used by the else branch.
        elif isPrinted == False:
            elapsed = (time.clock() - start)
            print ("Condensed Training data mapped to feature space in %.4fmin." % (elapsed/60))
            print ("Boundary points evaluated: %d" % getTrainingData().__len__())
            classificationTimeStart = time.clock()
            isPrinted = True
            
            # Implement the search of the next 5000 elements using a KDTree
            searchStartTime = time.clock()
            
            # Test: trying with a KDTree built from getTrainingData().
            kdTree = KDTree.construct_from_data(getTrainingData())
            
        # Remaining lines: query the tree and record the pair (own letter,
        # letter of the first result); ord(...) - 65 maps 'A' to index 0.
        else:
            currentLine = getDataElementFrom(line)
#             nearest = evaluateLine(line, 1)
            nearest = kdTree.query(currentLine)
            confusionMatrix[ord(currentLine.lettr) - 65][ord(nearest[0].lettr) - 65] += 1
            
            if currentLine.lettr == nearest[0].lettr:
                properMatches += 1
#             confusionMatrix[ord(nearest[0]) - 65][ord(nearest[1]) - 65] += 1
#             
#             if nearest[0] == nearest[1]:
#                 properMatches += 1
            
            counter += 1
Ejemplo n.º 14
0
from kdtree import KDTree

# Six 2-D sample points to index.
data = [
    (1, 2),
    (4, 0),
    (8, 3),
    (10, 5),
    (9, 8),
    (4, 2),
]

# Build the tree, ask for t=1 result around (10, 0) and show it.
tree = KDTree.construct_from_data(data)
nearest = tree.query(query_point=(10, 0), t=1)
print(nearest)
Ejemplo n.º 15
0
def fitting_obj_sample(param):
    """
    Compute the sampled-ellipsoid fitting objective for ``param``.

    The ellipsoid described by ``param`` is sampled with ``ellipsoid(...)``
    and the samples are indexed in a KD-tree. For every query point
    ``(x[i], y[i], z[i])`` (module-level sequences) the residual is the
    Euclidean distance to its nearest sample. Each residual is passed
    through ``loss_functions.eps_loss_asym(residual, 2, 1.0, 0.2)`` and the
    losses are summed; ``10 * regularizer(param)`` is then added.

    Parameters:
        param -- indexable; param[0:3] are the centre (cx, cy, cz) and
                 param[3:6] the radii (rx, ry, rz).

    Returns:
        The summed loss plus the regularisation term.

    Side effects:
        Rebinds the global ``global_loss`` to an array holding the loss of
        each query point, and prints sample/query counts, loss and
        regulariser.
    """
    global global_loss

    # Centre and radii of the ellipsoid.
    cx, cy, cz = param[0], param[1], param[2]
    rx, ry, rz = param[3], param[4], param[5]

    sx, sy, sz = ellipsoid(cx, cy, cz, rx, ry, rz, 20)
    num_samples = len(sx)
    print("num_samples %s" % (num_samples,))

    # list(...) so the tree gets a real list under Python 3 as well.
    tree = KDTree.construct_from_data(list(zip(sx, sy, sz)))

    num_queries = len(x)
    print("num_queries %s" % (num_queries,))

    global_loss = numpy.zeros(num_queries)

    obj = 0
    for idx in range(num_queries):
        # The distance to the nearest sampled surface point stands in for
        # an exact point-to-ellipsoid distance.
        query = (x[idx], y[idx], z[idx])
        nearest, = tree.query(query_point=query, t=1)
        residual = dist.euclidean(query, nearest)

        loss_xt = loss_functions.eps_loss_asym(residual, 2, 1.0, 0.2)
        obj += loss_xt
        # Record this point's own loss (previously the query count was
        # stored in every slot).
        global_loss[idx] = loss_xt

    # Regulariser that keeps the radii close to each other.
    reg = 10 * regularizer(param)

    print("loss %s" % (obj,))
    print("reg %s" % (reg,))

    return obj + reg
Ejemplo n.º 16
0
    ]) + tuple([round(limitedListCA_ProtA[i][2], 3)]) + tuple([Va[i].T])
    #eigenvector Va.T is appended to each node
    listXa.append(tupp)

# listXb: one node per atom of protein B -- its three coordinates rounded to
# 3 decimals, followed by that atom's eigenvector Vb[i].
listXb = []
for i, atom in enumerate(limitedListCA_ProtB):
    listXb.append(
        (round(atom[0], 3), round(atom[1], 3), round(atom[2], 3), Vb[i]))

data1 = listXa
data2 = listXb
Tree1 = KDTree.construct_from_data(data1)
Tree2 = KDTree.construct_from_data(data2)

# Timing of the KD-tree approach: accumulate the range-query result of every
# protein A node against protein B's tree, using r=50.
score = 0
startT = time.time()
for node in data1:
    score += Tree2.queryrange(query_point=node, r=50)
solveTime = time.time() - startT
print(solveTime)

#Time for non-tree approach
#startT = time.time()
#total =0
#for i in range(len(data1)):
Ejemplo n.º 17
0
	def __init__(self, data):
		"""
		Keep ``data``, index it in a KD-tree and collect its ``rel`` values.

		Progress messages are printed before and after the tree is built.
		"""
		self.data = data
		print("Kd-tree will be constructed...")
		self.tree = KDTree.construct_from_data(data)
		print("Kd-tree construction done!")
		# Distinct ``rel`` attribute values found in data.
		self.rel_levels = {item.rel for item in data}
Ejemplo n.º 18
0
def query_nearest(METAR_data, ref_point):
    """
    Query a KD-tree built from ``METAR_data`` with ``ref_point``.

    No result count is passed, so the tree's own default applies. The
    query result is returned as-is.
    """
    index = KDTree.construct_from_data(METAR_data)
    return index.query(query_point=ref_point)
Ejemplo n.º 19
0
def top_down(grid, output, tile_size):
    '''
    Paste a matching tile for every grid cell, top-left to bottom-right.

    All Pixel objects are indexed in a KD-tree. For each cell the colour is
    quantized with quantize_color and the tree is queried for one result;
    that result's image is shrunk with thumbnail(tile_size) and pasted into
    output at (column * tile_size[0], row * tile_size[1]).

    Returns a dict mapping the chosen pixel's id to the list of (x, y)
    positions where its image was pasted.
    '''
    pixel_index = KDTree.construct_from_data(list(Pixel.objects.all()))

    placements = {}
    for yPos, row in enumerate(grid):
        for xPos, cell in enumerate(row):
            wanted = quantize_color(cell.color)

            # Single result requested; take it.
            tile_pixel = pixel_index.query(query_point=wanted, t=1)[0]
            tile = tile_pixel.image

            # The return value of thumbnail() is not used; the tile object
            # itself is pasted afterwards.
            tile.thumbnail(tile_size)
            xy = (xPos * tile_size[0], yPos * tile_size[1])
            output.paste(tile, xy)

            placements.setdefault(tile_pixel.id, []).append(xy)

    return placements
Ejemplo n.º 20
0
# listXa: one node per atom of protein A -- its three coordinates rounded to
# 3 decimals, followed by that atom's eigenvector Va[i].T.
listXa = []
for i, atom in enumerate(limitedListCA_ProtA):
    listXa.append(
        (round(atom[0], 3), round(atom[1], 3), round(atom[2], 3), Va[i].T))

# listXb: the same layout for protein B, with eigenvector Vb[i].
listXb = []
for i, atom in enumerate(limitedListCA_ProtB):
    listXb.append(
        (round(atom[0], 3), round(atom[1], 3), round(atom[2], 3), Vb[i]))

data1 = listXa
data2 = listXb
Tree1 = KDTree.construct_from_data(data1)
Tree2 = KDTree.construct_from_data(data2)

# Timing of the KD-tree approach: accumulate the range-query result of every
# protein A node against protein B's tree, using r=50.
score = 0
startT = time.time()
for node in data1:
    score += Tree2.queryrange(query_point=node, r=50)
solveTime = time.time() - startT
print(solveTime)



#Time for non-tree approach
#startT = time.time()
Ejemplo n.º 21
0
def buildKDTree():
    """Build and return a KD-tree over the keys of the global ``topic_dict``."""
    # The keys are the topics' coordinates.
    topic_coordinates = topic_dict.keys()
    return KDTree.construct_from_data(topic_coordinates)