Example #1
 def test_hilbert_index(self):
     # From the example on p 18 of Hamilton
     assert hilbert.hilbert_index(2, 3, [5, 6]) == 45
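Here n is the number of dimensions and m is the order of the curve (bits per coordinate), so the call above asks for the Hilbert distance of cell (5, 6) on an 8x8 grid. A minimal usage sketch, assuming hilbert_point is the exact inverse of hilbert_index (as the round-trip tests below suggest); normalizing its result with tuple() is an assumption about the return type:

 import hilbert

 n, m = 2, 3   # 2 dimensions, order 3: an 8x8 grid
 i = hilbert.hilbert_index(n, m, [5, 6])
 assert i == 45   # the Hamilton p. 18 example
 assert tuple(hilbert.hilbert_point(n, m, i)) == (5, 6)   # inverse mapping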
Example #2
 def test_hilbert_point(self):
     for n in [2, 3, 4]:
         m = 3
         for i in range(2**(n*m)):
             v = hilbert.hilbert_point(n, m, i)
             assert i == hilbert.hilbert_index(n, m, v)
Example #3
 def test_hilbert_point(self):
     n, m = 2, 3
     for i in range(2**(n*m)):
         v = hilbert.hilbert_point(n, m, i)
         assert i == hilbert.hilbert_index(n, m, v)
Example #4
import math
import random

import hilbert
import zorder

def main():
	print_parameters()

	# sort point counts.
	point_counts.sort()
	# sort nearness ranges and knn counts
	k_values.sort()
	nearness_percentages.sort()
	nearness_radii = [x*hypercube_edge_len for x in nearness_percentages]
	max_pc = point_counts[-1]
	max_knnc = k_values[-1]

	# OUTER LOOP FOR MULTIPLE RUNS
	for run in range(runs):
		print "Run {0}".format(run+1)

		# RUN-SPECIFIC DATA STRUCTURES
		# Coordinates for each point. Points are indices (implicitly).
		coordinates = []
		# Grid square containing each point: coordinates discretized to ints.
		discretized_coords = []
		k_nearest_neighbors = {} #(point_count, point, k) -> [] of neighbors
		near_neighbors = {} #(point_count, point, radius) -> [] of neighbors
		knn_conservation = {} #(point_count, point, map, k) -> fraction conserved in 1D
		knn_conservation_stats = {} #(point_count, map, k) -> (average, variance)
		nn_conservation = {} #(point_count, point, map, radius) -> (# of neighbors, percentage)
		nn_conservation_stats = {} #(point_count, map, radius) -> (avg # of neighbors, (avg, var))
		knn_core_conservation = {} #per-point core counts (unused below)
		knn_core_conservation_stats = {} #(point_count, map, k, core_count) -> average
		nn_core_conservation = {} #per-point core counts (unused below)
		nn_core_conservation_stats = {} #(point_count, map, radius, core_count) -> average
		# initialize distance matrix
		distance_matrix = [[0 for j in xrange(max_pc)] for i in xrange(max_pc)]
		prev_point_count = 0
	
		# BEGIN RUN
		for point_count in point_counts:
			# generate point_count points (or add to existing), and fill in distance matrix as you go along.
			for i in range(point_count-prev_point_count):
				new_point = []
				d_new_point = []
				for j in range(dimension):
					if distribution == 'uniform':
						coord = random.uniform(0, hypercube_edge_len)
					elif distribution == 'gauss':
						coord = random.gauss(gauss_mu, gauss_sigma) % hypercube_edge_len
					else:
						print 'Distribution {0} unsupported. Using uniform.\n'.format(distribution)
						coord = random.uniform(0, hypercube_edge_len)
					new_point.append(coord)
					d_new_point.append(int(round(coord*100)))
				coordinates.append(tuple(new_point))
				# get the discretized coordinate.
				discretized_coords.append(tuple(d_new_point))
				# fill out distance matrix as you go
				i_adj = i + prev_point_count
				for j in range(i_adj):
					dist = euclidean_dist(coordinates[i_adj], coordinates[j])
					# take advantage of the symmetry
					distance_matrix[i_adj][j] = dist
					distance_matrix[j][i_adj] = dist
				distance_matrix[i_adj][i_adj] = 0
			if verbosity >= 2 and point_count <= MAX_DISPLAY_C:
				print "Coordinates:"
				for c in enumerate(coordinates):
					print c
				print
			if verbosity >= 4 and point_count <= MAX_DISPLAY:
				print 'Distance matrix ({0}x{0}):'.format(point_count)
				printf_array(2, [""], distance_matrix)
				print

			for point in range(point_count):
				# Select a point's row in matrix
				dists_from = distance_matrix[point]
				# Get lists of near neighbors
				for percent_radius in nearness_percentages:
					# Key on the exact percentage; recomputing radius/hypercube_edge_len can drift by a float ulp.
					radius = percent_radius*hypercube_edge_len
					near_neighbors[(point_count, point, percent_radius)] = [p for p in range(point_count) if dists_from[p] <= radius and p != point]
				# Get lists of k-nearest neighbors
				# Only the point_count points generated so far; matrix rows are padded to max_pc with zeros.
				dist_point_pairs = [(dists_from[i], i) for i in xrange(point_count)]
				dist_point_pairs.sort()
				for k in k_values:
					# dist_point_pairs[0] is the point itself (distance 0), so start at index 1.
					k_nearest_neighbors[(point_count, point, k)] = [dist_point_pairs[x+1][1] for x in xrange(k)]
			if measure_nn and verbosity >= 3 and point_count <= MAX_DISPLAY_C:
				print "Near neighbors (point count, point, radius):\n",
				printf_dict(near_neighbors)
			if measure_knn and verbosity >= 3 and point_count <= MAX_DISPLAY_C:
				print "K nearest neighbors (point count, point, k):\n",
				printf_dict(k_nearest_neighbors)
				
			# Now for each curve, map points to one dimension by ordering them by curve index.
			for map in mappings:
				try:
					if map == 'hilbert':
						# For each point, get 1D image
						images_1D = [(hilbert.hilbert_index(dimension, order, c[1]), c[0]) for c in enumerate(discretized_coords)]
						# Sort into 1D order by hilbert distance (called hilbert_index above). 
						# Each element is of form (hilbert_distance, coordinate_index) 
						images_1D.sort()
						if verbosity >= 3 and point_count <= MAX_DISPLAY_C:
							print "Hilbert images (Hilbert distance, particle #):\n", images_1D, "\n"
						ordering_1D = [images_1D[i][1] for i in range(len(images_1D))]
						if verbosity >= 2 and point_count < 20:
							print "Hilbert ordering (by particle #):\n", ordering_1D, "\n"
					elif map == 'zorder':
						zmap = zorder.ZOrder(dimension, order)
						images_1D = [(zmap.index(list(c[1])), c[0]) for c in enumerate(discretized_coords)]
						images_1D.sort()
						if verbosity >= 3 and point_count <= MAX_DISPLAY_C:
							print "Z-curve images (Z-curve distance, particle #):\n", images_1D, "\n"
						ordering_1D = [images_1D[i][1] for i in range(len(images_1D))]
						if verbosity >= 2 and point_count < 20:
							print "Z-ordering (by particle #):\n", ordering_1D, "\n"
					else:
						raise Exception("Map '{0}' not supported.\n".format(map))

					# Metric: % of near neighbors (nns) within <# nns> steps of given point in 1D ordering
					if measure_nn:
						for percent_radius in nearness_percentages:
							p_sum = 0.0
							n_sum = 0.0
							points_with_neighbors = 0.0
							for point in range(point_count):
								index_p = ordering_1D.index(point)
								#get near neighbors
								nns = near_neighbors[(point_count, point, percent_radius)]
								nn_count = len(nns)
								if nn_count == 0:
									# No neighbors in range; the conservation percentage is undefined.
									nn_conservation[(point_count, point, map, percent_radius)] = (0, None)
								else:
									#see how many near neighbors are within nns_count distance in 1D
									nn_count_1D = 0.0
									for neighbor in nns:
										index_n = ordering_1D.index(neighbor)
										dist = math.fabs(index_p - index_n)
										#is within len(nns) to point?
										if dist <= nn_count:
											nn_count_1D += 1
									percentage = nn_count_1D/nn_count
									nn_conservation[(point_count, point, map, percent_radius)] = (nn_count, percentage)
									p_sum += percentage
									n_sum += nn_count
									points_with_neighbors += 1
							# Average and variance of above across all points
							if points_with_neighbors != 0:
								p_average = p_sum/points_with_neighbors
								n_average = n_sum/points_with_neighbors
								variance_numerator = 0.0
								for point in range(point_count):
									(nn_count, percentage) = nn_conservation[(point_count, point, map, percent_radius)]
									if percentage is not None:
										difference = p_average - percentage
										variance_numerator += (difference*difference)
								variance = variance_numerator/points_with_neighbors
								nn_conservation_stats[(point_count, map, percent_radius)] = (n_average, (p_average, variance))
							else:
								nn_conservation_stats[(point_count, map, percent_radius)] = (0, (None, None))
					
					# Metric: Average num. of cores per near neighbor neighborhood
					if measure_nn_core:
						for core_count in core_counts:
							# Divide ordering into core_count chunks
							chunk_size = int(math.ceil(point_count/float(core_count)))
							pivots = get_pivots(chunk_size, point_count)
							# Find out how many cores a neighborhood gets scattered across
							for percent_radius in nearness_percentages:
								scattered_core_counts = []
								scc_denom = 0
								for p in range(point_count):
									used_cores = [0 for i in xrange(core_count)]
									nns = near_neighbors[(point_count, p, percent_radius)]
									if len(nns) != 0:
										for neighbor in nns:
											index = ordering_1D.index(neighbor)
											assigned_core = index/chunk_size
											if assigned_core < 0 or assigned_core >= core_count: #TEST
												print index, assigned_core, core_count #TEST
											used_cores[assigned_core] = 1
										scattered_core_counts.append(sum(used_cores))
										scc_denom += 1
								if scc_denom != 0:
									scc_average = sum(scattered_core_counts)/float(scc_denom)
								else:
									scc_average = None
								nn_core_conservation_stats[(point_count, map, percent_radius, core_count)] = scc_average

					
					# Metric: % of k nearest neighbors (knns) of a given point that are also knns in 1D ordering
					if measure_knn:
						for k in k_values:
							window_size = k+1
							total_1D_sum = 0.0
							for point in range(point_count):
								# Get list of knns in 1D ordering
								p_index = ordering_1D.index(point)
								if p_index < window_size/2: # window would run off the start
									lower = 0
									upper = window_size
								elif p_index > (point_count - window_size/2): # window would run off the end
									lower = point_count - window_size
									upper = point_count
								else:
									lower = p_index-(window_size/2)
									upper = p_index+(window_size/2) + 1
								knns_1D = ordering_1D[lower:upper]
								# Is each n-space nearest neighbor a 1D nearest neighbor?
								knns = k_nearest_neighbors[(point_count, point, k)]
								knn_count_1D = 0
								for neighbor in knns:
									if neighbor in knns_1D:
										knn_count_1D += 1
								knn_conservation[(point_count, point, map, k)] = knn_count_1D/float(k)
								total_1D_sum += knn_count_1D 
							# Average and variance of above across all points
							average = total_1D_sum/(k*point_count)
							variance_numerator = 0.0
							for point in range(point_count):
								percentage = knn_conservation[(point_count, point, map, k)]				
								difference = average - percentage
								variance_numerator += (difference*difference)
							variance = variance_numerator/point_count
							knn_conservation_stats[(point_count, map, k)] = (average, variance)
							
				except Exception:
					import traceback
					print traceback.format_exc()
			
			#Update point count so previously generated points are preserved within run
			prev_point_count = point_count
		
		#Update stats across runs
		final_knn_averages.append(knn_conservation_stats)
		final_knn_core_averages.append(knn_core_conservation_stats)
		final_nn_averages.append(nn_conservation_stats)
		final_nn_core_averages.append(nn_core_conservation_stats)
		
		#Reporting
		if measure_nn and verbosity >= 3 and point_count <= MAX_DISPLAY_C:			
			print "1D conservation of near neighbors:\n(point count, point, map, radius) : (# of neighbors, percentage by point)\n",
			printf_dict(nn_conservation)
		if measure_nn and verbosity >= 1:
			print "Stats for 1D conservation of near neighbors:\n(point count, map, radius) : (Average # of neighbors, (average, variance))\n",
			printf_dict(nn_conservation_stats)
		if measure_nn_core and verbosity >= 1:
			print "Stats for cores/neighborhood (near):\n(point count, map, radius, core count) : average\n",
			printf_dict(nn_core_conservation_stats)
		if measure_knn and verbosity >= 3 and point_count <= MAX_DISPLAY_C:
			print "1D conservation of k nearest neighbors:\n(point count, point, map, k) : percentage by point\n",
			printf_dict(knn_conservation)
		if measure_knn and verbosity >= 1:
			print "Stats for 1D conservation of k nearest neighbors:\n(point count, map, k) : (average, variance)\n",
			printf_dict(knn_conservation_stats)
		if measure_knn_core and verbosity >= 1:
			print "Stats for cores/neighborhood (k nearest):\n(point count, map, k, core count) : average\n",
			printf_dict(knn_core_conservation_stats)
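The core of Example #4 is the curve-mapping step itself: discretize each coordinate to an integer grid, compute each point's one-dimensional image under the curve, and sort. A minimal, self-contained sketch of just that step, assuming the same hilbert module; the order of 7 is an illustrative choice (just large enough for grid coordinates up to 100), not the script's actual order parameter:

 import random

 import hilbert

 dimension, order = 2, 7   # order 7 covers grid coordinates 0..127
 points = [tuple(random.uniform(0, 1) for _ in range(dimension)) for _ in range(10)]
 # Discretize as Example #4 does: int(round(coord * 100)).
 discretized = [tuple(int(round(c * 100)) for c in p) for p in points]
 # Pair each Hilbert distance with its point index, then sort into 1D order.
 images_1D = sorted((hilbert.hilbert_index(dimension, order, list(d)), i)
                    for i, d in enumerate(discretized))
 ordering_1D = [i for _, i in images_1D]   # point indices in Hilbert order

Points that are near each other in space tend to land near each other in ordering_1D; the conservation metrics in Example #4 quantify how well that holds.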