def test_distanc(self):
    # Valid input: two vectors of equal length.
    self.assertEqual(distanc((1, 3), (1, 4)), 1)
    self.assertEqual(distanc((20, -15), (20, -15)), 0)

    # Malformed calls: every one of them has to raise.
    malformed_calls = (
        ((0, 0), (1, 1, 1)),  # vectors don't have same length
        ((1, 0),),  # not enough input arguments
        ((1, 0), (2, 5), (-9, 18)),  # too many input arguments
    )
    for arguments in malformed_calls:
        with self.assertRaises(Exception):
            distanc(*arguments)
def distances(points):
    """
    Compute the distance between every pair of consecutive points.

    :param list points: ordered list of points; element ``i`` is paired with
        element ``i + 1``
    :return: list with ``sqrt(distanc(a, b))`` for each consecutive pair, empty
        when fewer than two points are given
    """
    consecutive_pairs = zip(points, points[1:])
    return [sqrt(distanc(start, end)) for start, end in consecutive_pairs]
def minimal_distance(data, classes, space_size=(-20, 20), step=1):
    """
    Attach every generated trial point to the closest key of the kmeans() result.

    The mapping returned by ``kmeans(data, classes)`` is extended in place and
    returned. Its keys are presumably the cluster centres and its values lists
    of member points -- confirm against kmeans().

    :param data: passed to kmeans() unchanged
    :param classes: passed to kmeans() unchanged
    :param tuple space_size: first two items are handed to generate_points()
    :param step: third argument for generate_points()
    :return: the kmeans() mapping with each trial point appended to the list
        of the key for which distanc(point, key) is smallest
    """
    clusters = kmeans(data, classes)
    lower, upper = space_size[0], space_size[1]
    candidates = generate_points(lower, upper, step)
    for candidate in candidates:
        # distance from this candidate to every key of the mapping
        to_each_key = {}
        for center in clusters:
            to_each_key[center] = distanc(candidate, center)
        closest = min(to_each_key, key=to_each_key.get)
        clusters[closest].append(candidate)
    return clusters
def nearest_neighbour(data, classes, space_size=(-20, 20), step=1):
    """
    Classify generated trial points by their single nearest clustered point.

    Each trial point is appended to the cluster(s) that contain the point of
    the kmeans() result closest to it (by distanc()). Only the points that
    kmeans() returned are used as neighbours; trial points that were already
    classified never act as neighbours for later ones.

    :param data: passed to kmeans() unchanged
    :param classes: passed to kmeans() unchanged
    :param tuple space_size: first two items are handed to generate_points()
    :param step: third argument for generate_points()
    :return: dict with the same keys as the kmeans() result; every value is a
        new list holding the original members followed by the trial points
        assigned to that key
    """
    k_means = kmeans(data, classes)
    trypoints = generate_points(space_size[0], space_size[1], step)

    # Freeze the cluster membership. The result used to share its lists with
    # k_means, so appending a trial point grew the very list being iterated:
    # a trial point equal to its nearest neighbour matched itself again and
    # the loop never terminated.
    training = {key: list(members) for key, members in k_means.items()}
    points_in_kmeans = list(itertools.chain.from_iterable(training.values()))
    kmeans_toplot = {key: list(members) for key, members in training.items()}

    if not points_in_kmeans:
        # Nothing to compare against, so no trial point can be classified.
        return kmeans_toplot

    for trypoint in trypoints:
        # min() returns the first closest point, exactly what sorted(...)[0]
        # gave, without sorting the whole list for every trial point.
        nearest = min(points_in_kmeans, key=lambda p: distanc(trypoint, p))
        for key, members in training.items():
            for member in members:
                if member == nearest:
                    kmeans_toplot[key].append(trypoint)
    return kmeans_toplot
def distances_to_centers(distances, data):
    """
    Build, for every key of 'distances', a dict of distances to all data points.

    Only the keys of 'distances' are used; its values are ignored and the
    argument itself is left untouched.

    :param dict distances: dictionary whose keys act as centers
    :param list data: list of tuples (the data points)
    :return: nested dict in format
        >> center: {data point: distanc(data point, center)}
    """
    return {
        center: {datum: distanc(datum, center) for datum in data}
        for center in distances
    }
def knearest_neighbour(data, classes, space_size=(-20, 20), step=1):
    """
    Assign every generated trial point to the cluster with the smallest
    average_dist() to it.

    NOTE(review): the returned dict is a shallow copy of the kmeans() result,
    so both share the same member lists. A trial point appended here is
    therefore part of the cluster when later trial points are sorted and
    averaged -- confirm that this feedback is intended.

    :param data: passed to kmeans() unchanged
    :param classes: passed to kmeans() unchanged
    :param tuple space_size: first two items are handed to generate_points()
    :param step: third argument for generate_points()
    :return: dict with the keys of the kmeans() result, each list extended by
        the trial points assigned to that key
    """
    clusters = kmeans(data, classes)
    lower, upper = space_size[0], space_size[1]
    candidates = generate_points(lower, upper, step)
    assigned = dict(clusters)  # shallow copy: lists are shared with clusters
    for candidate in candidates:
        # order the members of every cluster by distance from the candidate
        for members in clusters.values():
            members.sort(key=lambda p: distanc(candidate, p))
        mean_distance = {}
        for label in clusters:
            mean_distance[label] = average_dist(candidate, clusters[label])
        best_label = min(mean_distance, key=mean_distance.get)
        assigned[best_label].append(candidate)
    return assigned
def distance_sort(data, point):
    """
    Order points into a chain where each point is followed by its closest
    not yet used neighbour.

    :param list data: list of tuples where tuple is one point (x, y,...);
        it is copied, the caller's list is not modified
    :param tuple point: starting point, it becomes the first element
    :return: list of points in chain order
    """
    remaining = data.copy()
    chain = [point]  # the starting point opens the chain
    current = point
    # keep going while there is the current point plus something to compare to
    while len(remaining) > 1:
        remaining.remove(current)  # the current point is already in the chain
        # distanc() is enough for ordering, no square root needed
        remaining.sort(key=lambda p: distanc(p, current))
        current = remaining[0]  # closest point continues the chain
        chain.append(current)
    return chain
def maximin(data, q):
    """
    Count clusters by repeatedly promoting points to centers.

    The first point and the point farthest from it (by distanc()) start as
    centers. Then, as long as the candidate reported by get_maxmin() has a
    value greater than average_center_distance(q, centers), that candidate
    is promoted to a center and removed from the remaining points.

    :param list data: list of points; it is copied, the caller's list is not
        modified
    :param q: factor handed to average_center_distance()
    :return: number of centers found
    """
    remaining = data.copy()  # work on a copy, the input stays as it was
    first_center = remaining.pop(0)
    # ascending order by distance from the first center -> farthest is last
    by_distance = sorted(remaining, key=lambda p: distanc(p, first_center))
    second_center = by_distance[-1]
    remaining.remove(second_center)

    centers = {first_center: {}, second_center: {}}
    while True:
        centers = distances_to_centers(centers, remaining)
        candidate = get_maxmin(centers)
        threshold = average_center_distance(q, centers)
        if not candidate[1] > threshold:
            return len(centers.keys())  # number of clusters
        # candidate[0] becomes a new center and leaves the remaining points
        centers[candidate[0]] = {}
        remaining.remove(candidate[0])
def average_dist(bod, points):
    """
    Average the distanc() values between 'bod' and every point in 'points'.

    :param bod: reference point, second argument of each distanc() call
    :param points: sized collection of points to measure from
    :return: sum of the distanc() values divided by len(points)
    :raises ZeroDivisionError: when 'points' is empty
    """
    total = sum(distanc(point, bod) for point in points)
    return total / len(points)
def average_center_distance(q, distances):
    """
    Scale the mean distanc() value over all pairs of keys in 'distances' by q.

    :param q: factor the mean is multiplied by
    :param dict distances: dictionary whose keys are the centers to compare
    :return: (mean of distanc() over every 2-combination of the keys) * q
    :raises ZeroDivisionError: when 'distances' has fewer than two keys
    """
    pairwise = []
    for first, second in combinations(distances.keys(), 2):
        pairwise.append(distanc(first, second))
    mean_distance = sum(pairwise) / len(pairwise)
    return mean_distance * q