Ejemplo n.º 1
0
 def test_distanc(self):
     """Exercise ``distanc`` with valid point pairs and with malformed calls."""
     # Points one unit apart along a single axis.
     self.assertEqual(distanc((1, 3), (1, 4)), 1)
     # Identical points.
     self.assertEqual(distanc((20, -15), (20, -15)), 0)

     # Points of different dimensionality are expected to raise.
     with self.assertRaises(Exception):
         distanc((0, 0), (1, 1, 1))
     # A single positional argument is not enough.
     with self.assertRaises(Exception):
         distanc((1, 0))
     # Three positional arguments are too many.
     with self.assertRaises(Exception):
         distanc((1, 0), (2, 5), (-9, 18))
Ejemplo n.º 2
0
def distances(points):
    """
    Compute the step lengths along a path of points.

    Every consecutive pair of points is passed to ``distanc`` and the square
    root of that value is collected.

    :param list points: ordered list of points
    :return: list with one entry per consecutive pair (empty when there are
        fewer than two points)
    """
    return [
        sqrt(distanc(start, end))
        for start, end in zip(points, points[1:])
    ]
Ejemplo n.º 3
0
def minimal_distance(data, classes, space_size=(-20, 20), step=1):
    """
    Attach every generated point to the closest key of the ``kmeans`` result.

    ``kmeans(data, classes)`` supplies a dict. Each point produced by
    ``generate_points(space_size[0], space_size[1], step)`` is appended to the
    value stored under the key with the smallest ``distanc`` to that point.

    :param data: forwarded to ``kmeans``
    :param classes: forwarded to ``kmeans``
    :param tuple space_size: pair passed to ``generate_points`` as its first
        two arguments
    :param step: third argument of ``generate_points``
    :return: the dict returned by ``kmeans``, with the generated points
        appended to its values
    """
    clusters = kmeans(data, classes)
    for candidate in generate_points(space_size[0], space_size[1], step):
        # The first key with the smallest distance wins on ties.
        closest_key = min(
            clusters, key=lambda center: distanc(candidate, center))
        clusters[closest_key].append(candidate)
    return clusters
Ejemplo n.º 4
0
def nearest_neighbour(data, classes, space_size=(-20, 20), step=1):
    """
    Label generated points with the group of their single nearest member.

    ``kmeans(data, classes)`` supplies a dict of point lists. Each point from
    ``generate_points(space_size[0], space_size[1], step)`` is appended once
    to every group that contains the member with the smallest ``distanc`` to
    it. The lists returned by ``kmeans`` are left untouched.

    :param data: forwarded to ``kmeans``
    :param classes: forwarded to ``kmeans``
    :param tuple space_size: pair passed to ``generate_points`` as its first
        two arguments
    :param step: third argument of ``generate_points``
    :return: dict with the keys of the ``kmeans`` result; each value is a new
        list holding the original members followed by the points assigned
        to that key
    """
    k_means = kmeans(data, classes)
    trypoints = generate_points(space_size[0], space_size[1], step)
    # Snapshot of the members: generated points never become reference points.
    members = list(itertools.chain.from_iterable(k_means.values()))
    # Copy every list. ``dict(k_means)`` would share the lists with
    # ``k_means``, so appending while scanning a group mutated the list being
    # iterated and never terminated when a generated point equalled a member.
    kmeans_toplot = {key: list(group) for key, group in k_means.items()}
    if not members:
        # Nothing to compare against; no point can be assigned.
        return kmeans_toplot
    for trypoint in trypoints:
        # min() returns the first closest member, as a stable sort would.
        nearest = min(members, key=lambda p: distanc(trypoint, p))
        for key, group in k_means.items():
            if nearest in group:
                kmeans_toplot[key].append(trypoint)
    return kmeans_toplot
Ejemplo n.º 5
0
def distances_to_centers(distances, data):
    """
    Build a table of ``distanc`` values between every center and every datum.

    Only the keys of ``distances`` are used; they act as the centers and the
    values stored under them are ignored.

    :param dict distances: dictionary whose keys are the centers
    :param list data: list of tuples (points)
    :return: nested dict in format >> center: {datum: distanc(datum, center)}
    """
    return {
        center: {datum: distanc(datum, center) for datum in data}
        for center in distances
    }
Ejemplo n.º 6
0
def knearest_neighbour(data, classes, space_size=(-20, 20), step=1):
    """
    Label generated points with the group whose members are closest on average.

    ``kmeans(data, classes)`` supplies a dict of point lists. For each point
    from ``generate_points(space_size[0], space_size[1], step)`` the value of
    ``average_dist`` to every group is computed and the point is appended to
    the group with the smallest one. The lists returned by ``kmeans`` are
    left untouched, so every point is judged against the original members
    only.

    :param data: forwarded to ``kmeans``
    :param classes: forwarded to ``kmeans``
    :param tuple space_size: pair passed to ``generate_points`` as its first
        two arguments
    :param step: third argument of ``generate_points``
    :return: dict with the keys of the ``kmeans`` result; each value is a new
        list holding the original members (in their original order) followed
        by the points assigned to that key
    """
    k_means = kmeans(data, classes)
    trypoints = generate_points(space_size[0], space_size[1], step)
    # Copy every list. With a shallow ``dict(k_means)`` each appended point
    # also joined the reference group and skewed the averages of all later
    # points, making the result depend on the traversal order.
    means_toplot = {key: list(group) for key, group in k_means.items()}
    for trypoint in trypoints:
        # The groups are not sorted per point: ``average_dist`` averages over
        # every member, so their order has no influence on the outcome.
        average_by_key = {
            key: average_dist(trypoint, group)
            for key, group in k_means.items()
        }
        keywithminvalue = min(average_by_key, key=average_by_key.get)
        means_toplot[keywithminvalue].append(trypoint)
    return means_toplot
Ejemplo n.º 7
0
def distance_sort(data, point):
    """
    Order points into a greedy nearest-neighbour chain starting at ``point``.

    The chain begins with ``point``; the next element is always the remaining
    point with the smallest ``distanc`` to the element added last.

    :param list data: list of tuples where tuple is one point (x, y,...)
    :param tuple point: starting point; it is removed from a copy of ``data``
        on the first pass, so it has to be present there whenever ``data``
        holds more than one element
    :return: list of points in chain order
    """
    chain = [point]
    remaining = data.copy()  # work on a copy so the caller's list is kept
    current = point
    # The last element left in 'remaining' is the one appended most recently.
    while len(remaining) > 1:
        remaining.remove(current)
        # Stable ordering by distance from the element added last.
        remaining = sorted(
            remaining, key=lambda candidate: distanc(candidate, current))
        current = remaining[0]
        chain.append(current)
    return chain
Ejemplo n.º 8
0
def maximin(data, q):
    """
    Count clusters by repeatedly promoting points to centers.

    The first point and the point with the largest ``distanc`` from it are the
    initial centers. On every pass the pair returned by ``get_maxmin`` is
    compared with ``average_center_distance(q, ...)``: while its second item
    is larger, its first item becomes a new center and leaves the pool.

    :param list data: points; a copy is processed, the argument is not changed
    :param q: factor forwarded to ``average_center_distance``
    :return: number of centers
    """
    remaining = data.copy()
    first_center = remaining.pop(0)
    # Ascending by distance from the first center; the last entry is furthest.
    ranked = sorted(remaining, key=lambda p: distanc(p, first_center))
    second_center = ranked[-1]
    remaining.remove(second_center)
    centers = {first_center: {}, second_center: {}}
    growing = True
    while growing:
        centers = distances_to_centers(centers, remaining)
        # NOTE(review): used as a (point, value) pair - confirm in get_maxmin.
        candidate = get_maxmin(centers)
        threshold = average_center_distance(q, centers)
        growing = candidate[1] > threshold
        if growing:
            centers[candidate[0]] = {}
            remaining.remove(candidate[0])
    return len(centers)
Ejemplo n.º 9
0
def average_dist(bod, points):
    """
    Return the mean ``distanc`` between ``bod`` and every element of ``points``.

    :param bod: reference point
    :param points: sized collection of points
    :return: sum of the individual values divided by ``len(points)``
    :raises ZeroDivisionError: if ``points`` is empty
    """
    total = sum(distanc(member, bod) for member in points)
    return total / len(points)
Ejemplo n.º 10
0
def average_center_distance(q, distances):
    """
    Return the mean pairwise ``distanc`` between the keys of ``distances``,
    multiplied by ``q``.

    :param q: multiplier applied to the mean
    :param dict distances: dictionary whose keys are the centers
    :return: ``q`` times the mean over all unordered pairs of keys
    :raises ZeroDivisionError: if there are fewer than two keys (no pairs)
    """
    pair_lengths = [
        distanc(first, second)
        for first, second in combinations(distances.keys(), 2)
    ]
    mean_length = sum(pair_lengths) / len(pair_lengths)
    return mean_length * q