def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Aggregate the pairwise distances between two clusters.

    ``distance`` is evaluated for every combination of a value from
    ``get_values(cluster1)`` with a value from ``get_values(cluster2)``.  The
    resulting list is passed to ``distance_agg`` (``min`` by default) and
    whatever it returns is the result.
    """
    pairwise = []
    for left in get_values(cluster1):
        for right in get_values(cluster2):
            pairwise.append(distance(left, right))
    return distance_agg(pairwise)
def print_distances(data_matrix):
    """Print the distance between every unordered pair of rows.

    The row count is taken from ``shape(data_matrix)``.  After a
    ``Distances:`` header, one line ``i to j d`` is printed for each pair of
    row indices with ``i < j``.
    """
    row_count, _ = shape(data_matrix)
    print("Distances:")
    for first in range(row_count):
        # Start just past `first` so each pair is visited exactly once.
        for second in range(first + 1, row_count):
            print(first, "to", second,
                  distance(data_matrix[first], data_matrix[second]))
def knn_classify2(k, cities, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest cities.

    Each city must expose ``longitude`` and ``latitude`` attributes and must
    also unpack into three items, the last of which is used as its label.
    Cities are ranked by ``distance((longitude, latitude), new_point)`` and
    the labels of the first ``k`` are passed to ``majority_vote``.
    """

    def distance_from_new_point(city):
        return distance((city.longitude, city.latitude), new_point)

    nearest_first = sorted(cities, key=distance_from_new_point)
    votes = [label for _, _, label in nearest_first[:k]]
    return majority_vote(votes)
Exemple #4
0
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    ``labeled_points`` is an iterable of ``(point, label)`` pairs.  The pairs
    are ranked by ``distance(point, new_point)``, the labels of the first
    ``k`` are collected, and ``majority_vote`` chooses the result.
    """

    def distance_to_new_point(pair):
        return distance(pair[0], new_point)

    ranked = sorted(labeled_points, key=distance_to_new_point)
    votes = [label for _, label in ranked[:k]]
    return majority_vote(votes)
Exemple #5
0
def knn_classify(k, labeled_points, new_point):
    """Return the ``majority_vote`` label of the ``k`` points nearest ``new_point``.

    ``labeled_points`` is an iterable of ``(point, label)`` pairs.
    """
    # Copy the input, then sort the copy nearest-first (list.sort is stable,
    # just like sorted()).
    neighbours = list(labeled_points)
    neighbours.sort(key=lambda pair: distance(pair[0], new_point))

    # Keep only the labels of the k closest pairs and let them vote.
    closest_labels = [label for _, label in neighbours[:k]]
    return majority_vote(closest_labels)
def knn_classify(k: int, lableled_ponts: List[LabeledPoint],
                 new_point: Vector) -> str:
    """Classify ``new_point`` from the labels of its ``k`` nearest neighbours.

    Every element of ``lableled_ponts`` must expose ``point`` and ``label``
    attributes.  Elements are ranked by ``distance(element.point, new_point)``
    and the labels of the first ``k`` are passed on for the vote.
    """

    def distance_to_new_point(candidate):
        return distance(candidate.point, new_point)

    nearest_first = sorted(lableled_ponts, key=distance_to_new_point)
    votes = [neighbour.label for neighbour in nearest_first[:k]]

    # NOTE(review): `marjority_vote` looks like a misspelling of
    # `majority_vote`; confirm which name is actually defined before renaming.
    return marjority_vote(votes)
def run_experiment():
    """Run a fixed-step descent on ``sum_of_squares_gradient`` and print the result.

    Starts from three random integers drawn with ``random.randint(-10, 10)``.
    Each round takes a ``step`` of size ``-0.01`` along the gradient; the loop
    stops as soon as a step would move the point by less than the tolerance,
    and the last accepted point is printed.
    """
    tolerance = 1e-7
    v = [random.randint(-10, 10) for _ in range(3)]

    converged = False
    while not converged:
        candidate = step(v, sum_of_squares_gradient(v), -0.01)
        if distance(candidate, v) < tolerance:
            converged = True
        else:
            v = candidate
    print(v)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    ``labeled_points`` is an iterable of ``(point, label)`` pairs.  Returns
    whatever ``majority_vote`` yields for the labels of the ``k`` pairs whose
    point is closest to ``new_point`` according to ``distance``.
    """

    # Order the labeled points from nearest to farthest.  The key indexes the
    # pair instead of using a tuple parameter (``lambda (point, _): ...``),
    # which is Python-2-only syntax and a SyntaxError on Python 3.
    by_distance = sorted(
        labeled_points,
        key=lambda point_label: distance(point_label[0], new_point))

    # Find the labels for the k closest.
    k_nearest_labels = [label for _, label in by_distance[:k]]

    # And let them vote.
    return majority_vote(k_nearest_labels)
def find_eigenvector(A, tolerance=0.00001):
    """Iterate ``A`` on a guess vector until successive guesses agree.

    Starting from a vector of ones (one entry per element of ``A``), each
    round applies ``matrix_operate(A, guess)``, rescales the result by the
    reciprocal of its ``magnitude`` and compares it with the previous guess.

    Returns a ``(vector, length)`` pair once ``distance`` between consecutive
    guesses drops below ``tolerance``; the original code labels the pair
    "eigenvector, eigenvalue".  The loop does not terminate if that never
    happens.
    """
    current = [1 for _ in A]

    while True:
        transformed = matrix_operate(A, current)
        norm = magnitude(transformed)
        candidate = scalar_multiply(1 / norm, transformed)

        if not (distance(current, candidate) < tolerance):
            current = candidate
            continue
        return candidate, norm
def find_eigenvector(A, tolerance=0.00001):
    """Repeatedly apply ``A`` to a normalised guess until it stops moving.

    The first guess is a vector of ones with one entry per element of ``A``.
    Returns ``(latest_guess, length)`` where ``length`` is the ``magnitude``
    of the last un-normalised product; the original code labels these
    "eigenvector, eigenvalue".
    """
    previous = [1 for _row in A]

    while True:
        image = matrix_operate(A, previous)
        scale_factor = magnitude(image)
        latest = scalar_multiply(1 / scale_factor, image)

        # Stop once two consecutive guesses are closer than the tolerance.
        if distance(previous, latest) < tolerance:
            break
        previous = latest

    return latest, scale_factor
Exemple #11
0
def knn_classify(k, labeled_points, new_point):
    """Return the ``majority_vote`` label of the ``k`` points nearest ``new_point``.

    Each labeled data point is a ``(point, label)`` pair.
    """

    def distance_to_new_point(pair):
        return distance(pair[0], new_point)

    # Arrange the labeled data points from nearest to farthest.
    nearest_first = sorted(labeled_points, key=distance_to_new_point)

    # Take the labels of the k nearest ones.
    votes = [label for _, label in nearest_first[:k]]

    # Decide by majority vote.
    return majority_vote(votes)
Exemple #12
0
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    Each labeled point should be a ``(point, label)`` pair.
    """
    # Measure the distance from the new point to every existing point and
    # keep the pairs in a list sorted nearest-first.
    neighbours = list(labeled_points)
    neighbours.sort(key=lambda pair: distance(pair[0], new_point))

    # Gather the labels of the k nearest points into a list.
    closest_labels = [label for _, label in neighbours[:k]]

    # The nearest points may disagree (or be equally distant), so they vote.
    return majority_vote(closest_labels)
Exemple #13
0
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    Each labeled point should be a ``(point, label)`` pair.  Returns whatever
    ``majority_vote`` yields for the labels of the ``k`` pairs whose point is
    closest to ``new_point`` according to ``distance``.
    """

    # Order the labeled points from nearest to farthest.  The key indexes the
    # pair instead of using a tuple parameter (``lambda (point, _): ...``),
    # which is Python-2-only syntax and a SyntaxError on Python 3.
    by_distance = sorted(
        labeled_points,
        key=lambda point_label: distance(point_label[0], new_point))

    # Find the labels for the k closest.
    k_nearest_labels = [label for _, label in by_distance[:k]]

    # And let them vote.
    return majority_vote(k_nearest_labels)
Exemple #14
0
def find_eigenvector(A, tolerance=0.00001):
    """Iterate ``A`` on a normalised guess until consecutive guesses agree.

    Returns ``(vector, length)``: the latest normalised guess and the
    ``magnitude`` of the product it was normalised from.  The original code
    labels the pair "eigenvector, eigenvalue".
    """
    current = [1 for _ in A]

    while True:
        # Compute the result vector and its magnitude.
        product = matrix_operate(A, current)
        norm = magnitude(product)

        # Next guess: the scalar (1 / norm) times the result vector.
        candidate = scalar_multiply(1 / norm, product)

        # Still further apart than the threshold: adopt the new guess and
        # go round again.
        if not (distance(current, candidate) < tolerance):
            current = candidate
            continue

        # Close enough: return the updated vector and the magnitude.
        return candidate, norm
Exemple #15
0
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    Parameters:
        k: how many of the nearest labeled points take part in the vote.
        labeled_points: the data used for classification; each item should
            be a ``(point, label)`` pair.
        new_point: the data point to classify.

    Steps:
        1. Sort the labeled points by their ``distance`` to ``new_point``.
        2. Pass the labels of the first ``k`` sorted points to
           ``majority_vote`` and return its result.
    """

    def distance_to_new_point(pair):
        return distance(pair[0], new_point)

    nearest_first = sorted(labeled_points, key=distance_to_new_point)
    votes = [label for _, label in nearest_first[:k]]
    return majority_vote(votes)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    ``labeled_points`` holds ``(point, label)`` pairs.  Per the original
    author's notes, in this project they look like
    ``([longitude, latitude], label)`` and ``new_point`` is
    ``[longitude, latitude]``.  ``distance`` is defined elsewhere and is
    presumably a Euclidean distance (confirm against its definition).
    """
    # Sort a copy in ascending order of distance, so the closest pairs sit at
    # the front of the list.
    neighbours = list(labeled_points)
    neighbours.sort(key=lambda pair: distance(pair[0], new_point))

    # Each of the first k entries is unpacked as (point, label); only the
    # label is kept.
    closest_labels = [label for _, label in neighbours[:k]]

    # Let the collected labels vote.
    return majority_vote(closest_labels)
Exemple #17
0
def random_distances(dim, num_pairs):
    """Return ``num_pairs`` distances, each between two fresh ``random_point(dim)`` draws."""
    results = []
    for _ in range(num_pairs):
        first = random_point(dim)
        second = random_point(dim)
        results.append(distance(first, second))
    return results


# if __name__ == "__main__":
#
#     # try several different values for k
#     for k in [1, 3, 5, 7]:
#         num_correct = 0
#
#         for location, actual_language in cities:
#
#             other_cities = [other_city
#                             for other_city in cities
#                             if other_city != (location, actual_language)]
#
#             predicted_language = knn_classify(k, other_cities, location)
#
#             if predicted_language == actual_language:
#                 num_correct += 1
#
#         print k, "neighbor[s]:", num_correct, "correct out of", len(cities)
#
#     dimensions = range(1, 101, 5)
#
#     avg_distances = []
#     min_distances = []
#
#     random.seed(0)
#     for dim in dimensions:
#         distances = random_distances(dim, 10000)  # 10,000 random pairs
#         avg_distances.append(mean(distances))     # track the average
#         min_distances.append(min(distances))      # track the minimum
#         print dim, min(distances), mean(distances), min(distances) / mean(distances)
Exemple #18
0
from linear_algebra import distance
from typing import Tuple, List
from linear_algebra import vector_mean
from statistics import standard_deviation
# we are trying to create clusters

# Type alias: a vector is a list of floats.
Vector = List[float]

# Pairwise distances between three hard-coded two-component sample points,
# computed once at import time.
a_to_b = distance([63, 150], [67, 160])

a_to_c = distance([63, 150], [70, 171])

b_to_c = distance([67, 160], [70, 171])



def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Return ``(means, stdevs)`` describing each position of the vectors in ``data``.

    ``means`` is ``vector_mean(data)``.  ``stdevs`` holds, for every index up
    to the length of the first vector, ``standard_deviation`` of the values
    found at that index across all vectors.
    """
    column_count = len(data[0])
    # Lazily gather the values at each index so they can be summarised below.
    columns = ([row[j] for row in data] for j in range(column_count))
    return vector_mean(data), [standard_deviation(column) for column in columns]

# Import-time sanity check of scale() on a small three-vector data set.
vectors = [[-3, -1, 1], [-1, 0, 1], [1, 1, 1]]
means, stdevs = scale(vectors)
assert means == [-1, 0, 1]
assert stdevs == [2, 1, 0]

def rescale(data: List[Vector]) -> List[Vector]:
    """Presumably meant to rescale ``data`` using the statistics from ``scale``.

    NOTE(review): this definition appears to be cut off.  As written it only
    computes ``dim``, ``means`` and ``stdevs`` and then falls off the end,
    returning None rather than the annotated ``List[Vector]``.  Confirm
    against the complete source before relying on it.
    """
    dim = len(data[0])
    means, stdevs = scale(data)
def knn_classify(k, labeled_points, new_point):
    """Classify ``new_point`` by a vote among its ``k`` nearest labeled points.

    ``labeled_points`` is an iterable of ``(point, label)`` pairs.  Returns
    whatever ``majority_vote`` yields for the labels of the ``k`` pairs whose
    point is closest to ``new_point`` according to ``distance``.
    """
    # The key indexes the pair instead of using a tuple parameter
    # (``lambda (point, _): ...``), which is Python-2-only syntax and a
    # SyntaxError on Python 3.
    by_distance = sorted(
        labeled_points,
        key=lambda point_label: distance(point_label[0], new_point))
    k_nearest_labels = [label for _, label in by_distance[:k]]
    return majority_vote(k_nearest_labels)
def random_distances(dim, num_pairs):
    """Build a list of ``num_pairs`` distances between pairs of random points.

    Every entry is ``distance`` applied to two independent
    ``random_point(dim)`` draws.
    """

    def one_distance():
        return distance(random_point(dim), random_point(dim))

    return [one_distance() for _ in range(num_pairs)]
Exemple #21
0
                               y, theta_0, alpha_0)


if __name__ == "__main__":
    # Demo 1: hand-written descent loop on the sum-of-squares gradient.
    print("using the gradient")

    tolerance = 1e-7
    v = [random.randint(-10, 10) for _ in range(3)]

    while True:
        gradient = sum_of_squares_gradient(v)  # gradient at the current v
        next_v = step(v, gradient, -0.01)  # move against the gradient
        if not (distance(next_v, v) < tolerance):
            # Still moving: accept the step, show it, and go round again.
            v = next_v
            print(v)
            continue
        break  # the step was smaller than the tolerance

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
    print()

    # Demo 2: the same problem handed to minimize_batch from a new random start.
    print("using minimize_batch")

    v = minimize_batch(sum_of_squares, sum_of_squares_gradient,
                       [random.randint(-10, 10) for _ in range(3)])

    print("minimum v", v)
Exemple #22
0
def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Apply ``distance_agg`` to all cross-cluster distances.

    One ``distance`` is computed for each pairing of a value from
    ``get_values(cluster1)`` with a value from ``get_values(cluster2)``.
    """
    pairwise = [
        distance(left, right)
        for left in get_values(cluster1)
        for right in get_values(cluster2)
    ]
    return distance_agg(pairwise)
def knn_classify(k, labeled_points, new_point):
    """Return the ``majority_vote`` label of the ``k`` pairs nearest ``new_point``.

    ``labeled_points`` is an iterable of two-item entries whose first item is
    compared with ``new_point`` via ``distance`` and whose second item is the
    label.
    """

    def distance_sort(city):
        return distance(city[0], new_point)

    nearest_first = sorted(labeled_points, key=distance_sort)
    votes = [label for _, label in nearest_first[:k]]
    return majority_vote(votes)
                               negate_all(gradient_fn),
                               x, y, theta_0, alpha_0)

if __name__ == "__main__":
    # Demo 1: hand-written descent loop on the sum-of-squares gradient.
    print("using the gradient")

    tolerance = 1e-7
    v = [random.randint(-10, 10) for _ in range(3)]

    while True:
        gradient = sum_of_squares_gradient(v)  # gradient at the current v
        next_v = step(v, gradient, -0.01)  # move against the gradient
        if not (distance(next_v, v) < tolerance):
            v = next_v  # still moving: keep the step and continue
            continue
        break  # the step was smaller than the tolerance

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
    print()

    # Demo 2: the same problem handed to minimize_batch from a new random start.
    print("using minimize_batch")

    v = minimize_batch(sum_of_squares, sum_of_squares_gradient,
                       [random.randint(-10, 10) for _ in range(3)])

    print("minimum v", v)
    return add(v, step)


def sum_of_squares_gradient(v: Vector) -> Vector:
    """Return a new list holding twice each component of ``v``.

    This is the gradient of ``sum(v_i ** 2)`` with respect to ``v``.
    """
    return [2 * component for component in v]


# Start from a random point with three components drawn from [-10, 10].
v = [random.uniform(-10, 10) for _ in range(3)]

# Take 1000 fixed-size steps against the gradient, printing each iterate.
for epoch in range(1000):
    grad = sum_of_squares_gradient(v)
    v = gradient_step(v, grad, -0.01)
    print(epoch, v)

# After the loop v is expected to sit very close to the origin.
assert distance(v, [0, 0, 0]) < 0.001

# x ranges from -50 to 49; y is always 20 * x + 5.
inputs = [(x_value, 20 * x_value + 5) for x_value in range(-50, 50)]


def linear_gradient(x: float, y: float, theta: Vector) -> Vector:
    """Gradient of the squared error of a line fit at a single ``(x, y)`` point.

    ``theta`` unpacks into ``(slope, intercept)``.  With
    ``error = slope * x + intercept - y``, the result is
    ``[2 * error * x, 2 * error]``: the partial derivatives of ``error ** 2``
    with respect to the slope and the intercept.
    """
    slope, intercept = theta
    residual = (slope * x + intercept) - y
    return [2 * residual * x, 2 * residual]


# start with random values for slope and intercept
def maximize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    """Maximise ``target_fn`` by delegating to ``minimize_stochastic``.

    The target is wrapped with ``negate`` and the gradient with
    ``negate_all``; ``x``, ``y``, ``theta_0`` and ``alpha_0`` are forwarded
    unchanged.  Returns whatever ``minimize_stochastic`` returns.
    """
    # The original was missing the comma after ``negate_all(gradient_fn)``,
    # which made the whole call a SyntaxError.
    return minimize_stochastic(negate(target_fn),
                               negate_all(gradient_fn),
                               x, y, theta_0, alpha_0)

if __name__ == "__main__":
    # Demo 1: hand-written descent loop from a random starting point.
    tolerance = 1e-7
    v = [random.randint(-10, 10) for _ in range(3)]

    while True:
        gradient = sum_of_squares_gradient(v)  # gradient at the current v
        next_v = step(v, gradient, -0.01)  # move against the gradient
        if not (distance(next_v, v) < tolerance):
            v = next_v  # still moving: keep the step and continue
            continue
        break  # the step was smaller than the tolerance

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
    print()

    # Demo 2: the same problem handed to minimize_batch from a new random start.
    print("using minimize_batch")

    v = minimize_batch(sum_of_squares, sum_of_squares_gradient,
                       [random.randint(-10, 10) for _ in range(3)])

    print("minimum v", v)
    print("minimum value", sum_of_squares(v))
Exemple #27
0
def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Aggregate every cross-cluster distance with ``distance_agg``.

    All pairwise ``distance`` values between ``get_values(cluster1)`` and
    ``get_values(cluster2)`` are collected into one list, and the result of
    ``distance_agg`` on that list is returned.
    """
    distances = []
    for member1 in get_values(cluster1):
        distances.extend(
            distance(member1, member2) for member2 in get_values(cluster2))
    return distance_agg(distances)
    return minimize_stochastic(negate(target_fn), negate_all(gradient_fn), x,
                               y, theta_0, alpha_0)


if __name__ == '__main__':
    # Demo 1: hand-written descent loop on the sum-of-squares gradient.
    print('using the gradient')

    tolerance = 1e-7
    v = [random.randint(-10, 10) for _ in range(3)]

    while True:
        gradient = sum_of_squares_gradient(v)  # compute the gradient at v
        next_v = step(v, gradient, -0.01)  # take a negative gradient step
        if not (distance(next_v, v) < tolerance):
            v = next_v  # not converged yet: keep going
            continue
        break  # stop once we are converging

    print(f'Minimum v: {v}')
    print(f'Minimum value: {sum_of_squares(v)}')
    print('------------------------------')

    # Demo 2: the same problem handed to minimize_batch from a new random start.
    print('Using minimize_batch')

    v = minimize_batch(sum_of_squares, sum_of_squares_gradient,
                       [random.randint(-10, 10) for _ in range(3)])

    print(f'minimum v: {v}')
    print(f'minimum value: {sum_of_squares(v)}')
def knn_classify(k, labeled_points, new_points):
    """Classify ``new_points`` by a vote among its ``k`` nearest labeled points.

    ``labeled_points`` is an iterable of ``(point, label)`` pairs.  Returns
    whatever ``majority_vote`` yields for the labels of the ``k`` pairs whose
    point is closest to ``new_points`` according to ``distance``.
    """
    # The key indexes the pair instead of using a tuple parameter
    # (``lambda (point, _): ...``), which is Python-2-only syntax and a
    # SyntaxError on Python 3.
    by_distance = sorted(
        labeled_points,
        key=lambda point_label: distance(point_label[0], new_points))
    k_nearest_labels = [label for _, label in by_distance[:k]]
    return majority_vote(k_nearest_labels)
def step(v, direction, step_size):
    """Return ``v`` moved by ``step_size`` times ``direction``, component-wise.

    Components are paired with ``zip``, so the result is as long as the
    shorter of the two inputs.
    """
    return [
        coordinate + step_size * delta
        for coordinate, delta in zip(v, direction)
    ]

def sum_of_squares_gradient(v):
    """Return a new list holding twice each component of ``v``.

    This is the gradient of ``sum(v_i ** 2)`` with respect to ``v``.
    """
    return [2 * component for component in v]

# Pick a random starting point.
v = [random.randint(-10, 10) for i in range(3)]

tolerance = 0.0000001

# Step against the gradient until a step moves v by less than the tolerance.
while True:
    gradient = sum_of_squares_gradient(v)
    next_v = step(v, gradient, -0.01)
    if distance(next_v, v) < tolerance:
        break
    v = next_v

# The bare ``print v`` statement is a SyntaxError on Python 3; calling print
# with a single argument produces the same output on Python 2 and 3.
print(v)

# Candidate step sizes, from largest to smallest.
step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]

def safe(f):
    """Return a new function that behaves like ``f`` but never raises.

    The wrapper forwards all positional and keyword arguments to ``f`` and
    returns its result; whenever ``f`` raises an ``Exception`` it returns
    ``float('inf')`` instead.
    """
    def safe_f(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            # Deliberate best-effort: any ordinary error maps to infinity.
            # KeyboardInterrupt and SystemExit are no longer swallowed, as
            # they were by the original bare ``except:``.
            return float('inf')

    # The original never returned the wrapper, so safe(f) evaluated to None.
    return safe_f
 def test_distance(self):
     """``distance`` between [5, 4] and [1, 1] must equal 5.

     The two points differ by 4 and 3, whose Euclidean length is 5.
     """
     expected = 5
     actual = distance([5, 4], [1, 1])
     self.assertEqual(expected, actual)
Exemple #32
0
def cluster_distance(cluster1, cluster2, distance_agg=min):
    """Aggregate the distances between the members of two clusters.

    Every value of ``get_values(cluster1)`` is paired with every value of
    ``get_values(cluster2)``; the list of their ``distance`` values is passed
    to ``distance_agg`` (``min`` by default) and that result is returned.
    """
    collected = []
    for first_value in get_values(cluster1):
        for second_value in get_values(cluster2):
            collected.append(distance(first_value, second_value))
    return distance_agg(collected)
Exemple #33
0
# Exercise the vector helpers exposed by `la`, binding each result to C and
# printing it.  A and B are not defined in this excerpt; presumably they are
# set up earlier in the script.
C = la.scalar_multiply(10, A)
print("10 * A = ", C)

C = la.vector_mean([A, B])
print("A and B mean = ", C)

C = la.dot(A, B)
print("A dot B = ", C)

C = la.sum_of_squares(A)
print("A^2's summary = ", C)

C = la.magnitude(A)
print("A's magnitude = ", C)

# NOTE(review): the label says "A's distance" but the call compares A with B.
C = la.distance(A, B)
print("A's distance = ", C)

print()
print("*** matrix ......")
# Sample matrix written as a list of three rows of three numbers.
M = [[1, 2, 3], [5, 6, 7], [3, 6, 9]]
print("M = ", M)

# NOTE(review): this binds a module-level name `shape` to the result of
# la.shape(M); it would shadow any other `shape` in this module.
shape = la.shape(M)
print("M's shape = ", shape)

row_1 = la.get_row(M, 1)
print("M[1,:] = ", row_1)

# NOTE(review): the label reads "M[:1]"; by analogy with "M[1,:]" above,
# "M[:,1]" was presumably intended -- confirm before changing the output.
col_1 = la.get_column(M, 1)
print("M[:1] = ", col_1)
def random_distances(dim, num_pairs):
    """Return ``num_pairs`` distances between independently drawn random points.

    Each distance is ``distance(random_point(dim), random_point(dim))``.
    """
    samples = []
    for _ in range(num_pairs):
        start = random_point(dim)
        end = random_point(dim)
        samples.append(distance(start, end))
    return samples
Exemple #35
0
    plt.plot(xs, estimates, 'b+', label='Estimate')  # blue +
    plt.legend(loc=9)
    plt.show()
    plt.close()

    # pick a random starting point
    v = [random.uniform(-10, 10) for i in range(3)]
    print(v)

    for epoch in range(1000):
        grad = sum_of_squares_gradient(v)  # compute the gradient at v
        v = gradient_step(v, grad,
                          -0.01)  # take a fixed negative gradient step
        print(epoch, v)

    assert distance(v, [0, 0, 0]) < 0.001  # v should be close to 0

    print("")
    print("Using gradient descent to fit models")
    # x ranges from -50 to 49, y is always 20 * x + 5
    inputs = [(x, 20 * x + 5) for x in range(-50, 50)]

    # Start with random values for slope and intercept.
    theta = [random.uniform(-1, 1), random.uniform(-1, 1)]

    learning_rate = 0.001

    for epoch in range(5000):
        # Compute the mean of the gradients
        grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs])
        # Take a step in that direction