Ejemplo n.º 1
0
def scale(data: List[Vector]) -> Tuple[Vector, Vector]:
    """Compute per-feature statistics for a list of equal-length vectors.

    Returns a ``(means, stdevs)`` pair: ``means`` is ``vector_mean(data)``
    and ``stdevs[j]`` is ``standard_deviation`` applied to the j-th
    component of every vector. The number of features is taken from the
    first vector.
    """
    num_features = len(data[0])
    means = vector_mean(data)

    stdevs = []
    for j in range(num_features):
        # Gather the j-th component across all rows, then summarize it.
        feature_values = [row[j] for row in data]
        stdevs.append(standard_deviation(feature_values))

    return means, stdevs
Ejemplo n.º 2
0
def cluster_means(k: int,
                  imputs: List[Vector],
                  assignments: List[int]) -> List[Vector]:
    """Return one representative vector for each of the ``k`` clusters.

    Args:
        k: Number of clusters; valid assignments are ``0 .. k - 1``.
        imputs: The data points (the spelling is kept as-is so existing
            keyword callers keep working).
        assignments: ``assignments[j]`` is the cluster index of
            ``imputs[j]``. Pairs are formed with ``zip``, so any surplus
            entries on either side are ignored.

    Returns:
        A list of ``k`` vectors. Each non-empty cluster contributes the
        ``vector_mean`` of its members; an empty cluster contributes a
        randomly chosen data point instead.

    Raises:
        IndexError: If a cluster is empty and ``imputs`` is empty too
            (raised by ``random.choice``).
    """
    # clusters[i] collects the points whose assignment is i
    clusters = [[] for _ in range(k)]

    # Read from the parameter itself: the previous body referred to an
    # undefined name ``inputs`` and also shadowed the builtin ``input``.
    for point, assignment in zip(imputs, assignments):
        clusters[assignment].append(point)

    # if a cluster is empty then just use a random point
    return [vector_mean(cluster) if cluster else random.choice(imputs)
            for cluster in clusters]
Ejemplo n.º 3
0
def least_squares_fit(xs: List[Vector],
                      ys: Vector,
                      alpha: float,
                      learning_rate: float = 0.001,
                      num_steps: int = 1000,
                      batch_size: int = 1) -> Vector:
    """Fit beta for the model dot(x, beta) by mini-batch gradient descent.

    Finds the beta that minimizes the sum of squared errors. Starting from
    a random vector with one coefficient per component of ``xs[0]``, the
    data is swept ``num_steps`` times in consecutive slices of
    ``batch_size`` rows. For each slice the per-point results of
    ``sqerror_ridge_gradient`` (which receives ``alpha``) are averaged and
    a step of size ``-learning_rate`` is taken via ``gradient_step``.

    Returns the final coefficient vector.
    """
    # random starting point, one coefficient per feature
    beta = [random.random() for _ in xs[0]]
    step_size = -learning_rate

    for _ in tqdm.trange(num_steps, desc="least squares fit"):
        for lo in range(0, len(xs), batch_size):
            hi = lo + batch_size
            batch = zip(xs[lo:hi], ys[lo:hi])

            per_point_gradients = [
                sqerror_ridge_gradient(x_i, y_i, beta, alpha)
                for x_i, y_i in batch
            ]
            mean_gradient = vector_mean(per_point_gradients)

            beta = gradient_step(beta, mean_gradient, step_size)

    return beta
Ejemplo n.º 4
0
        # choose the last merged of our clusters
        next_cluster = min(clusters, key = get_merge_order)
        clusters = [c for c in clusters if c != next_cluster]
        
        # and add its children to the list (i.e. unmerge it)
        clusters.extend(get_children(next_cluster))
        
    # once we have enough clusters, return those
    return clusters


# Split the hierarchy into three clusters and collect the values of each one.
three_clusters = [
    get_values(member)
    for member in generate_clusters(base_cluster, 3)
]
from matplotlib import pyplot as plt


# Draw each cluster with its own marker and color, numbering them from 1.
for i, (cluster, marker, color) in enumerate(
        zip(three_clusters, ['D', 'o', '*'], ['r', 'g', 'b']),
        start=1):
    xs, ys = zip(*cluster)  # transpose the (x, y) pairs into two sequences
    plt.scatter(xs, ys, color=color, marker=marker)

    # label the cluster with its number, drawn at the cluster mean
    x, y = vector_mean(cluster)
    plt.plot(x, y, marker=f'${i}$', color='black')

plt.title("User Locations -- 3 Bottom-up Clusters, Min")
plt.xlabel("blocks east of city center")
plt.ylabel("blocks north of city center")
plt.show()
Ejemplo n.º 5
0
def linear_gradient(x: float, y: float, theta: Vector) -> Vector:
    """Return the gradient of the squared error at a single point.

    The model is ``predicted = slope * x + intercept`` where ``theta``
    unpacks as ``(slope, intercept)``. The quantity being minimized is
    ``(predicted - y) ** 2``; its partial derivatives are
    ``2 * error * x`` with respect to the slope and ``2 * error`` with
    respect to the intercept.

    Args:
        x: Input value.
        y: Observed (actual) value.
        theta: Two-element sequence ``[slope, intercept]``.

    Returns:
        ``[d/d_slope, d/d_intercept]`` of the squared error.
    """
    slope, intercept = theta
    predicted = slope * x + intercept  # the model's prediction
    error = predicted - y  # error is predicted - actual
    return [2 * error * x, 2 * error]


# Fit a line to `inputs` by gradient descent, then check the result.
# Start with random values for slope and intercept
theta = [random.uniform(-1, 1), random.uniform(-1, 1)]

learning_rate = 0.001
for epoch in range(5000):
    # Compute the mean of the per-point gradients over every (x, y) pair
    # in `inputs` (`inputs` is defined outside this snippet).
    grad = vector_mean([linear_gradient(x, y, theta) for x, y in inputs])
    # Step with a negative step size. Presumably gradient_step moves theta
    # by step_size * gradient, so this goes downhill -- confirm against
    # gradient_step's definition.
    theta = gradient_step(theta, grad, -learning_rate)
    # Prints the current estimate on every one of the 5000 epochs.
    print(epoch, theta)

# Sanity-check the fit; the bounds below presumably match how `inputs`
# was generated (not visible here).
slope, intercept = theta
assert 19.9 < slope < 20.1  # Slope should be around 20
assert 4.9 < intercept < 5.1  # Intercept should be around 5
"""
##############################################################################################################
################# Split data into mini-batches and use gradient descent to fit models ########################
##############################################################################################################
"""

from typing import TypeVar, List, Iterator
T = TypeVar('T')
Ejemplo n.º 6
0
def de_mean(data: List[Vector]) -> List[Vector]:
    """Shift the data so that every dimension has a mean of 0.

    Subtracts ``vector_mean(data)`` from each vector and returns the
    shifted vectors as a new list, in the original order.
    """
    center = vector_mean(data)

    recentered = []
    for row in data:
        recentered.append(subtract(row, center))
    return recentered