def squared_clustering_errors(inputs: List[Vector], k: int) -> float:
    """Total squared error of a k-means clustering of `inputs`.

    Trains a fresh clusterer with k clusters, then sums the squared
    distance from every input to the mean of the cluster it lands in.
    """
    clusterer = kMeans(k)
    clusterer.train(inputs)
    means = clusterer.means
    return sum(
        squared_distance(point, means[clusterer.classify(point)])
        for point in inputs
    )
def squared_clustering_errors(inputs, k):
    """finds the total squared error from k-means clustering the inputs

    Trains a fresh clusterer with k clusters and returns the sum of
    squared distances from each input to its assigned cluster's mean.
    """
    clusterer = kMeans(k)        # this file's class is `kMeans`, not `KMeans`
    clusterer.train(inputs)
    means = clusterer.means      # attribute assigned in train(), not a method
    assignments = [clusterer.classify(point) for point in inputs]
    return sum(squared_distance(point, means[cluster])
               for point, cluster in zip(inputs, assignments))
def classify(self, input): """return the index of the cluster closest to the input""" return min(range(self.k), key=lambda i: squared_distance(input, self.means[i]))
[[random.random() for _ in range(10 + 1)] for _ in range(NUM_HIDDEN)], # Output_layer: NUM_HIDDEN inputs -> 4 outputs [[random.random() for _ in range(NUM_HIDDEN + 1)] for _ in range(4)] ] from vector_operations import squared_distance learning_rate = 1.0 with tqdm.trange(500) as t: for epoch in t: epoch_loss = 0.0 for x, y in zip(xs, ys): predicted = feed_forward(network, x)[-1] epoch_loss += squared_distance(predicted, y) gradients = sqerror_gradients(network, x, y) # Take gradient step for each neuron in each layer network = [[ gradient_step(neuron, grad, -learning_rate) for neuron, grad in zip(layer, layer_grad) ] for layer, layer_grad in zip(network, gradients)] t.set_description(f"fizz buzz (loss: {epoch_loss})") def argmax(xs: list) -> int: """Returns the index of the largest value""" return max(range(len(xs)), key=lambda i: xs[i])
                # NOTE(review): these three statements are the tail of
                # kMeans.train(), whose def/loop headers are above this chunk;
                # indentation depth is reconstructed — confirm against full file.
                assignments = new_assignments
                # Recompute each cluster's mean from its current members.
                self.means = cluster_means(self.k, inputs, assignments)
                t.set_description(f"changed: {num_changed}/{len(inputs)}")

# Example: meetups
inputs: List[List[float]] = [[-14,-5],[13,13],[20,23],[-19,-11],[-9,-16],[21,27],[-49,15],[26,13],[-46,5],[-34,-1],[11,15],[-49,0],[-22,-16],[19,28],[-12,-8],[-13,-19],[-41,8],[-11,-6],[-25,-9],[-18,-3]]

# Fixed seed so the (random) cluster initialization is reproducible.
random.seed(12)
clusterer = kMeans(k = 3)
clusterer.train(inputs)
means = sorted(clusterer.means)    # sort for the unit test

# Check that the means are close to what we expect
assert squared_distance(means[0], [-44, 5]) < 1
assert squared_distance(means[1], [-16, -10]) < 1
assert squared_distance(means[2], [18, 20]) < 1

# Same data with k=2: the two left-hand clusters merge into one.
random.seed(12)
clusterer = kMeans(k = 2)
clusterer.train(inputs)
means = sorted(clusterer.means)
assert len(means) == 2
assert squared_distance(means[0], [-26, -5]) < 1
assert squared_distance(means[1], [18, 20]) < 1