# Synthetic streaming k-means demo: points are generated around a set of
# randomly drifting centres and pushed into a stream that a KMeansStream
# model trains on and predicts from.

num_centroids = 5       # number of drifting centres used to generate data
k = 5                   # cluster count given to KMeansStream; also min_window_size
max_window_size = 1000  # forwarded to Stream_Learn as max_window_size
num_points = 15000      # total number of points pushed into the stream
step_size = 1           # forwarded to Stream_Learn as step_size

if __name__ == "__main__":
    # Starting centres. The -5 / 5 arguments are presumably the coordinate
    # bounds -- TODO confirm against kmeans.initialize.
    centroids = kmeans.initialize(num_centroids, -5, 5)

    x = Stream('x')
    # NOTE(review): `draw` and `output` are not defined anywhere in this
    # section; they must be bound earlier in the file or this line raises
    # NameError.
    m = KMeans.KMeansStream(draw=draw, output=output, k=k)
    # The same stream is used both as training data and as prediction input.
    model = Stream_Learn(data_train=x, data_out=x,
                         train_func=m.train, predict_func=m.predict,
                         min_window_size=k,
                         max_window_size=max_window_size,
                         step_size=step_size, num_features=2)
    y = model.run()

    for i in range(num_points):
        # Pick one centre at random and nudge it before sampling from it.
        index = np.random.randint(0, num_centroids)
        # Uniform offset in [-1, 1) per coordinate; it is doubled when applied.
        z = np.random.rand(1, 2) * 2 - 1
        centroids[index] = centroids[index].reshape(1, 2) + z * 2

        # One new sample around the moved centre, appended as a plain tuple.
        sample = kmeans.initializeDataCenter(centroids[index], 1, 1)
        x.extend([tuple(sample.tolist()[0])])

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(i)

    # Formatted so the output matches the former `print label, value` form
    # (label, one separating space, value) on both Python 2 and Python 3.
    print("%s %s" % ("Average number of iterations: ", m.avg_iterations))
    print("%s %s" % ("Average error: ", m.avg_error))
def all_func(x, y, model, state, window_state):
    """Redraw the map with the current points coloured by nearest centroid.

    Passed to Stream_Learn as its ``all_func`` callback.

    Args:
        x: Points to plot; also passed to ``kmeans.findClosestCentroids``.
        y: Unused here.
        model: Object exposing a ``centroids`` attribute.
        state: The map created by an earlier call, or ``None`` on the first
            call, in which case a new ``Geomap`` is created.
        window_state: Unused here.

    Returns:
        The map object, so it can be handed back in as ``state`` next time.
    """
    if state is None:
        # Map corners: latitude 20..60, longitude -126..-65 (presumably
        # lower-left / upper-right corners -- confirm against Geomap).
        state = Geomap.Geomap(llcrnrlat=20, llcrnrlon=-126,
                              urcrnrlat=60, urcrnrlon=-65)

    state.clear()
    labels = kmeans.findClosestCentroids(x, model.centroids)
    # Only the data points are drawn; the centroids themselves are not plotted.
    state.plot(x, labels, s=70)
    return state


# Live demo: cluster the locations of Meetup RSVPs from US groups.
x = Stream('x')
m = KMeans.KMeansStream(draw=False, output=False, k=5)
# Positional arguments 5, 30, 1, 2 presumably map to min_window_size,
# max_window_size, step_size and num_features -- confirm against the
# Stream_Learn signature.
model = Stream_Learn(x, x, m.train, m.predict, 5, 30, 1, 2, all_func=all_func)
y = model.run()

r = requests.get('http://stream.meetup.com/2/rsvps', stream=True)
i = 0  # number of points pushed into the stream so far
try:
    for line in r.iter_lines():
        if not line:
            # Skip empty lines yielded by the stream.
            continue
        group = json.loads(line)['group']
        lat, lon = group['group_lat'], group['group_lon']
        if group['group_country'] == 'us':
            x.extend([(lat, lon)])
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(i)
            i += 1
finally:
    # A streamed response holds its connection open until closed; release
    # it even when the loop is interrupted or a record fails to parse.
    r.close()