def filter_to_bounding_box(frame, lat_min=45.4, lat_max=45.6,
                           long_min=-123.5, long_max=-122.0):
    """Return the rows of ``frame`` lying strictly inside a lat/long box.

    The four comparisons are combined into one boolean mask and applied in a
    single indexing step.  Chaining ``frame[m1][m2]...`` instead forces pandas
    to re-align every later mask (built from the unfiltered frame) against an
    already-filtered frame, which emits a "Boolean Series key will be
    reindexed" warning and creates a throwaway intermediate frame per step.

    Args:
        frame: DataFrame with ``lat`` and ``long`` columns.
        lat_min: exclusive lower latitude bound.
        lat_max: exclusive upper latitude bound.
        long_min: exclusive lower longitude bound.
        long_max: exclusive upper longitude bound.

    Returns:
        The subset of ``frame`` whose coordinates fall inside the box.  Rows
        for which any comparison is False (e.g. NaN coordinates) are dropped.
    """
    in_box = (
        (frame['lat'] > lat_min)
        & (frame['lat'] < lat_max)
        & (frame['long'] > long_min)
        & (frame['long'] < long_max)
    )
    return frame[in_box]


if __name__ == '__main__':
    # Imported inside the guard so that merely importing this module does not
    # require scikit-learn.
    from sklearn.cluster import KMeans

    with open('data/MasterApartmentData.json') as f:
        my_dict = json.load(f)

    # Transpose so that the top-level JSON keys become the row index and the
    # per-entry fields become columns (the column selection below relies on
    # this; assumes the JSON is a dict of dicts -- TODO confirm).
    dframe = DataFrame(my_dict).T
    dframe = dframe[['content', 'laundry', 'price', 'dog', 'bed', 'bath',
                     'feet', 'long', 'parking', 'lat', 'smoking', 'getphotos',
                     'cat', 'hasmap', 'wheelchair', 'housingtype']]

    # Save the number of data points that we started with.
    og = dframe.shape[0]

    # Count a shared or split bathroom as "half" of a bathroom.
    dframe['bath'] = dframe['bath'].replace(['shared', 'split'], 0.5)

    # Only keep listings inside the lat/long box we want to model
    # (presumably the Portland, OR area -- verify against the data source).
    # NOTE(review): the comparisons assume lat/long hold numeric values.
    dframe = filter_to_bounding_box(dframe)
    aftermap = dframe.shape[0]

    # Number of listings dropped by the bounding-box filter.
    print(og - aftermap)

    # One [lat, long] pair per remaining listing, in row order.
    data = dframe[['lat', 'long']].values.tolist()

    # Cluster the coordinates into 40 groups; the cluster centres serve as
    # the "neighborhoods".
    # NOTE(review): no random_state is passed, so the centres can differ
    # from run to run.
    km = KMeans(n_clusters=40)
    km.fit(data)
    neighborhoods = km.cluster_centers_