def ordinary_kriging_evaluation(year): """ Under leave-one-out setting, use only crime rate. """ from pykrige.ok import OrdinaryKriging from sklearn.model_selection import LeaveOneOut y_cnt = retrieve_crime_count(year) demo = generate_corina_features() population = demo[1][:,0].reshape(demo[1].shape[0], 1) Y = y_cnt / population * 10000 coords = get_centroid_ca() data = np.concatenate((coords, Y), axis=1) loo = LeaveOneOut() errors = [] for train_idx, test_idx in loo.split(data): x_train = data[train_idx,:] coords_test = data[test_idx, [0,1]] y_test = data[test_idx, 2] OK = OrdinaryKriging(x_train[:,0], x_train[:,1], x_train[:,2], variogram_model="linear") z, var = OK.execute("points", coords_test[0], coords_test[1]) errors.append(abs(z[0] - y_test[0])) print np.mean(errors), np.mean(errors) / np.mean(Y) return errors
def predict_crime_with_embedding(): ge = get_graph_embedding_features("taxi_all.txt") y_cnt = retrieve_crime_count(2010) demo = generate_corina_features() population = demo[1][:,0].reshape(demo[1].shape[0], 1) y = y_cnt / population * 10000 er = leaveOneOut_error(y, ge) print er return er
def predict_crime_with_embedding(): ge = get_graph_embedding_features("taxi_all.txt") y_cnt = retrieve_crime_count(2010) demo = generate_corina_features() population = demo[1][:, 0].reshape(demo[1].shape[0], 1) y = y_cnt / population * 10000 er = leaveOneOut_error(y, ge) print er return er
def extract_raw_samples(year=2010, crime_t=['total'], crime_rate=True):
    """
    Extract all samples with raw labels and features.

    This function is called once only to avoid unnecessary disk I/O.
    (The previous docstring claimed "Return None if the corresponding
    feature is not selected", but no such selection path exists -- every
    output is always computed and returned.)

    Input:
        year - which year to study
        crime_t - crime types of interest, e.g. 'total'
        crime_rate - predict crime rate (True) or raw count (False)
    Output:
        Y - crime rate / count, shape (N, 1)
        D - demo feature
        P - POI feature
        Tf - taxi flow matrix (count)
        Gd - geo weight matrix
    """
    # Crime count per community area
    y_cnt = retrieve_crime_count(year, col=crime_t)
    # Crime rate / count; population is the first demographic column.
    demo = generate_corina_features()
    population = demo[1][:, 0].reshape(demo[1].shape[0], 1)
    # NOTE(review): under Python 2, if y_cnt and population are integer
    # arrays this division truncates -- confirm the operands are floats.
    Y = y_cnt / population * 10000 if crime_rate else y_cnt
    assert Y.shape == (N, 1)
    # Demo features
    D = demo[1]
    # POI features
    P = getFourSquarePOIDistribution(useRatio=False)
    # Taxi flow matrix
    Tf = getTaxiFlow(normalization="none")
    # Geo weight matrix
    Gd = generate_geographical_SpatialLag_ca()
    return Y, D, P, Tf, Gd
def extract_raw_samples(year=2010, crime_t=['total'], crime_rate=True):
    """
    Extract all samples with raw labels and features.

    NOTE(review): duplicates the extract_raw_samples defined earlier in
    this file (that copy checks the shape against the module constant N,
    this one against the literal 77). Being the later definition, this is
    the one in effect. Consider removing one of the two.

    Input:
        year - which year to study
        crime_t - crime types of interest, e.g. 'total'
        crime_rate - predict crime rate (True) or raw count (False)
    Output:
        Y - crime rate / count, shape (77, 1)
        D - demo feature
        P - POI feature
        Tf - taxi flow matrix (count)
        Gd - geo weight matrix
    """
    y_cnt = retrieve_crime_count(year, col=crime_t)
    demo = generate_corina_features()
    region_cnt = demo[1].shape[0]
    # Population is the first demographic column.
    population = demo[1][:, 0].reshape(region_cnt, 1)
    if crime_rate:
        Y = y_cnt / population * 10000
    else:
        Y = y_cnt
    assert Y.shape == (77, 1)
    D = demo[1]
    P = getFourSquarePOIDistribution(useRatio=False)
    Tf = getTaxiFlow(normalization="none")
    Gd = generate_geographical_SpatialLag_ca()
    return Y, D, P, Tf, Gd
# Module-level visualization fragment. NOTE(review): this span was collapsed
# onto a single physical line during extraction, so the extent of the
# `for h in range(24):` loop body cannot be determined from here -- left
# byte-identical rather than reformatted. It also depends on state defined
# before this chunk (`fin`, an earlier `Y`); presumably `fin` is an open
# pickle file handle and `Y` an earlier crime-rate vector -- TODO confirm
# against the full file before editing.
mf = pickle.load(fin) line = pickle.load(fin) dwt = pickle.load(fin) dws = pickle.load(fin) hdge = pickle.load(fin) for h in range(24): Fn = similarityMatrix(hdge[h]) x, y, xp, yp, lp = generate_point(Fn, Y) f = plt.figure() plt.scatter(x, y, color='red') plt.show() demo = generate_corina_features() y_cnt = retrieve_crime_count(2013) population = demo[1][:, 0].reshape(demo[1].shape[0], 1) Y = y_cnt / population * 10000 F = getTaxiFlow(normalization="none") x, y, xp, yp, lp = generate_point(F, Y) plt.rc("axes", linewidth=2) f = plt.figure(figsize=(8, 6)) plt.scatter(x, y, s=16) plt.plot([-100, -100, 3500, -100], [3000, -3000, 0, 3000], linewidth=2, color='blue')