def generate_raw_samples(year=2012):
    """
    Assemble the raw feature matrices for all samples.

    Crime counts, demo features and POI features are taken from
    ``extract_raw_samples(year)``.  The taxi-flow and geographic features
    are graph embeddings read through ``get_graph_embedding_features``
    from 'taxi-CA-static.vec' and 'geo-CA.vec'.

    Parameters
    ----------
    year : int
        Forwarded unchanged to ``extract_raw_samples``.

    Returns
    -------
    Y : Numpy.Array
        Crime counts
    D : Numpy.Array
        Demo features
    P : Numpy.Array
        POI features
    T : Numpy.Array
        Taxi flow graph embedding
    G : Numpy.Array
        Geographic graph embedding
    """
    # The last two values produced by extract_raw_samples are not used
    # here; the embeddings loaded below are returned in their place.
    crime, demo, poi, _raw_taxi, _raw_geo = extract_raw_samples(year)

    taxi_embedding = get_graph_embedding_features('taxi-CA-static.vec')
    geo_embedding = get_graph_embedding_features('geo-CA.vec')

    return crime, demo, poi, taxi_embedding, geo_embedding
def generate_raw_samples(year=2014):
    """
    Generate raw features for all samples.

    Crime counts, demo features and POI features come from
    ``extract_raw_samples(year)``; the taxi-flow and geographic features
    are graph embeddings read from 'taxi_all.txt' and 'geo_all.txt'.

    NOTE(review): this file contains an earlier function with the same
    name that accepts ``year``.  Assuming both are module-level, this later
    definition is the one in effect, so it accepts ``year`` as well
    (defaulting to the previously hard-coded 2014) to keep calls such as
    ``generate_raw_samples(year=2013)`` working.

    Parameters
    ----------
    year : int, optional
        Forwarded to ``extract_raw_samples``.  Defaults to 2014.

    Returns
    -------
    Y : Numpy.Array
        Crime counts
    D : Numpy.Array
        Demo features
    P : Numpy.Array
        POI features
    T : Numpy.Array
        Taxi flow graph embedding
    G : Numpy.Array
        Geographic graph embedding
    """
    # The raw taxi-flow / geo values returned by extract_raw_samples are
    # discarded; the graph embeddings loaded below replace them.
    Y, D, P, _, _ = extract_raw_samples(year=year)
    T = get_graph_embedding_features('taxi_all.txt')
    G = get_graph_embedding_features('geo_all.txt')
    return Y, D, P, T, G
def plot_hourly_crime():
    """
    Plot the average hourly crime rate for 2013, 2014 and 2015.

    For each year the hourly crime data is unpickled from
    "../chicago-hourly-crime-<year>.pickle", divided by the first column
    of the demo features (used as population) and multiplied by 10000.
    The mean over axis 1 is drawn as one line per year, and the figure is
    saved to "crime-rate-hourly.pdf" in the current working directory.

    Returns
    -------
    None
    """
    plt.rc("axes", linewidth=2)
    plt.figure(figsize=(8, 6))

    for year in range(2013, 2016):
        # Only the demo features are needed here (for the population).
        _, D, _, _, _ = extract_raw_samples(year)
        population = D[:, 0]

        # Pickle data is binary: open in "rb" and close the handle
        # deterministically instead of leaking it.  pickle.load can run
        # arbitrary code, so only trusted files should be placed here.
        pickle_path = "../chicago-hourly-crime-{0}.pickle".format(year)
        with open(pickle_path, "rb") as fin:
            Yh = pickle.load(fin)

        Yh = Yh / population * 10000
        if year == 2015:
            # NOTE(review): the 2015 values are doubled -- presumably the
            # 2015 data covers only part of the year; confirm.
            Yh = Yh * 2
        plt.plot(Yh.mean(axis=1), lw=3)

    plt.legend(["2013", "2014", "2015"], fontsize=20, loc='best')
    plt.xlabel("Hour in day", fontsize=20)
    plt.ylabel("Average crime rate", fontsize=24)
    plt.axis([0, 23, 10, 70])

    axes = plt.gca()
    axes.set_xticks([0, 6, 12, 18, 23])
    axes.set_xticklabels(("0:00", "6:00", "12:00", "18:00", "23:00"))

    # The on/off flag is passed positionally: its keyword name was `b` in
    # older matplotlib releases and `visible` in newer ones.
    plt.grid(True, axis="both", lw=1)
    plt.tick_params(labelsize=18)
    plt.savefig("crime-rate-hourly.pdf")
def evaluate_various_embedding_features_with_lag_model(year, spatial):
    """
    Evaluate MF, LINE and deepwalk/HDGE embeddings with the lag model.

    For each of the 24 hour slots, three embedding variants are scored
    with ``leaveOneOut_error``.  The prediction target is the first income
    column from ``retrieve_income_features()``, repeated for all 24 slots,
    so the target is the same for every hour.  Hourly crime rate and
    average house price were used as alternative targets in earlier
    versions of this function.

    Parameters
    ----------
    year : int or str
        Converted with ``int()`` and passed to ``extract_raw_samples``.
    spatial : str
        Selects the feature groups handed to ``leaveOneOut_error``:
        "nospatial"   -> demo, poi, taxi
        "onlyspatial" -> demo, poi, geo
        "usespatial"  -> demo, poi, geo, taxi
        anything else -> demo, poi

    Returns
    -------
    tuple of six lists, each with one entry per hour slot (24 entries):
        mf_mre, line_mre, dw_mre, mf_mae, line_mae, dw_mae
    """
    # Imported once, locally, instead of on every loop iteration.
    import nimfa

    _, D, P, _, G = extract_raw_samples(int(year))

    # predict average income
    _, income = retrieve_income_features()
    Yh = np.repeat(income[:, 0, None], 24, axis=1).T
    assert Yh.shape == (24, N)

    # Pickle data is binary, so the file is opened in "rb" mode.  The
    # objects were dumped sequentially; the fifth one (HDGE) is not used
    # by this function and is therefore not read.  pickle.load can run
    # arbitrary code, so only a trusted file should be placed here.
    with open("CAflowFeatures.pickle", "rb") as fin:
        mf = pickle.load(fin)
        line = pickle.load(fin)
        dwt = pickle.load(fin)
        dws = pickle.load(fin)

    feature_sets = {
        "nospatial": ['demo', 'poi', 'taxi'],
        "onlyspatial": ['demo', 'poi', 'geo'],
        "usespatial": ['demo', 'poi', 'geo', 'taxi'],
    }

    mf_mre = []
    mf_mae = []
    line_mre = []
    line_mae = []
    dw_mre = []
    dw_mae = []

    for h in range(24):
        print(h)
        Yhat = Yh[h, :].reshape((N, 1))
        # Fresh list per hour, as before; unknown `spatial` values fall
        # back to the demo + poi baseline.
        features_ = list(feature_sets.get(spatial, ["demo", "poi"]))

        # MF models
        Tmf = mf[h]
        # NOTE(review): G does not change inside this loop, yet the
        # factorization is refit for every hour.  It is left here because
        # hoisting it could change results if nimfa's initialization is
        # random -- confirm before moving it out of the loop.
        nmf = nimfa.Nmf(G, rank=4, max_iter=100)
        nmf_fit = nmf()
        src = nmf_fit.basis()
        dst = nmf_fit.coef()
        Gmf = np.concatenate((src, dst.T), axis=1)
        mae, mre = leaveOneOut_error(
            Yhat, D, P,
            similarityMatrix(Tmf), Yhat,
            keep_topk(similarityMatrix(Gmf), 20), Yhat,
            features=features_,
            taxi_norm="bydestination")
        mf_mre.append(mre)
        mf_mae.append(mae)
        print("MF MRE: {0}".format(mre))

        # LINE model
        Tline = line[h]
        Gline = get_graph_embedding_features('geo_all.txt')
        mae, mre = leaveOneOut_error(
            Yhat, D, P,
            similarityMatrix(Tline), Yhat,
            keep_topk(similarityMatrix(Gline)), Yhat,
            features=features_,
            taxi_norm="bydestination")
        line_mre.append(mre)
        line_mae.append(mae)
        print("LINE_slotted MRE: {0}".format(mre))

        # deepwalk: temporal (dwt) and spatial (dws) embeddings for this
        # hour, passed without taxi normalization.
        mae, mre = leaveOneOut_error(
            Yhat, D, P,
            similarityMatrix(dwt[h]), Yhat,
            similarityMatrix(dws[h]), Yhat,
            features=features_,
            taxi_norm="none")
        dw_mre.append(mre)
        dw_mae.append(mae)
        print("HDGE MRE: {0}".format(mre))

    return mf_mre, line_mre, dw_mre, mf_mae, line_mae, dw_mae
def evaluate_various_flow_features_with_concatenation_model(year, spatial):
    """
    Evaluate MF, LINE and deepwalk/HDGE embeddings by feature concatenation.

    The target is the hourly crime data unpickled from
    "../chicago-hourly-crime-<year>.pickle", divided by the first demo
    column (used as population) and multiplied by 10000.  For each of the
    24 hour slots the demo and POI features are concatenated column-wise
    with the selected embedding features and scored with
    ``leaveOneOut_eval``.

    Parameters
    ----------
    year : int or str
        Converted with ``int()`` for ``extract_raw_samples``; also
        formatted as given into the hourly-crime pickle file name.
    spatial : str
        Which embedding features are appended to demo + POI:
        "nospatial"   -> taxi-flow embedding only (deepwalk: ``dwt``)
        "onlyspatial" -> geographic embedding only (deepwalk: ``dws``)
        "usespatial"  -> both (deepwalk: ``hdge``)

    Returns
    -------
    tuple of six lists, each with one entry per hour slot (24 entries):
        mf_mre, line_mre, dw_mre, mf_mae, line_mae, dw_mae

    Raises
    ------
    ValueError
        If ``spatial`` is not one of the three values listed above.
        Previously such a value surfaced as an unbound-variable error in
        the first loop iteration.
    """
    valid_modes = ("nospatial", "onlyspatial", "usespatial")
    if spatial not in valid_modes:
        raise ValueError(
            "spatial must be one of {0}, got {1!r}".format(
                valid_modes, spatial))

    # Imported once, locally, instead of on every loop iteration.
    import nimfa

    _, D, P, _, G = extract_raw_samples(int(year))
    population = D[:, 0]

    # Pickle data is binary: open in "rb" and close the handle
    # deterministically.  pickle.load can run arbitrary code, so only
    # trusted files should be placed here.
    crime_path = "../chicago-hourly-crime-{0}.pickle".format(year)
    with open(crime_path, "rb") as fin:
        Yh = pickle.load(fin)
    Yh = Yh / population * 10000
    assert Yh.shape == (24, N)

    with open("CAflowFeatures.pickle", "rb") as fin:
        mf = pickle.load(fin)
        line = pickle.load(fin)
        dwt = pickle.load(fin)
        dws = pickle.load(fin)
        hdge = pickle.load(fin)

    # Per-hour deepwalk-style features matching the requested mode.
    deepwalk_by_mode = {
        "nospatial": dwt,
        "onlyspatial": dws,
        "usespatial": hdge,
    }
    deepwalk = deepwalk_by_mode[spatial]

    mf_mre = []
    mf_mae = []
    line_mre = []
    line_mae = []
    dw_mre = []
    dw_mae = []

    for h in range(24):
        print(h)
        # Column-vector target for this hour, shared by all three models.
        target = Yh[h, :].reshape((N, 1))

        # MF models
        Tmf = mf[h]
        # NOTE(review): G does not change inside this loop, yet the
        # factorization is refit for every hour.  It is left here because
        # hoisting it could change results if nimfa's initialization is
        # random -- confirm before moving it out of the loop.
        nmf = nimfa.Nmf(G, rank=4, max_iter=100)
        nmf_fit = nmf()
        src = nmf_fit.basis()
        dst = nmf_fit.coef()
        Gmf = np.concatenate((src, dst.T), axis=1)
        if spatial == "nospatial":
            X = np.concatenate((D, P, Tmf), axis=1)
        elif spatial == "onlyspatial":
            X = np.concatenate((D, P, Gmf), axis=1)
        else:  # "usespatial"
            X = np.concatenate((D, P, Tmf, Gmf), axis=1)
        mre, mae = leaveOneOut_eval(X, target)
        mf_mre.append(mre)
        mf_mae.append(mae)
        print("MF MRE: {0}".format(mre))

        # LINE model
        Tline = line[h]
        Gline = get_graph_embedding_features('geo_all.txt')
        if spatial == "nospatial":
            X = np.concatenate((D, P, Tline), axis=1)
        elif spatial == "onlyspatial":
            X = np.concatenate((D, P, Gline), axis=1)
        else:  # "usespatial"
            X = np.concatenate((D, P, Tline, Gline), axis=1)
        mre, mae = leaveOneOut_eval(X, target)
        line_mre.append(mre)
        line_mae.append(mae)
        print("LINE_slotted MRE: {0}".format(mre))

        # deepwalk
        TGdw = deepwalk[h]
        X = np.concatenate((D, P, TGdw), axis=1)
        mre, mae = leaveOneOut_eval(X, target)
        dw_mre.append(mre)
        dw_mae.append(mae)
        print("HDGE MRE: {0}".format(mre))

    return mf_mre, line_mre, dw_mre, mf_mae, line_mae, dw_mae