def main_evaluate_feature_setting(year=2010, crime_t=None):
    """Print a tab-separated error table comparing feature settings.

    For every feature setting, ``leaveOneOut_error`` is called once without
    a GWR weight (reported in the ``NB_*`` rows) and once per candidate
    bandwidth with a weight from ``generate_GWR_weight``. The bandwidth
    giving the lowest MAE supplies the ``GWNBR_*`` rows.

    Args:
        year: Year forwarded to ``extract_raw_samples``.
        crime_t: Crime types forwarded to ``extract_raw_samples``. When
            omitted, a fresh ``['total']`` list is created for each call so
            that no list object is shared between calls.

    Returns:
        None. The table is written to standard output.
    """
    if crime_t is None:
        crime_t = ['total']

    feature_settings = [
        ['demo'],
        ['demo', 'poi'],
        ['demo', 'taxi'],
        ['demo', 'poi', 'taxi'],
        ['demo', 'geo'],
        ['demo', 'geo', 'poi'],
        ['demo', 'geo', 'taxi'],
        ['all'],
    ]
    Y, D, P, Tf, Gd = extract_raw_samples(year, crime_t)
    bandwidths = [0.08, 0.09, 0.1, 0.15, 0.2, 0.3, 0.5]

    nb_MAEs = []
    nb_MREs = []
    gwnbr_MAEs = []
    gwnbr_MREs = []

    for feature_setting in feature_settings:
        # Baseline: no GWR weight is passed.
        # NOTE(review): mae/mre are presumably mean absolute / mean relative
        # error -- confirm against leaveOneOut_error.
        mae, mre = leaveOneOut_error(Y, D, P, Tf, Y, Gd, Y, feature_setting)
        nb_MAEs.append(mae)
        nb_MREs.append(mre)

        # Tune bandwidth for GWR model.
        # Infinity is a true upper bound for any finite error, so the first
        # comparable candidate always replaces it. The 1.0 placeholder for
        # the MRE survives only if no candidate MAE compares below infinity.
        best_mae = float('inf')
        best_mre = 1.0
        for h in bandwidths:
            gwr_gamma = generate_GWR_weight(h)
            mae, mre = leaveOneOut_error(Y, D, P, Tf, Y, Gd, Y,
                                         feature_setting, gwr_gamma)
            # Strict comparison: on ties the earlier bandwidth is kept.
            if mae < best_mae:
                best_mae = mae
                best_mre = mre
        gwnbr_MAEs.append(best_mae)
        gwnbr_MREs.append(best_mre)

    # Each column header is the first letter of every feature name joined by
    # '+', followed by a tab (so the row ends with a trailing tab).
    header_cells = ['+'.join([ele[0] for ele in f]) + "\t"
                    for f in feature_settings]

    # Single-argument print(...) parses the same way as a Python 2 print
    # statement and as a Python 3 function call.
    print("Settings\t" + ''.join(header_cells))
    print("NB_MAE\t" + '\t'.join(map(str, nb_MAEs)))
    print("NB_MRE\t" + '\t'.join(map(str, nb_MREs)))
    print("GWNBR_MAE\t" + '\t'.join(map(str, gwnbr_MAEs)))
    print("GWNBR_MRE\t" + '\t'.join(map(str, gwnbr_MREs)))
def main_compare_taxi_normalization_method():
    """Print the best GWR result for each taxi normalization method.

    For every year in 2010..2014 the year is printed, samples are loaded
    with ``extract_raw_samples(year, ["total"])``, and for each of the
    normalization methods "bydestination", "bysource" and "none" every
    candidate bandwidth is evaluated with ``leaveOneOut_error`` using the
    ``["all"]`` feature setting. One line per method is printed holding the
    method name, the lowest MAE, the MRE at that bandwidth and the
    bandwidth itself, separated by single spaces.

    Returns:
        None. Results are written to standard output.
    """
    candidate_bandwidths = [0.08, 0.09, 0.1, 0.15, 0.2, 0.3, 0.5]

    for year in range(2010, 2015):
        # Single-argument print(...) behaves the same as the Python 2 print
        # statement.
        print(year)
        Y, D, P, Tf, Gd = extract_raw_samples(year, ["total"])

        for taxi_norm in ["bydestination", "bysource", "none"]:
            # (mae, mre, bandwidth) of the best candidate so far; the
            # initial tuple holds the placeholders reported when no
            # candidate MAE is below sys.maxint.
            best = (sys.maxint, 1.0, 0)
            for bandwidth in candidate_bandwidths:
                weight = generate_GWR_weight(bandwidth)
                mae, mre = leaveOneOut_error(Y, D, P, Tf, Y, Gd, Y, ["all"],
                                             weight, taxi_norm)
                # Strict comparison keeps the earliest bandwidth on ties.
                if mae < best[0]:
                    best = (mae, mre, bandwidth)

            fields = (taxi_norm,) + best
            print(' '.join(str(field) for field in fields))