def evaluate_multi(url, url2, time_lags=24):
    """Evaluate multi-step predictions against the PM2.5 labels and print MAE, RMSE and R2
    (errors are rescaled by the 300 normalisation factor)."""
    cr = Crawling()
    preds = utils.load_file(url)
    preds = np.array(preds)
    lt = len(preds)
    labels = utils.load_file(url2)
    labels = np.array(labels)
    loss_mae0, loss_mae1 = 0.0, 0.0
    loss_rmse0, loss_rmse1 = 0.0, 0.0
    r2_0, r2_1 = 0.0, 0.0
    for i, d in enumerate(preds):
        lb_i = i * pr.strides + time_lags + 1
        mae0, mse0, r2 = get_evaluation(d[:time_lags, :], labels[lb_i:(lb_i + time_lags), :, 0])
        # mae1, mse1 = get_evaluation(d[:time_lags, :, 1], labels[lb_i:(lb_i + time_lags), :, 1])
        loss_rmse0 += mse0
        # loss_rmse1 += mse1
        loss_mae0 += mae0
        # loss_mae1 += mae1
        r2_0 += r2
    loss_mae0 = loss_mae0 / lt * 300
    loss_mae1 = loss_mae1 / lt * 300
    loss_rmse0 = sqrt(loss_rmse0 / lt) * 300
    loss_rmse1 = sqrt(loss_rmse1 / lt) * 300
    r2_0 = r2_0 / lt
    print("MAE: %.6f %.6f" % (loss_mae0, cr.ConcPM25(loss_mae0)))
    print("RMSE: %.6f %.6f" % (loss_rmse0, cr.ConcPM25(loss_rmse0)))
    print("R2 Score: %.6f" % r2_0)
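# `get_evaluation` is used above but not defined in this section. The sketch below is only
# an assumption about its behaviour, inferred from how it is called here (it must return
# MAE, MSE and R2 for a prediction/label pair); the project's real helper may differ, e.g.
# in argument order or flattening.
#
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
#
# def get_evaluation(pred, label):
#     pred = np.asarray(pred).flatten()
#     label = np.asarray(label).flatten()
#     return (mean_absolute_error(label, pred),
#             mean_squared_error(label, pred),
#             r2_score(label, pred))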
def print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor=0):
    """Print the stepwise ("S") and accumulated ("T") MAE/RMSE for each forecast hour.
    The accumulation is done in place on loss_mae and loss_rmse."""
    cr = Crawling()
    for x in xrange(decoder_length):
        print("%ih" % (x + 1))
        if not forecast_factor:
            print("S MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM25(loss_mae[x])))
            print("S RMSE: %.6f %.6f" % (loss_rmse[x], cr.ConcPM25(loss_rmse[x])))
        else:
            print("S PM10 MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM10(loss_mae[x])))
            print("S PM10 RMSE: %.6f %.6f" % (loss_rmse[x], cr.ConcPM10(loss_rmse[x])))
        if x > 0:
            loss_mae[x] += loss_mae[x - 1]
            t_mae = loss_mae[x] / (x + 1)
            loss_rmse[x] += loss_rmse[x - 1]
            t_rmse = loss_rmse[x] / (x + 1)
            if not forecast_factor:
                print("T MAE: %.6f %.6f" % (t_mae, cr.ConcPM25(t_mae)))
                print("T RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM25(t_rmse)))
            else:
                print("T PM10 MAE: %.6f %.6f" % (t_mae, cr.ConcPM10(t_mae)))
                print("T PM10 RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM10(t_rmse)))
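# Worked example (hypothetical per-hour errors): for loss_mae = [0.1, 0.2, 0.3] the "S"
# lines print each hour's own error, while the "T" lines print the running mean of the
# prefix, e.g. at hour 3: T MAE = (0.1 + 0.2 + 0.3) / 3 = 0.2. Because the accumulation is
# in place, the caller's loss_mae / loss_rmse lists are modified.
#
# print_accumulate_error([0.1, 0.2, 0.3], [0.2, 0.3, 0.4], decoder_length=3)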
def evaluate_single_pred(url, url2, decoder_length=8):
    """Evaluate single-step predictions (one 25-station vector per sample) against the
    labels at the final decoder step and print MAE, RMSE and R2."""
    cr = Crawling()
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    lt = data.shape[0] * data.shape[1]
    data = np.reshape(data, (lt, 25))
    dtl = len(data)
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    loss_mae = 0.0
    loss_rmse = 0.0
    r2_total = 0.0
    for i, d in enumerate(data):
        pred_t = np.asarray(d).flatten()
        lb_i = i * pr.strides + 24
        lbt = labels[lb_i:(lb_i + decoder_length), :, 0]
        lbg = lbt[decoder_length - 1, :].flatten()
        mae, mse, r2 = get_evaluation(pred_t, lbg)
        loss_mae += mae
        loss_rmse += mse
        r2_total += r2
        utils.update_progress((i + 1.0) / dtl)
    loss_mae = loss_mae / lt * 300
    loss_rmse = sqrt(loss_rmse / lt) * 300
    r2_total = r2_total / lt
    print("MAE: %.6f %.6f" % (loss_mae, cr.ConcPM25(loss_mae)))
    print("RMSE: %.6f %.6f" % (loss_rmse, cr.ConcPM25(loss_rmse)))
    print("R2 score: %.6f" % r2_total)
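# Example call (hypothetical file paths): each prediction is a 25-station vector that is
# scored against the labels at the final decoder step (decoder_length - 1).
#
# evaluate_single_pred("results/single_pred.pkl", "data/labels.pkl", decoder_length=8)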
def evaluate_sp(url, url2, decoder_length=24, is_grid=True, grid_eval=True):
    """Evaluate spatial predictions, either as 25x25 grids or as 25 station values
    (optionally projected onto the grid), and print MAE, RMSE and R2."""
    cr = Crawling()
    map_ = heatmap.build_map()
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
    else:
        lt = data.shape[0]
    if is_grid:
        data = np.reshape(data, (lt, data.shape[-2], 25, 25))
    else:
        data = np.reshape(data, (lt, data.shape[-2], 25))
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    loss_mae = 0.0
    loss_rmse = 0.0
    r2_total = 0.0
    for i, d in enumerate(data):
        d = d[:decoder_length, :, :]
        pred_t = []
        if is_grid:
            for d_ in d:
                d_t = heatmap.clear_interpolate_bound(np.asarray(d_), map_)
                pred_t.append(d_t)
        else:
            if grid_eval:
                for d_ in d:
                    d_t = heatmap.fill_map(d_, map_)
                    pred_t.append(d_t)
            else:
                pred_t = d
        lb_i = i * pr.strides + 24
        lbt = labels[lb_i:(lb_i + decoder_length), :, 0]
        if grid_eval:
            lbg = []
            for x in lbt:
                x_l = heatmap.fill_map(x, map_)
                lbg.append(x_l)
            lbg = np.asarray(lbg)
            lbg = lbg.flatten()
        else:
            lbg = lbt.flatten()
        pred_t = np.asarray(pred_t)
        pred_t = pred_t.flatten()
        mae, mse, r2 = get_evaluation(pred_t, lbg)
        loss_mae += mae
        loss_rmse += mse
        r2_total += r2
        utils.update_progress((i + 1.0) / lt)
    loss_mae = loss_mae / lt * 300
    loss_rmse = sqrt(loss_rmse / lt) * 300
    r2_total = r2_total / lt
    print("MAE: %.6f %.6f" % (loss_mae, cr.ConcPM25(loss_mae)))
    print("RMSE: %.6f %.6f" % (loss_rmse, cr.ConcPM25(loss_rmse)))
    print("R2 Score: %.6f" % r2_total)
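# Example calls (hypothetical file paths): with is_grid=True each predicted frame is
# treated as a 25x25 heatmap and passed through heatmap.clear_interpolate_bound; with
# is_grid=False and grid_eval=True the 25 station values are first projected onto the
# grid with heatmap.fill_map before scoring.
#
# evaluate_sp("results/grid_pred.pkl", "data/labels.pkl", decoder_length=24, is_grid=True)
# evaluate_sp("results/station_pred.pkl", "data/labels.pkl", is_grid=False, grid_eval=True)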
def evaluate_lstm(url, url2, decoder_length=24, forecast_factor=0, is_classify=False):
    """Evaluate per-hour LSTM predictions against the station-averaged labels and print
    accumulated errors, or AQI-class accuracy when is_classify is set."""
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    lt = data.shape[0] * data.shape[1]
    data = np.reshape(data, (lt, data.shape[-1]))
    if decoder_length > data.shape[-1]:
        decoder_length = data.shape[-1]
    dtl = len(data)
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * decoder_length
        loss_rmse = [0.0] * decoder_length
    else:
        acc = 0.
    # r2_total = 0.0
    cr = Crawling()
    for i, d in enumerate(data):
        if decoder_length < data.shape[-1]:
            pred_t = d[:decoder_length]
        else:
            pred_t = d
        lb_i = i * pr.strides + 24
        lbt = np.mean(labels[lb_i:(lb_i + decoder_length), :, forecast_factor], axis=1)
        a = 0.
        for t_i, (p, l) in enumerate(zip(pred_t, lbt)):
            if not is_classify:
                # mae, mse, _ = get_evaluation(p, l)
                mae = abs(cr.ConcPM10(p * 300) - cr.ConcPM10(l * 300))
                loss_mae[t_i] += mae
                # loss_rmse[t_i] += mse
            else:
                a += classify_data(pred_t, lbt, forecast_factor)
        if is_classify:
            a = a / decoder_length
            acc += a
        # r2_total += r2
        utils.update_progress((i + 1.0) / dtl)
    if not is_classify:
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt) * 300 for x in loss_rmse]
        # print("R2 score: %.6f" % r2_total)
        print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor=forecast_factor)
    else:
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
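# Example calls (hypothetical file paths): forecast_factor selects the label channel
# (0 is paired with ConcPM25 elsewhere in this file, non-zero with ConcPM10), and
# is_classify=True reports AQI-class accuracy via classify_data instead of MAE.
#
# evaluate_lstm("results/lstm_pred.pkl", "data/labels.pkl", decoder_length=24)
# evaluate_lstm("results/lstm_pred.pkl", "data/labels.pkl", decoder_length=24, is_classify=True)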
def convert_pm(pm, us_pm, r):
    crawling = Crawling()
    pm_ = []
    # extract stations class
    for d in pm:
        pr = []
        for s in us_pm:
            v = d[s] * r
            # pr.append(get_aqi_class(crawling.AQIPM25(v)))
            pr.append(crawling.aqi_pm25_china_class(v))
        pm_.append(pr)
    # extract class for all pixels
    # for d in pm:
    #     pr = []
    #     for idx in d:
    #         pr.append(get_aqi_class(crawling.AQIPM25(idx)))
    #     pm_.append(pr)
    return pm_
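# Example (hypothetical inputs): pm holds one frame of values per timestep, us_pm lists
# the station indices to extract, and r rescales the values before they are mapped to
# China PM2.5 AQI classes (e.g. r = 300 if the values were normalised by 300, as in the
# evaluation functions above).
#
# station_classes = convert_pm(pm_frames, us_station_indices, r=300)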
# for k, x in enumerate(y): # if x !=0 : # data.append(x) # px.append(xi[k]) # py.append(yi[t]) # px = np.array(px) # py = np.array(py) # final_data = griddata((px, py), np.array(data), (X,Y), method="nearest") # # st = "" # # for x in final_data: # # tmp = ",".join([str(y) for y in x.tolist()]) # # st += tmp + "\n" # # save_file("data/shandong_ip.csv", st, False) # mean_data = np.mean(final_data, axis=1) # print("output", mean_data.shape) # st = array_to_str(mean_data) # save_file("data/shandong_2014_ip.csv", st, False) #interplate missing data in shandong # data = pd.read_csv("/media/data/datasets/shandong/aqi_shandong_ip_uniq.csv", names=["timestamp", "pm25"]) # data = data.query("timestamp >= '2013-12-31 23:00:00' & timestamp <= '2016-12-31 22:00:00'") # print(data) # df = data.interpolate(method="nearest", axis=0) # df.to_csv("data/aqi_shandong_ip.csv") cr = Crawling() a = [57.17,57.38,55.41,53.42,49.79,45.85,41.6,37.95,37.18,36.44,36.08,37.21,43.26,48.26,52.74,54.6,54.63,56.43,60.68,64.13,65.65,65.92,66.44,67.45,71.07,68.69,61.21,51.95,36.48,29.86,25.69,22.49,21.59,21.67,21.14,19.35,21.78,24.59,28.19,26.57,28.03,29.6,30.64,31.9,31.76,33.8,35.79,39.81,48.28,50.06,48.09,41.05,35.71,32.03,29.74,29.07,26.03,26,27.87,28.24,32.49,37.59,38.62,38.53,39.64,40.92,43.37,47.85,52.05,55.47,56.02,57.56,62.46,62.97,59.7,55.95,52.43,50.36,47.1,42.57,41.11,41.46,43.1,44.23,49.63,54.71,57.82,59.17,58.91,63.12,68.03,69.29,68.54,69.63,74.33,74.13,77.24,83.62,81.71,80.21,76.88,75.12,69.81,67.79,66.07,63.99,62.08,65.99,69.63,71.68,71.61,71.09,72.36,75.51,79.8,82.22,82.1,82.63,85.88,91.59,99.86,99.05,91.31,82.9,76.56,73.16,72.69,72.92,73.79,76.08,73.1,71.32,75.49,76.37,79.48,77.49,80.35,81.98,85.32,89.96,95.4,98.31,104.83,110.51,111.41,113.25,105.22,87.02,73.06,61.42,56.06,55.07,54.71,55.29,51.25,52.26,53.91,50.27,47.48,47.12,46.79,50.4,50.64,50.23,49.19,50.87,53.19,53.13,52.51,50.99,50.06,48.55,48.3,48.39,45.47,43.08,40.76,37.26,37.95,40.19,45.08,49.6,51.22,53.62,55.38,56.66,60.18,62.94,63.3,65.17,67.24,71.32,75.01,77.55,72.62,65.21,55.26,50.17,43.23,38.94,37.94,38.42,39.75,43.94,52.46,57.27,62.33,64.36,67.57,68.53,69.89,68.73,69.82,72.43,76.8,80.15,86.84,87.45,79.97,69.67,59.36,55.02,49.73,46.76,44.99,45.06,45.81,52.56,57.9,63.47,66.1,67.93,67.24,66.78,68.24,68.38,64.5,63.2,55.56,51.83,53.17,52.08,45.47,43.93,43.87,40.35,36.98,33.11,30.99,31.16,32.14,38.42,43.05,47.31,49.43,50.99,49.7,49.92,52.31,52.5,53.93,54.88,54.99,56.92,58.16,60.71,59.55,56.18,55.34,51.52,47.37,43.71,41.86,40.73,40.47,42.52,43.58,41.97,40.96,39.57,40.69,41.65,41.34,42.75,43.62,44.2,47.31,50.39,52.88,58.05,57.76,55.46,51.44,46.82,44.17,43.97,44.53,43.95,46.34,49.96,52.32,57.17,58.08,58.9,59.69,64.09,64.98,67.79,66.89,67.79,70.75,74.93,77.74,76.76,75.8,72.09,68.28,62.09,58.38,57.3,56.81,57.97,59.28,60.85,64.24,65.59,66.19,66.75,68.91,71.25,73.26,74.21,74.96,76.21,78.34,81.95,88,92.97,91.5,82.46,74.72,69.04,66.27,65.37,64.59,65.17,66.66,69.76,76.28,77.07,79.45,81.97,83.07,84.65,83.45,84.21,83.47,85.27,86.52,89.31,92.92,92.16,86.95,80.2,75.76,73.44,72.54,68.34,65.33,64.5,64.88,65.18,65.02,66.88,67.2,64.63,64.95,64.57,63.11,62.48,61.14,59.6,59.64,60.3,63.59,64.65,60.03,56.22,53.54,53.46,49.7,47.18,44.13,43.65,42.88,42.9,45.32,46.01,47,47.62,47.13,46.89,48.69,52.2,56.76,57.41,57.64,58.94,61.9,59.89,56.69,53.26,46.34,47.11,46.68,47.99,47.86,45.54,44.08,43.59,43.94,45.59,46.6,44.95,42.35,39.66,37.83,34.65,31.97,29.65,27.52,26.73,26.31,23.69,20.23,17.69,17.69,17.69,17.69,17.69,17.69,19.9,19.9,19.9,19.9,19.9,19.9,19.9,21.03,19.99,18.8,18.8,20.
37,19.2,19.35,20.44,22.92,22.47,19.24,16.67,14.26,13.44,13.08,12.98,14.34,13.59,14.12,16.7,18.28,23.02,24.7,24.89,27.3,27.47,30.86,33.35,33.59,37.49,38.39,40.97,43.62,43.07,36.51,26.34,20.39,17.5,17.56,17.75,18.78,19.98,21.45,22.95,30.47,37.6,40.85,43.78,43.94,46.8,50.97,55.24,59.14,59.31,60.14,62.89,66.15,64.09,57.43,49.49,42.75,37.97,34.87,34.63,34.83,36.11,38.29,43.94,50.9,54.93,53.95,55.8,57.5,58.21,60.89,66.26,68.24,67.97,70.01,72.63,76.48,71.89,59.1,53.08,49.65,47.29,46.6,45.59,45.51,46.8,46.68,49.61,52.38,54.71,56.4,58.87,59.91,57.76,57.08,55.34,54.4,54.59,53.77,54.23,55.22,54.82,51.32,49.21,46.52,44.08,41.43,40.53,40.21,41.15,42.34,47.36,52.11,55.8,56.57,57.38,56.18,55.77,56.36,56.78,56.22,57.24,58.74,63.19,64.18,62.69,58.21,54.77,52.3,50.53,49.96,47.42,48.37,48.76,49.17,55.51,61.67,68.11,73.35,80.78,78.72,72.26,72.94,74.74,76.07,76.4,74.38,75.47,75.07,73.49,70.8,66.37,58.05,55.91,51.03,49.78,50.41,53.68,51.43,52.93,58.39,56.88,56.02,54.52,52.93,50.89,51.1,51.41,49.59,50.25,49.04,50.57,53.31,53.19,50.25,46.39,41.51,40.24,40.4,38.98,39.97,41.44,44.37,48.08,49.07,49.13,46.89,46.05,44.5,39.4,37.79,38.81,38.89,37.13,37.87,40.17,41.7,43.92,49.83,50.6,48.38,52.04,55.6,55.51,50.54,48.95,44.68,42.55,38.45,34.35,29.25,26.61,24.96,22.15,19.41,18.42,17.65,15.72,15.11,15.04,15.88,15.97,17.07,16.47,16.69,16.35,17.18,18.29,18.42,18.95,19.4,21.25,23.28,24.66,24.86,25.58,27.11,27.95,30.39,32.09,32.45,32.81,34.83,37.76,42.4,38.87,35.38,28.56,23.26,23.23,22.78,22.06,23.98,25.86,29.06,32.52,39.97,44.4,46,49.78,50.63,52.84,55.94,58.17,59.49,62.09,61.69,65.16] for x in a: print(cr.AQIPM25(x))
def evaluate_china(url, url2, stations, encoder_length=12, decoder_length=24, eval_stat=False):
    """Evaluate China predictions per station and per hour on the China PM2.5 AQI scale,
    printing per-hour MAE and class accuracy."""
    cr = Crawling()
    # missing = [136,156,824,1028,1053,1084,1085,1457,1460,1461,1462,1464,1465,1476,1477,
    #            1478,1479,1481,1482,1484,1485,1486,1487,1510,1841,3814,4246,4268,4301,4311,
    #            4313,4317,4329,4331,4333,4349,4360,4504,4524,4529,4532,4535,4580,4860,5270,
    #            5457,5489,5509,5962,6007,6011,6039,6054,6125,6172,6189,6192,6201,6230,6234,
    #            6246,6254,6255,6256,6257,6258,6295,6300,6319,6336,6356,6362,6371,6372,6394,
    #            6491,6492,6493,6494,6497,6517,6519,6523,6539,6559,6564,6568,6569,6570,6612,
    #            6613,6614,6615,6616,6617,6618,6619,6620,6621,6622,6623,6624,6625,6626,6627,
    #            6628,6629,6630,6631,6632,6633,6634,6635,6636,6637,6638,6687,6704,6754,6832,
    #            6849,6858,6873,6979,7255,8099,8100,8101,8225,8226,8227,8228,8229,8230,8231,
    #            8232,8233,8234,8235,8236,8237,8238,8239,8240,8241,8273,8274,8275,8276,8277,
    #            8278,8279,8280,8281,8282,8283,8284,8285,8286,8287,8288,8289,8290,8291,8344,
    #            8421,8422,8423,8424,8425,8426,8427,8428,8429,8430,8431,8432,8495,8496,8497,
    #            8498,8499,8500,8501,8502,8503,8504,8631,8759]
    data = utils.load_file(url)
    labels = utils.load_file(url2)
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
        labels = np.reshape(labels, (lt, data.shape[-2], data.shape[-1]))
    else:
        lt = len(data)
        data = np.array(data)
        labels = np.array(labels)
    if not eval_stat:
        loss_mae = [0.0] * decoder_length
    else:
        loss_mae = np.array([[0.0] * decoder_length] * len(stations))
    classes = [0.0] * decoder_length
    # loss_lt = 0
    loss_total = [0.0] * decoder_length
    # ckt = 0
    valid = []
    for i, (d_, l_) in enumerate(zip(data, labels)):
        # st = 2 * i
        # ed = st + decoder_length
        # flag = False
        # for ck in range(st, ed):
        #     if ck in missing:
        #         flag = True
        #         break
        # if flag:
        #     continue
        d = d_[:decoder_length, :]
        lbt = l_[:decoder_length, :]
        for t_i, (t, l_t) in enumerate(zip(d, lbt)):
            pred_t = []
            label_t = []
            for r in stations:
                # pred_t.append(t[r])
                # label_t.append(l_t[r])
                # if not math.isnan(t[r]):
                pr = t[r]
                if pr < 0:
                    pr = 0
                print(pr)
                pred_t.append(cr.aqi_pm25_china(pr))
                label_t.append(cr.aqi_pm25_china(l_t[r] * 300))
                # label_t.append(l_t[r])
                # print(t[r], l_t[r])
            # if pred_t:
            if not eval_stat:
                mae = mean_absolute_error(pred_t, label_t)
                classes[t_i] += accuracy_score(
                    [cr.aqi_pm25_china_class(x) for x in pred_t],
                    [cr.aqi_pm25_china_class(x) for x in label_t])
                if mae > 80:
                    valid.append("%i,%i" % (i, t_i))
                if mae < 80:
                    loss_total[t_i] += 1
                    loss_mae[t_i] += mae
            else:
                mae = [abs(p_s - l_s) for p_s, l_s in zip(pred_t, label_t)]
                for m_i, m in enumerate(mae):
                    loss_mae[m_i, t_i] += m
        # loss_lt += 1
        # utils.update_progress((i + 1.0) / lt)
    va = "\n".join(valid)
    save_file("outline_china", va, False)
    loss_mae = [x / y if y > 0 else 0 for x, y in zip(loss_mae, loss_total)]
    for i, x in enumerate(loss_mae):
        # if not eval_stat:
        print(x, loss_total[i])
        # else:
        #     print([y / loss_lt for y in x])
    # print("accumulated:", np.mean(loss_mae[1:6]))
    print(np.array(classes) / lt)
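# Example call (hypothetical file paths and indices): stations holds the station column
# indices to score; note the predictions are used as-is while the labels are rescaled by
# 300 before both are converted to the China PM2.5 AQI.
#
# evaluate_china("results/china_pred.pkl", "data/china_labels.pkl", stations=[0, 5, 12])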
def evaluate_by_districts(url, url2, stride=2, encoder_length=72, decoder_length=24, forecast_factor=0,
                          is_classify=False, confusion_title="", norm=True, is_grid=True, offset=48, agg=True):
    """Evaluate district-aggregated predictions hour by hour: prints per-hour MAE (and daily
    means when agg is set), classification accuracy, or a confusion matrix depending on the flags."""
    print(encoder_length, decoder_length, offset)
    if not utils.validate_path("district_idx.pkl"):
        districts = convert_coordinate_to_idx()
    else:
        districts = utils.load_file("district_idx.pkl")
    data = utils.load_file(url)
    print(np.shape(data))
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
        # if not is_grid:
        #     data = np.reshape(data, (lt, data.shape[-1]))
        # else:
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    else:
        lt = data.shape[0]
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    st_h = 0
    if agg:
        days = int(math.ceil(data.shape[1] / 24.0))
        if days > 2:
            st_h = (days - 1) * 24
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * data.shape[1]
        # loss_rmse = [0.0] * decoder_length
    elif not confusion_title:
        acc = 0.
    else:
        acc = None
    cr = Crawling()
    for i, d in enumerate(data):
        if not is_grid:
            d = d[st_h:decoder_length]
        else:
            d = d[st_h:decoder_length, :]
        lb_i = i * stride + encoder_length
        lbt = labels[(lb_i + offset):(lb_i + offset + decoder_length), :, forecast_factor]
        if not confusion_title:
            a = 0.
        else:
            a = None
        for t_i, (t, l_t) in enumerate(zip(d, lbt)):
            t_i += st_h
            if is_grid:
                pred_t = aggregate_predictions(districts, t)
                pred_t = np.array(pred_t)
            else:
                pred_t = t
            pred_t = pred_t.flatten()
            if not is_classify:
                if not forecast_factor:
                    # mae, mse, _ = get_evaluation(pred_t, l_t)
                    # mae = mean_absolute_error(pred_t * 300, l_t * 300)
                    mae = mean_absolute_error([cr.ConcPM25(x * 300) for x in pred_t],
                                              [cr.ConcPM25(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
                    # loss_rmse[t_i] += mse
                else:
                    mae = mean_absolute_error([cr.ConcPM10(x * 300) for x in pred_t],
                                              [cr.ConcPM10(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
            elif not confusion_title:
                a += classify_data(pred_t, l_t, forecast_factor, tp="G")
            elif a is None:
                a = classify_data(pred_t, l_t, forecast_factor, True, tp="G")
            else:
                a += classify_data(pred_t, l_t, forecast_factor, True, tp="G")
        if is_classify:
            a = a / decoder_length
            if not confusion_title:
                acc += a
            elif acc is None:
                acc = a
            else:
                acc += a
        utils.update_progress((i + 1.0) / lt)
    if not is_classify:
        # print MAE loss score: calculate loss for each timestep
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt) * 300 for x in loss_rmse]
        # calculate accumulated loss
        if agg:
            no_h = len(loss_mae)
            if no_h > 24:
                print("hourly errors", loss_mae[:24])
                days = math.ceil(no_h * 1.0 / 24)
                for x in xrange(1, int(days)):
                    ed = (x + 1) * 24
                    if ed > no_h:
                        ed = no_h
                    print("day %i" % (x + 1), np.mean(loss_mae[x * 24:ed]))
            else:
                print(loss_mae)
        else:
            print(loss_mae)
        # print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor)
    elif not confusion_title:
        # print classification score
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
    else:
        name = url.split("/")[-1]
        # save and print confusion matrix
        utils.save_file("results/confusion/confusion_%s" % name, acc)
        draw_confusion_matrix(acc, confusion_title, norm)
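# Example call (hypothetical file paths): scores district-aggregated grid predictions
# hour by hour; stride, encoder_length, and offset position each prediction window on the
# label timeline (first label index = i * stride + encoder_length + offset).
#
# evaluate_by_districts("results/grid_pred.pkl", "data/labels.pkl", stride=2,
#                       encoder_length=72, decoder_length=24, offset=48)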