def print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor=0):
    cr = Crawling()
    for x in range(decoder_length):
        print("%ih" % (x + 1))
        # per-hour ("S") errors, reported both normalized and converted to concentration
        if not forecast_factor:
            print("S MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM25(loss_mae[x])))
            print("S RMSE: %.6f %.6f" % (loss_rmse[x], cr.ConcPM25(loss_rmse[x])))
        else:
            print("S PM10 MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM10(loss_mae[x])))
            print("S PM10 RMSE: %.6f %.6f" % (loss_rmse[x], cr.ConcPM10(loss_rmse[x])))
        # accumulate running sums so the "T" lines report the average error up to hour x
        if x > 0:
            loss_mae[x] += loss_mae[x - 1]
            loss_rmse[x] += loss_rmse[x - 1]
        t_mae = loss_mae[x] / (x + 1)
        t_rmse = loss_rmse[x] / (x + 1)
        if not forecast_factor:
            print("T MAE: %.6f %.6f" % (t_mae, cr.ConcPM25(t_mae)))
            print("T RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM25(t_rmse)))
        else:
            print("T PM10 MAE: %.6f %.6f" % (t_mae, cr.ConcPM10(t_mae)))
            print("T PM10 RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM10(t_rmse)))
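
# Hedged usage sketch (not part of the original pipeline): print_accumulate_error
# expects one MAE and one RMSE value per forecast hour and accumulates them in place.
# The values below are placeholders, for illustration only.
def _demo_print_accumulate_error():
    demo_mae = [0.05, 0.06, 0.07, 0.08]
    demo_rmse = [0.07, 0.08, 0.09, 0.10]
    print_accumulate_error(demo_mae, demo_rmse, decoder_length=4)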
def evaluate_single_pred(url, url2, decoder_length=8):
    cr = Crawling()
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    lt = data.shape[0] * data.shape[1]
    data = np.reshape(data, (lt, 25))
    dtl = len(data)
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    loss_mae = 0.0
    loss_rmse = 0.0
    r2_total = 0.0
    for i, d in enumerate(data):
        pred_t = np.asarray(d).flatten()
        lb_i = i * pr.strides + 24
        lbt = labels[lb_i:(lb_i + decoder_length), :, 0]
        lbg = lbt[decoder_length - 1, :].flatten()
        mae, mse, r2 = get_evaluation(pred_t, lbg)
        loss_mae += mae
        loss_rmse += mse
        r2_total += r2
        utils.update_progress((i + 1.0) / dtl)
    loss_mae = loss_mae / lt * 300
    loss_rmse = sqrt(loss_rmse / lt) * 300
    r2_total = r2_total / lt
    print("MAE: %.6f %.6f" % (loss_mae, cr.ConcPM25(loss_mae)))
    print("RMSE: %.6f %.6f" % (loss_rmse, cr.ConcPM25(loss_rmse)))
    print("R2 score: %.6f" % r2_total)
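
# get_evaluation() is defined elsewhere in the project. The sketch below is an
# assumption about its contract, inferred from the callers in this file: given a
# flattened prediction array and a flattened label array, it returns (MAE, MSE, R2).
# A minimal sklearn-based stand-in could look like this:
def _get_evaluation_sketch(pred, label):
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
    mae = mean_absolute_error(label, pred)
    mse = mean_squared_error(label, pred)
    r2 = r2_score(label, pred)
    return mae, mse, r2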
def evaluate_multi(url, url2, time_lags=24):
    cr = Crawling()
    preds = utils.load_file(url)
    preds = np.array(preds)
    lt = len(preds)
    labels = utils.load_file(url2)
    labels = np.array(labels)
    loss_mae0, loss_mae1 = 0.0, 0.0
    loss_rmse0, loss_rmse1 = 0.0, 0.0
    r2_0, r2_1 = 0.0, 0.0
    for i, d in enumerate(preds):
        lb_i = i * pr.strides + time_lags + 1
        mae0, mse0, r2 = get_evaluation(d[:time_lags, :], labels[lb_i:(lb_i + time_lags), :, 0])
        # mae1, mse1 = get_evaluation(d[:time_lags, :, 1], labels[lb_i:(lb_i + time_lags), :, 1])
        loss_rmse0 += mse0
        # loss_rmse1 += mse1
        loss_mae0 += mae0
        # loss_mae1 += mae1
        r2_0 += r2
    loss_mae0 = loss_mae0 / lt * 300
    loss_mae1 = loss_mae1 / lt * 300
    loss_rmse0 = sqrt(loss_rmse0 / lt) * 300
    loss_rmse1 = sqrt(loss_rmse1 / lt) * 300
    r2_0 = r2_0 / lt
    print("MAE: %.6f %.6f" % (loss_mae0, cr.ConcPM25(loss_mae0)))
    print("RMSE: %.6f %.6f" % (loss_rmse0, cr.ConcPM25(loss_rmse0)))
    print("R2 Score: %.6f" % r2_0)
def evaluate_sp(url, url2, decoder_length=24, is_grid=True, grid_eval=True):
    cr = Crawling()
    map_ = heatmap.build_map()
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
    else:
        lt = data.shape[0]
    if is_grid:
        data = np.reshape(data, (lt, data.shape[-2], 25, 25))
    else:
        data = np.reshape(data, (lt, data.shape[-2], 25))
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    loss_mae = 0.0
    loss_rmse = 0.0
    r2_total = 0.0
    for i, d in enumerate(data):
        # keep only the forecast horizon; plain slicing works for both the grid
        # (T, 25, 25) and the station (T, 25) layouts
        d = d[:decoder_length]
        pred_t = []
        if is_grid:
            for d_ in d:
                d_t = heatmap.clear_interpolate_bound(np.asarray(d_), map_)
                pred_t.append(d_t)
        else:
            if grid_eval:
                for d_ in d:
                    d_t = heatmap.fill_map(d_, map_)
                    pred_t.append(d_t)
            else:
                pred_t = d
        lb_i = i * pr.strides + 24
        lbt = labels[lb_i:(lb_i + decoder_length), :, 0]
        if grid_eval:
            lbg = []
            for x in lbt:
                x_l = heatmap.fill_map(x, map_)
                lbg.append(x_l)
            lbg = np.asarray(lbg)
            lbg = lbg.flatten()
        else:
            lbg = lbt.flatten()
        pred_t = np.asarray(pred_t)
        pred_t = pred_t.flatten()
        mae, mse, r2 = get_evaluation(pred_t, lbg)
        loss_mae += mae
        loss_rmse += mse
        r2_total += r2
        utils.update_progress((i + 1.0) / lt)
    loss_mae = loss_mae / lt * 300
    loss_rmse = sqrt(loss_rmse / lt) * 300
    r2_total = r2_total / lt
    print("MAE: %.6f %.6f" % (loss_mae, cr.ConcPM25(loss_mae)))
    print("RMSE: %.6f %.6f" % (loss_rmse, cr.ConcPM25(loss_rmse)))
    print("R2 Score: %.6f" % r2_total)
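
# Hedged usage sketch for the spatial evaluator above; the paths are placeholders.
# Inferred from the reshapes in evaluate_sp: with is_grid=True the predictions are
# 25x25 heatmaps per timestep, otherwise vectors of 25 values that can optionally be
# interpolated onto the grid before scoring (grid_eval=True).
def _demo_evaluate_sp():
    evaluate_sp("results/grid_predictions.pkl", "data/test_labels.pkl",
                decoder_length=24, is_grid=True, grid_eval=True)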
def evaluate_by_districts(url, url2, stride=2, encoder_length=72, decoder_length=24, forecast_factor=0,
                          is_classify=False, confusion_title="", norm=True, is_grid=True, offset=48, agg=True):
    print(encoder_length, decoder_length, offset)
    if not utils.validate_path("district_idx.pkl"):
        districts = convert_coordinate_to_idx()
    else:
        districts = utils.load_file("district_idx.pkl")
    data = utils.load_file(url)
    print(np.shape(data))
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
        # if not is_grid:
        #     data = np.reshape(data, (lt, data.shape[-1]))
        # else:
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    else:
        lt = data.shape[0]
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    st_h = 0
    if agg:
        days = int(math.ceil(data.shape[1] / 24.0))
        if days > 2:
            st_h = (days - 1) * 24
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * data.shape[1]
        # loss_rmse = [0.0] * decoder_length
    elif not confusion_title:
        acc = 0.
    else:
        acc = None
    cr = Crawling()
    for i, d in enumerate(data):
        if not is_grid:
            d = d[st_h:decoder_length]
        else:
            d = d[st_h:decoder_length, :]
        lb_i = i * stride + encoder_length
        lbt = labels[(lb_i + offset):(lb_i + offset + decoder_length), :, forecast_factor]
        if not confusion_title:
            a = 0.
        else:
            a = None
        for t_i, (t, l_t) in enumerate(zip(d, lbt)):
            t_i += st_h
            if is_grid:
                pred_t = aggregate_predictions(districts, t)
                pred_t = np.array(pred_t)
            else:
                pred_t = t
            pred_t = pred_t.flatten()
            if not is_classify:
                if not forecast_factor:
                    # mae, mse, _ = get_evaluation(pred_t, l_t)
                    # mae = mean_absolute_error(pred_t * 300, l_t * 300)
                    mae = mean_absolute_error([cr.ConcPM25(x * 300) for x in pred_t],
                                              [cr.ConcPM25(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
                    # loss_rmse[t_i] += mse
                else:
                    mae = mean_absolute_error([cr.ConcPM10(x * 300) for x in pred_t],
                                              [cr.ConcPM10(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
            elif not confusion_title:
                a += classify_data(pred_t, l_t, forecast_factor, tp="G")
            elif a is None:
                a = classify_data(pred_t, l_t, forecast_factor, True, tp="G")
            else:
                a += classify_data(pred_t, l_t, forecast_factor, True, tp="G")
        if is_classify:
            a = a / decoder_length
            if not confusion_title:
                acc += a
            elif acc is None:
                acc = a
            else:
                acc += a
        utils.update_progress((i + 1.0) / lt)
    if not is_classify:
        # print mae loss score
        # calculate loss for each timestep
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt) * 300 for x in loss_rmse]
        # calculate accumulated loss
        if agg:
            no_h = len(loss_mae)
            if no_h > 24:
                print("hourly errors", loss_mae[:24])
                days = math.ceil(no_h * 1.0 / 24)
                for x in range(1, int(days)):
                    ed = (x + 1) * 24
                    if ed > no_h:
                        ed = no_h
                    print("day %i" % (x + 1), np.mean(loss_mae[x * 24:ed]))
            else:
                print(loss_mae)
        else:
            print(loss_mae)
        # print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor)
    elif not confusion_title:
        # print classification score
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
    else:
        name = url.split("/")[-1]
        # print confusion matrix
        utils.save_file("results/confusion/confusion_%s" % name, acc)
        draw_confusion_matrix(acc, confusion_title, norm)
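
# Hedged usage sketch: the file paths below are placeholders rather than files that
# ship with the project; they only illustrate how the evaluation entry points above
# are meant to be called on saved predictions and labels.
if __name__ == "__main__":
    prediction_path = "results/test_predictions.pkl"  # hypothetical path
    label_path = "data/test_labels.pkl"               # hypothetical path
    # district-level PM2.5 MAE, reported per hour and aggregated per day
    evaluate_by_districts(prediction_path, label_path, stride=2,
                          encoder_length=72, decoder_length=24)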