def print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor=0):
    """Print per-horizon ("S") and accumulated-average ("T") MAE/RMSE.

    For each forecast hour x in [0, decoder_length) the single-hour errors are
    printed alongside their concentration-index conversions (PM2.5 when
    forecast_factor is falsy, PM10 otherwise). From the second hour on, the
    error arrays are accumulated in place and the running mean up to hour x
    is printed as the "T" (total/accumulated) score.

    NOTE: mutates ``loss_mae`` and ``loss_rmse`` in place (prefix sums) —
    callers must not reuse the arrays afterwards expecting per-hour values.

    :param loss_mae: indexable of per-hour MAE values (length >= decoder_length)
    :param loss_rmse: indexable of per-hour RMSE values (length >= decoder_length)
    :param decoder_length: number of forecast hours to report
    :param forecast_factor: 0 => PM2.5 (ConcPM25), non-zero => PM10 (ConcPM10)
    """
    cr = Crawling()
    # `range` instead of py2-only `xrange`: keeps the function runnable on
    # Python 3; behavior on Python 2 is unchanged for this bounded loop.
    for x in range(decoder_length):
        print("%ih" % (x + 1))
        if not forecast_factor:
            print("S MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM25(loss_mae[x])))
            print("S RMSE: %.6f %.6f" % (loss_rmse[x], cr.ConcPM25(loss_rmse[x])))
        else:
            print("S PM10 MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM10(loss_mae[x])))
            print("S PM10 RMSE: %.6f %.6f" % (loss_rmse[x], cr.ConcPM10(loss_rmse[x])))
        if x > 0:
            # Accumulate prefix sums in place, then report the running mean
            # over hours 1..x+1. Skipped at x == 0 where the accumulated
            # value would equal the single-hour value just printed.
            loss_mae[x] += loss_mae[x - 1]
            t_mae = loss_mae[x] / (x + 1)
            loss_rmse[x] += loss_rmse[x - 1]
            t_rmse = loss_rmse[x] / (x + 1)
            if not forecast_factor:
                print("T MAE: %.6f %.6f" % (t_mae, cr.ConcPM25(t_mae)))
                print("T RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM25(t_rmse)))
            else:
                print("T PM10 MAE: %.6f %.6f" % (t_mae, cr.ConcPM10(t_mae)))
                print("T PM10 RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM10(t_rmse)))
def evaluate_lstm(url, url2, decoder_length=24, forecast_factor=0, is_classify=False):
    """Evaluate LSTM predictions against ground-truth labels.

    Loads predictions from ``url`` and labels from ``url2``, flattens the
    prediction batches to one row per sample, and either accumulates
    per-hour MAE (regression mode) or a classification accuracy
    (``is_classify=True``).

    :param url: path of the serialized prediction array (loaded via utils)
    :param url2: path of the serialized label array
    :param decoder_length: forecast horizon in hours (clipped to data width)
    :param forecast_factor: label channel index; 0 => PM2.5, non-zero => PM10
    :param is_classify: report classification accuracy instead of MAE
    """
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    # Collapse (batches, batch_size, ...) into one row per prediction sample.
    lt = data.shape[0] * data.shape[1]
    data = np.reshape(data, (lt, data.shape[-1]))
    if decoder_length > data.shape[-1]:
        decoder_length = data.shape[-1]
    dtl = len(data)
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * decoder_length
        loss_rmse = [0.0] * decoder_length
    else:
        acc = 0.
    #: r2_total = 0.0
    cr = Crawling()
    for i, d in enumerate(data):
        if decoder_length < data.shape[-1]:
            pred_t = d[:decoder_length]
        else:
            pred_t = d
        # Align sample i with its label window (24h encoder offset).
        lb_i = i * pr.strides + 24
        lbt = np.mean(labels[lb_i:(lb_i + decoder_length), :, forecast_factor], axis=1)
        a = 0.
        for t_i, (p, l) in enumerate(zip(pred_t, lbt)):
            if not is_classify:
                # mae, mse, _ = get_evaluation(p, l)
                # BUGFIX: convert with the index matching the evaluated factor
                # (PM2.5 vs PM10), consistent with evaluate_by_districts and
                # print_accumulate_error. Previously ConcPM10 was used even
                # for PM2.5 (forecast_factor == 0).
                if not forecast_factor:
                    mae = abs(cr.ConcPM25(p * 300) - cr.ConcPM25(l * 300))
                else:
                    mae = abs(cr.ConcPM10(p * 300) - cr.ConcPM10(l * 300))
                loss_mae[t_i] += mae
                # loss_rmse[t_i] += mse
            else:
                a += classify_data(pred_t, lbt, forecast_factor)
        if is_classify:
            a = a / decoder_length
            acc += a
        # r2_total += r2
        utils.update_progress((i + 1.0) / dtl)
    if not is_classify:
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt) * 300 for x in loss_rmse]
        # print("R2 score: %.6f" % r2_total)
        print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor=forecast_factor)
    else:
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
def evaluate_by_districts(url, url2, stride=2, encoder_length=72, decoder_length=24,
                          forecast_factor=0, is_classify=False, confusion_title="",
                          norm=True, is_grid=True, offset=48, agg=True):
    """Evaluate grid (or flat) predictions aggregated per district.

    Loads predictions from ``url`` and labels from ``url2``, maps each grid
    frame to district values via the cached district index, and reports one
    of three things:
      * regression MAE per hour (and per-day means when ``agg``),
      * classification accuracy (``is_classify=True``, no ``confusion_title``),
      * a confusion matrix saved + drawn (``is_classify=True`` with title).

    :param url: path of the serialized prediction array
    :param url2: path of the serialized label array
    :param stride: hours between consecutive prediction samples
    :param encoder_length: encoder window length used to align labels
    :param decoder_length: forecast horizon in hours
    :param forecast_factor: label channel index; 0 => PM2.5, non-zero => PM10
    :param is_classify: classification evaluation instead of MAE
    :param confusion_title: when set (with is_classify), accumulate a
        confusion matrix instead of an accuracy scalar
    :param norm: normalize the drawn confusion matrix
    :param is_grid: predictions are spatial grids needing district aggregation
    :param offset: extra hour offset applied to the label window
    :param agg: skip all but the last day of long horizons and print
        per-day aggregated errors
    """
    print(encoder_length, decoder_length, offset)
    # District index cache: build once, reuse afterwards.
    if not utils.validate_path("district_idx.pkl"):
        districts = convert_coordinate_to_idx()
    else:
        districts = utils.load_file("district_idx.pkl")
    data = utils.load_file(url)
    print(np.shape(data))
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
        # if not is_grid:
        #     data = np.reshape(data, (lt, data.shape[-1]))
        # else:
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    else:
        lt = data.shape[0]
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    # st_h: first hour to score. With agg and multi-day horizons, only the
    # last day's hours are evaluated per sample.
    st_h = 0
    if agg:
        days = int(math.ceil(data.shape[1] / 24.0))
        if days > 2:
            st_h = (days - 1) * 24
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * data.shape[1]
        # loss_rmse = [0.0] * decoder_length
    elif not confusion_title:
        acc = 0.
    else:
        acc = None  # confusion matrix accumulated lazily on first sample
    cr = Crawling()
    for i, d in enumerate(data):
        if not is_grid:
            d = d[st_h:decoder_length]
        else:
            d = d[st_h:decoder_length, :]
        lb_i = i * stride + encoder_length
        lbt = labels[(lb_i + offset):(lb_i + offset + decoder_length), :, forecast_factor]
        if not confusion_title:
            a = 0.
        else:
            a = None
        for t_i, (t, l_t) in enumerate(zip(d, lbt)):
            t_i += st_h
            if is_grid:
                # Collapse the spatial grid to one value per district.
                pred_t = aggregate_predictions(districts, t)
                pred_t = np.array(pred_t)
            else:
                pred_t = t
            pred_t = pred_t.flatten()
            if not is_classify:
                if not forecast_factor:
                    # mae, mse, _ = get_evaluation(pred_t, l_t)
                    # mae = mean_absolute_error(pred_t * 300, l_t * 300)
                    mae = mean_absolute_error([cr.ConcPM25(x * 300) for x in pred_t],
                                              [cr.ConcPM25(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
                    # loss_rmse[t_i] += mse
                else:
                    mae = mean_absolute_error([cr.ConcPM10(x * 300) for x in pred_t],
                                              [cr.ConcPM10(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
            elif not confusion_title:
                a += classify_data(pred_t, l_t, forecast_factor, tp="G")
            elif a is None:
                a = classify_data(pred_t, l_t, forecast_factor, True, tp="G")
            else:
                a += classify_data(pred_t, l_t, forecast_factor, True, tp="G")
        if is_classify:
            a = a / decoder_length
            if not confusion_title:
                acc += a
            elif acc is None:
                acc = a
            else:
                acc += a
        utils.update_progress((i + 1.0) / lt)
    if not is_classify:
        # print mae loss score
        # caculate loss for each timestep
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt) * 300 for x in loss_rmse]
        # calculate accumulated loss
        if agg:
            no_h = len(loss_mae)
            if no_h > 24:
                print("hourly errors", loss_mae[:24])
                days = math.ceil(no_h * 1.0 / 24)
                # `range` instead of py2-only `xrange` for Python 3
                # compatibility; identical iteration on this bounded range.
                for x in range(1, int(days)):
                    ed = (x + 1) * 24
                    if ed > no_h:
                        ed = no_h
                    print("day %i" % (x + 1), np.mean(loss_mae[x * 24:ed]))
            else:
                print(loss_mae)
        else:
            print(loss_mae)
        # print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor)
    elif not confusion_title:
        # print classification score
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
    else:
        name = url.split("/")[-1]
        # print confusion matrix
        utils.save_file("results/confusion/confusion_%s" % name, acc)
        draw_confusion_matrix(acc, confusion_title, norm)