Example #1
def print_accumulate_error(loss_mae,
                           loss_rmse,
                           decoder_length,
                           forecast_factor=0):
    cr = Crawling()
    # walk the forecast horizon, printing single-step (S) errors; the
    # ConcPM25/ConcPM10 helpers map normalized error back to concentration
    for x in range(decoder_length):
        print("%ih" % (x + 1))
        if not forecast_factor:
            print("S MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM25(loss_mae[x])))
            print("S RMSE: %.6f %.6f" %
                  (loss_rmse[x], cr.ConcPM25(loss_rmse[x])))
        else:
            print("S PM10 MAE: %.6f %.6f" %
                  (loss_mae[x], cr.ConcPM10(loss_mae[x])))
            print("S PM10 RMSE: %.6f %.6f" %
                  (loss_rmse[x], cr.ConcPM10(loss_rmse[x])))
        if x > 0:
            # turn the loss lists into prefix sums so the accumulated (T)
            # running mean over the first x + 1 hours can be reported
            loss_mae[x] += loss_mae[x - 1]
            t_mae = loss_mae[x] / (x + 1)
            loss_rmse[x] += loss_rmse[x - 1]
            t_rmse = loss_rmse[x] / (x + 1)
            if not forecast_factor:
                print("T MAE: %.6f %.6f" % (t_mae, cr.ConcPM25(t_mae)))
                print("T RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM25(t_rmse)))
            else:
                print("T PM10 MAE: %.6f %.6f" % (t_mae, cr.ConcPM10(t_mae)))
                print("T PM10 RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM10(t_rmse)))
Example #2
def evaluate_single_pred(url, url2, decoder_length=8):
    cr = Crawling()
    data = utils.load_file(url)
    if isinstance(data, list):
        data = np.asarray(data)
    # flatten the leading (batches, batch_size) axes into one sample axis
    lt = data.shape[0] * data.shape[1]
    data = np.reshape(data, (lt, 25))
    dtl = len(data)
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    loss_mae = 0.0
    loss_rmse = 0.0
    r2_total = 0.0
    for i, d in enumerate(data):
        pred_t = np.asarray(d).flatten()
        # align sample i with its labels: hop by the stride, then skip the
        # 24-step encoder context
        lb_i = i * pr.strides + 24
        lbt = labels[lb_i:(lb_i + decoder_length), :, 0]
        # single-step evaluation: score only the final forecast step
        lbg = lbt[decoder_length - 1, :].flatten()
        mae, mse, r2 = get_evaluation(pred_t, lbg)
        loss_mae += mae
        loss_rmse += mse
        r2_total += r2
        utils.update_progress((i + 1.0) / dtl)
    # average over samples and rescale by 300 (the dataset's normalization
    # constant) to report errors in concentration units
    loss_mae = loss_mae / lt * 300
    loss_rmse = sqrt(loss_rmse / lt) * 300
    r2_total = r2_total / lt
    print("MAE: %.6f %.6f" % (loss_mae, cr.ConcPM25(loss_mae)))
    print("RMSE: %.6f %.6f" % (loss_rmse, cr.ConcPM25(loss_rmse)))
    print("R2 score: %.6f" % r2_total)
Example #3
def evaluate_multi(url, url2, time_lags=24):
    cr = Crawling()
    preds = utils.load_file(url)
    preds = np.array(preds)
    lt = len(preds)
    labels = utils.load_file(url2)
    labels = np.array(labels)

    loss_mae0 = 0.0
    loss_rmse0 = 0.0
    r2_0 = 0.0
    for i, d in enumerate(preds):
        lb_i = i * pr.strides + time_lags + 1
        mae0, mse0, r2 = get_evaluation(d[:time_lags, :],
                                        labels[lb_i:(lb_i + time_lags), :, 0])
        loss_rmse0 += mse0
        loss_mae0 += mae0
        r2_0 += r2
    # average over samples; RMSE is the root of the mean squared error, and
    # both errors are rescaled by the 300 normalization constant
    loss_mae0 = loss_mae0 / lt * 300
    loss_rmse0 = sqrt(loss_rmse0 / lt) * 300
    r2_0 = r2_0 / lt
    print("MAE: %.6f %.6f" % (loss_mae0, cr.ConcPM25(loss_mae0)))
    print("RMSE: %.6f %.6f" % (loss_rmse0, cr.ConcPM25(loss_rmse0)))
    print("R2 Score: %.6f" % r2_0)
Example #4
def evaluate_sp(url, url2, decoder_length=24, is_grid=True, grid_eval=True):
    cr = Crawling()
    map_ = heatmap.build_map()
    data = utils.load_file(url)
    if isinstance(data, list):
        data = np.asarray(data)
    if len(data.shape) == 4:
        # flatten the leading (batches, batch_size) axes into one sample axis
        lt = data.shape[0] * data.shape[1]
    else:
        lt = data.shape[0]
    if is_grid:
        # grid output: (samples, decoder steps, 25x25 heatmap)
        data = np.reshape(data, (lt, data.shape[-2], 25, 25))
    else:
        # district output: (samples, decoder steps, 25 districts)
        data = np.reshape(data, (lt, data.shape[-2], 25))
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    loss_mae = 0.0
    loss_rmse = 0.0
    r2_total = 0.0
    for i, d in enumerate(data):
        d = d[:decoder_length, :, :]
        pred_t = []
        if is_grid:
            # zero out values interpolated outside the map boundary
            for d_ in d:
                d_t = heatmap.clear_interpolate_bound(np.asarray(d_), map_)
                pred_t.append(d_t)
        elif grid_eval:
            # project district-level predictions onto the grid so they can
            # be compared against gridded labels
            for d_ in d:
                d_t = heatmap.fill_map(d_, map_)
                pred_t.append(d_t)
        else:
            pred_t = d
        lb_i = i * pr.strides + 24
        lbt = labels[lb_i:(lb_i + decoder_length), :, 0]
        if grid_eval:
            lbg = []
            for x in lbt:
                x_l = heatmap.fill_map(x, map_)
                lbg.append(x_l)
            lbg = np.asarray(lbg)
            lbg = lbg.flatten()
        else:
            lbg = lbt.flatten()
        pred_t = np.asarray(pred_t)
        pred_t = pred_t.flatten()
        mae, mse, r2 = get_evaluation(pred_t, lbg)
        loss_mae += mae
        loss_rmse += mse
        r2_total += r2
        utils.update_progress((i + 1.0) / lt)
    loss_mae = loss_mae / lt * 300
    loss_rmse = sqrt(loss_rmse / lt) * 300
    r2_total = r2_total / lt
    print("MAE: %.6f %.6f" % (loss_mae, cr.ConcPM25(loss_mae)))
    print("RMSE: %.6f %.6f" % (loss_rmse, cr.ConcPM25(loss_rmse)))
    print("R2 Score: %.6f" % r2_total)
Example #5
def evaluate_by_districts(url,
                          url2,
                          stride=2,
                          encoder_length=72,
                          decoder_length=24,
                          forecast_factor=0,
                          is_classify=False,
                          confusion_title="",
                          norm=True,
                          is_grid=True,
                          offset=48,
                          agg=True):
    print(encoder_length, decoder_length, offset)
    # load the grid-to-district index mapping, building it on first use
    if not utils.validate_path("district_idx.pkl"):
        districts = convert_coordinate_to_idx()
    else:
        districts = utils.load_file("district_idx.pkl")
    data = utils.load_file(url)
    print(np.shape(data))
    if isinstance(data, list):
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
    else:
        lt = data.shape[0]
    data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    st_h = 0
    if agg:
        # when aggregating a multi-day horizon, skip the leading full days
        # and score from the start of the last one
        days = int(math.ceil(data.shape[1] / 24.0))
        if days > 2:
            st_h = (days - 1) * 24
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        # per-timestep MAE over the full output horizon
        loss_mae = [0.0] * data.shape[1]
    elif not confusion_title:
        acc = 0.    # scalar accuracy
    else:
        acc = None  # confusion matrix, created on the first sample
    cr = Crawling()
    for i, d in enumerate(data):
        d = d[st_h:decoder_length]
        lb_i = i * stride + encoder_length
        # labels start after the encoder context plus a fixed offset
        lbt = labels[(lb_i + offset):(lb_i + offset + decoder_length), :,
                     forecast_factor]
        # per-sample accumulator: scalar accuracy or confusion matrix
        if not confusion_title:
            a = 0.
        else:
            a = None
        for t_i, (t, l_t) in enumerate(zip(d, lbt)):
            t_i += st_h
            if is_grid:
                # collapse grid cells into district-level predictions
                pred_t = aggregate_predictions(districts, t)
                pred_t = np.array(pred_t)
            else:
                pred_t = t
            pred_t = pred_t.flatten()
            if not is_classify:
                # denormalize (x * 300) and convert through the project's
                # ConcPM25/ConcPM10 helpers before computing the MAE
                conv = cr.ConcPM10 if forecast_factor else cr.ConcPM25
                mae = mean_absolute_error(
                    [conv(x * 300) for x in pred_t],
                    [conv(x * 300) for x in l_t])
                loss_mae[t_i] += mae
            elif not confusion_title:
                a += classify_data(pred_t, l_t, forecast_factor, tp="G")
            elif a is None:
                a = classify_data(pred_t, l_t, forecast_factor, True, tp="G")
            else:
                a += classify_data(pred_t, l_t, forecast_factor, True, tp="G")
        if is_classify:
            a = a / decoder_length
            acc = a if acc is None else acc + a
        utils.update_progress((i + 1.0) / lt)
    if not is_classify:
        # calculate the mean MAE for each timestep
        loss_mae = np.array(loss_mae) / lt
        # calculate accumulated loss
        if agg:
            no_h = len(loss_mae)
            if no_h > 24:
                print("hourly errors", loss_mae[:24])
                days = math.ceil(no_h * 1.0 / 24)
                for x in range(1, int(days)):
                    ed = min((x + 1) * 24, no_h)
                    print("day %i" % (x + 1), np.mean(loss_mae[x * 24:ed]))
            else:
                print(loss_mae)
        else:
            print(loss_mae)
        #print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor)
    elif not confusion_title:
        # print classification score
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
    else:
        name = url.split("/")[-1]
        # print confusion matrix
        utils.save_file("results/confusion/confusion_%s" % name, acc)
        draw_confusion_matrix(acc, confusion_title, norm)
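
When agg is set, the per-hour MAE vector is reduced to daily means after the first 24 hours are printed individually. A minimal sketch of that reduction on toy data, assuming 24 steps per day:

import math
import numpy as np

loss_mae = np.random.rand(48)          # toy per-hour MAE over a two-day horizon
no_h = len(loss_mae)
print("hourly errors", loss_mae[:24])
days = math.ceil(no_h / 24.0)
for x in range(1, int(days)):
    ed = min((x + 1) * 24, no_h)
    print("day %i" % (x + 1), np.mean(loss_mae[x * 24:ed]))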