Example #1
def print_accumulate_error(loss_mae,
                           loss_rmse,
                           decoder_length,
                           forecast_factor=0):
    cr = Crawling()
    for x in range(decoder_length):
        print("%ih" % (x + 1))
        if not forecast_factor:
            print("S MAE: %.6f %.6f" % (loss_mae[x], cr.ConcPM25(loss_mae[x])))
            print("S RMSE: %.6f %.6f" %
                  (loss_rmse[x], cr.ConcPM25(loss_rmse[x])))
        else:
            print("S PM10 MAE: %.6f %.6f" %
                  (loss_mae[x], cr.ConcPM10(loss_mae[x])))
            print("S PM10 RMSE: %.6f %.6f" %
                  (loss_rmse[x], cr.ConcPM10(loss_rmse[x])))
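        # accumulate prefix sums in place: after this block, loss_mae[x] holds
        # the total over hours 1..x+1, so t_mae / t_rmse are running averages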
        if x > 0:
            loss_mae[x] += loss_mae[x - 1]
            t_mae = loss_mae[x] / (x + 1)
            loss_rmse[x] += loss_rmse[x - 1]
            t_rmse = loss_rmse[x] / (x + 1)
            if not forecast_factor:
                print("T MAE: %.6f %.6f" % (t_mae, cr.ConcPM25(t_mae)))
                print("T RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM25(t_rmse)))
            else:
                print("T PM10 MAE: %.6f %.6f" % (t_mae, cr.ConcPM10(t_mae)))
                print("T PM10 RMSE: %.6f %.6f" % (t_rmse, cr.ConcPM10(t_rmse)))
Example #2
def evaluate_lstm(url,
                  url2,
                  decoder_length=24,
                  forecast_factor=0,
                  is_classify=False):
    data = utils.load_file(url)
    if type(data) is list:
        data = np.asarray(data)
    lt = data.shape[0] * data.shape[1]
    data = np.reshape(data, (lt, data.shape[-1]))
    if decoder_length > data.shape[-1]:
        decoder_length = data.shape[-1]
    dtl = len(data)
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * decoder_length
        loss_rmse = [0.0] * decoder_length
    else:
        acc = 0.
    # r2_total = 0.0
    cr = Crawling()
    for i, d in enumerate(data):
        if decoder_length < data.shape[-1]:
            pred_t = d[:decoder_length]
        else:
            pred_t = d
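        # labels for window i start i * pr.strides hours into the series,
        # offset by the 24-hour input context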
        lb_i = i * pr.strides + 24
        lbt = np.mean(labels[lb_i:(lb_i + decoder_length), :, forecast_factor],
                      axis=1)
        a = 0.
        for t_i, (p, l) in enumerate(zip(pred_t, lbt)):
            if not is_classify:
                # mae, mse, _ = get_evaluation(p, l)
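                # values are normalized by a factor of 300; rescale before converting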
                mae = abs(cr.ConcPM10(p * 300) - cr.ConcPM10(l * 300))
                loss_mae[t_i] += mae
                # loss_rmse[t_i] += mse
            else:
                # score each timestep's prediction against its label
                a += classify_data(p, l, forecast_factor)
        if is_classify:
            a = a / decoder_length
            acc += a
        # r2_total += r2
        utils.update_progress((i + 1.0) / dtl)
    if not is_classify:
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt)  * 300 for x in loss_rmse]
        # print("R2 score: %.6f" % r2_total)
        print_accumulate_error(loss_mae,
                               loss_rmse,
                               decoder_length,
                               forecast_factor=forecast_factor)
    else:
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
Example #3
def evaluate_by_districts(url,
                          url2,
                          stride=2,
                          encoder_length=72,
                          decoder_length=24,
                          forecast_factor=0,
                          is_classify=False,
                          confusion_title="",
                          norm=True,
                          is_grid=True,
                          offset=48,
                          agg=True):
    print(encoder_length, decoder_length, offset)
    if not utils.validate_path("district_idx.pkl"):
        districts = convert_coordinate_to_idx()
    else:
        districts = utils.load_file("district_idx.pkl")
    data = utils.load_file(url)
    print(np.shape(data))
    if type(data) is list:
        data = np.asarray(data)
    if len(data.shape) == 4:
        lt = data.shape[0] * data.shape[1]
        # if not is_grid:
        # data = np.reshape(data, (lt, data.shape[-1]))
        # else:
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    else:
        lt = data.shape[0]
        data = np.reshape(data, (lt, data.shape[-2], data.shape[-1]))
    st_h = 0
    if agg:
        days = int(math.ceil(data.shape[1] / 24.0))
        if days > 2:
            st_h = (days - 1) * 24
    labels = utils.load_file(url2)
    labels = np.asarray(labels)
    if not is_classify:
        loss_mae = [0.0] * data.shape[1]
        # loss_rmse = [0.0] * decoder_length
    elif not confusion_title:
        acc = 0.
    else:
        acc = None
    cr = Crawling()
    for i, d in enumerate(data):
        if not is_grid:
            d = d[st_h:decoder_length]
        else:
            d = d[st_h:decoder_length, :]
        lb_i = i * stride + encoder_length
        lbt = labels[(lb_i + offset):(lb_i + offset + decoder_length), :,
                     forecast_factor]
        if not confusion_title:
            a = 0.
        else:
            a = None
        for t_i, (t, l_t) in enumerate(zip(d, lbt)):
            t_i += st_h
            if is_grid:
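                # merge grid-cell outputs into one prediction per district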
                pred_t = aggregate_predictions(districts, t)
                pred_t = np.array(pred_t)
            else:
                pred_t = t
            pred_t = pred_t.flatten()
            if not is_classify:
                if not forecast_factor:
                    # mae, mse, _ = get_evaluation(pred_t, l_t)
                    # mae = mean_absolute_error(pred_t * 300, l_t * 300)
                    mae = mean_absolute_error(
                        [cr.ConcPM25(x * 300) for x in pred_t],
                        [cr.ConcPM25(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
                    # loss_rmse[t_i] += mse
                else:
                    mae = mean_absolute_error(
                        [cr.ConcPM10(x * 300) for x in pred_t],
                        [cr.ConcPM10(x * 300) for x in l_t])
                    loss_mae[t_i] += mae
            elif not confusion_title:
                a += classify_data(pred_t, l_t, forecast_factor, tp="G")
            elif a is None:
                a = classify_data(pred_t, l_t, forecast_factor, True, tp="G")
            else:
                a += classify_data(pred_t, l_t, forecast_factor, True, tp="G")
        if is_classify:
            a = a / decoder_length
            if not confusion_title:
                acc += a
            elif acc is None:
                acc = a
            else:
                acc += a
        utils.update_progress((i + 1.0) / lt)
    if not is_classify:
        # print mae loss score
        # calculate loss for each timestep
        loss_mae = np.array(loss_mae) / lt
        # loss_rmse = [sqrt(x / lt)  * 300 for x in loss_rmse]
        # calculate accumulated loss
        if agg:
            no_h = len(loss_mae)
            if no_h > 24:
                print("hourly errors", loss_mae[:24])
                days = math.ceil(no_h * 1.0 / 24)
                for x in range(1, int(days)):
                    ed = (x + 1) * 24
                    if ed > no_h:
                        ed = no_h
                    print("day %i" % (x + 1), np.mean(loss_mae[x * 24:ed]))
            else:
                print(loss_mae)
        else:
            print(loss_mae)
        #print_accumulate_error(loss_mae, loss_rmse, decoder_length, forecast_factor)
    elif not confusion_title:
        # print classification score
        acc = acc / lt * 100
        print("accuracy %.4f" % acc)
    else:
        name = url.split("/")[-1]
        # print confusion matrix
        utils.save_file("results/confusion/confusion_%s" % name, acc)
        draw_confusion_matrix(acc, confusion_title, norm)
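The agg branch at the end condenses the per-hour MAE vector into daily means: hourly errors for the first day, then one mean per subsequent 24-hour block. Below is a compact standalone version of that summary, assuming only that loss_mae holds one averaged error per hour (daily_error_summary is an illustrative name, not part of the original module):

import math
import numpy as np

def daily_error_summary(loss_mae):
    # Mean per-hour error for each 24-hour block, mirroring the
    # day-by-day printout in the agg branch of evaluate_by_districts.
    no_h = len(loss_mae)
    days = int(math.ceil(no_h / 24.0))
    return [float(np.mean(loss_mae[d * 24:min((d + 1) * 24, no_h)]))
            for d in range(days)]

print(daily_error_summary(np.linspace(1.0, 2.0, 48)))  # -> two daily means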