Example #1
def plot_sets(data, sets, titles, tam=[12, 10], save=False, file=None):
    num = len(sets)
    fig, axes = plt.subplots(nrows=num, ncols=1, figsize=tam)
    for k in np.arange(0, num):
        ticks = []
        x = []
        ax = axes[k]
        ax.set_title(titles[k])
        ax.set_ylim([0, 1.1])
        for key in sets[k].keys():
            s = sets[k][key]
            if s.mf == Membership.trimf:
                ax.plot(s.parameters, [0, 1, 0])
            elif s.mf == Membership.gaussmf:
                tmpx = [kk for kk in np.arange(s.lower, s.upper)]
                tmpy = [s.membership(kk) for kk in np.arange(s.lower, s.upper)]
                ax.plot(tmpx, tmpy)
            elif s.mf == Membership.trapmf:
                ax.plot(s.parameters, [0, 1, 1, 0])
            ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
            x.append(s.centroid)
        ax.xaxis.set_ticks(x)
        ax.xaxis.set_ticklabels(ticks)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
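
A minimal usage sketch for plot_sets, assuming the pyFTS-style Grid.GridPartitioner used in Example #4, an already loaded numeric series named data, and that the partitioner exposes its fuzzy sets as a dict keyed by name (the variable names and partition counts are illustrative):

# Usage sketch: compare two grid partitionings of the same series
fs10 = Grid.GridPartitioner(data=data, npart=10)
fs20 = Grid.GridPartitioner(data=data, npart=20)

# plot_sets expects a list of fuzzy-set dictionaries (name -> set) and one title per entry
plot_sets(data, [fs10.sets, fs20.sets],
          ["Grid partitioning, 10 sets", "Grid partitioning, 20 sets"],
          tam=[12, 6])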
Example #2
def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
    fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)

    for c, mfts in enumerate(models, start=0):
        if len(models) > 1:
            ax = axes[c]
        else:
            ax = axes
        forecasts = mfts.forecast(targets)
        res = residuals(targets, forecasts, mfts.order)
        mu = np.mean(res)
        sig = np.std(res)

        if c == 0: ax[0].set_title("Residuals", size='large')
        ax[0].set_ylabel(mfts.shortname, size='large')
        ax[0].set_xlabel(' ')
        ax[0].plot(res)

        if c == 0: ax[1].set_title("Residuals Autocorrelation", size='large')
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)

        if c == 0: ax[2].set_title("Residuals Histogram", size='large')
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
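
A hedged usage sketch for plot_residuals, assuming the training API shown in Example #4 (pwfts.ProbabilisticWeightedFTS with train/forecast) and placeholder train/test splits of a numeric series:

# Usage sketch: residual diagnostics for a single fitted model
fs = Grid.GridPartitioner(data=train, npart=10)
model = pwfts.ProbabilisticWeightedFTS("PWFTS")
model.train(train, sets=fs.sets, order=2)

plot_residuals(test, [model], tam=[10, 4], save=True, file="residuals.png")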
Example #3
def single_plot_residuals(targets,
                          forecasts,
                          order,
                          tam=[8, 8],
                          save=False,
                          file=None):
    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=tam)

    res = residuals(targets, forecasts, order)

    ax[0].set_title("Residuals", size='large')
    ax[0].set_ylabel("Model", size='large')
    ax[0].set_xlabel(' ')
    ax[0].plot(res)

    ax[1].set_title("Residuals Autocorrelation", size='large')
    ax[1].set_ylabel('ACS')
    ax[1].set_xlabel('Lag')
    ax[1].acorr(res)

    ax[2].set_title("Residuals Histogram", size='large')
    ax[2].set_ylabel('Freq')
    ax[2].set_xlabel('Bins')
    ax[2].hist(res)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #4
def pftsExploreOrderAndPartitions(data, save=False, file=None):
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
    data_fs1 = Grid.GridPartitioner(data=data, npart=10)
    mi = []
    ma = []

    axes[0].set_title('Point Forecasts by Order')
    axes[2].set_title('Interval Forecasts by Order')

    for order in np.arange(1, 6):
        fts = pwfts.ProbabilisticWeightedFTS("")
        fts.shortname = "n = " + str(order)
        fts.train(data, sets=data_fs1.sets, order=order)
        point_forecasts = fts.forecast(data)
        interval_forecasts = fts.forecast_interval(data)
        lower = [kk[0] for kk in interval_forecasts]
        upper = [kk[1] for kk in interval_forecasts]
        mi.append(min(lower) * 0.95)
        ma.append(max(upper) * 1.05)
        for k in np.arange(0, order):
            point_forecasts.insert(0, None)
            lower.insert(0, None)
            upper.insert(0, None)
        axes[0].plot(point_forecasts, label=fts.shortname)
        axes[2].plot(lower, label=fts.shortname)
        axes[2].plot(upper)

    axes[1].set_title('Point Forecasts by Number of Partitions')
    axes[3].set_title('Interval Forecasts by Number of Partitions')

    for partitions in np.arange(5, 11):
        data_fs = Grid.GridPartitioner(data=data, npart=partitions)
        fts = pwfts.ProbabilisticWeightedFTS("")
        fts.shortname = "q = " + str(partitions)
        fts.train(data, sets=data_fs.sets, order=1)
        point_forecasts = fts.forecast(data)
        interval_forecasts = fts.forecast_interval(data)
        lower = [kk[0] for kk in interval_forecasts]
        upper = [kk[1] for kk in interval_forecasts]
        mi.append(min(lower) * 0.95)
        ma.append(max(upper) * 1.05)
        point_forecasts.insert(0, None)
        lower.insert(0, None)
        upper.insert(0, None)
        axes[1].plot(point_forecasts, label=fts.shortname)
        axes[3].plot(lower, label=fts.shortname)
        axes[3].plot(upper)

    for ax in axes:
        ax.set_ylabel('F(T)')
        ax.set_xlabel('T')
        ax.plot(data, label="Original", color="black", linewidth=1.5)
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
        ax.set_ylim([min(mi), max(ma)])
        ax.set_xlim([0, len(data)])

    plt.tight_layout()

    cUtil.show_and_save_image(fig, file, save)
Example #5
def plot_sets(partitioner,
              start=0,
              end=10,
              step=1,
              tam=[5, 5],
              colors=None,
              save=False,
              file=None,
              axes=None,
              data=None,
              window_size=1,
              only_lines=False,
              legend=True):

    range = np.arange(start, end, step)
    ticks = []
    if axes is None:
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)

    for ct, key in enumerate(partitioner.ordered_sets):
        fset = partitioner.sets[key]
        if not only_lines:
            for t in range:
                tdisp = t - (t % window_size)
                fset.membership(0, tdisp)  # evaluate membership at tdisp so the perturbated parameters are computed and cached
                param = fset.perturbated_parameters[str(tdisp)]

                if fset.mf == Membership.trimf:
                    if t == start:
                        line = axes.plot([t, t + 1, t], param, label=fset.name)
                        fset.metadata['color'] = line[0].get_color()
                    else:
                        axes.plot([t, t + 1, t],
                                  param,
                                  c=fset.metadata['color'])

                ticks.extend(["t+" + str(t), ""])
        else:
            tmp = []
            for t in range:
                tdisp = t - (t % window_size)
                fset.membership(0, tdisp)
                param = fset.perturbated_parameters[str(tdisp)]
                tmp.append(np.polyval(param, tdisp))
            axes.plot(range, tmp, ls="--", c="blue")

    axes.set_ylabel("Universe of Discourse")
    axes.set_xlabel("Time")
    plt.xticks([k for k in range], ticks, rotation='vertical')

    if legend:
        handles0, labels0 = axes.get_legend_handles_labels()
        lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))

    if data is not None:
        axes.plot(np.arange(start, start + len(data), 1), data, c="black")

    if file is not None:
        plt.tight_layout()
        Util.show_and_save_image(axes.get_figure(), file, save)  # works whether axes was created here or passed in
Example #6
def plot_sets_conditional(model,
                          data,
                          start=0,
                          end=10,
                          step=1,
                          tam=[5, 5],
                          colors=None,
                          save=False,
                          file=None,
                          axes=None):

    range = np.arange(start, end, step)
    ticks = []
    if axes is None:
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)

    for ct, key in enumerate(model.partitioner.ordered_sets):
        set = model.partitioner.sets[key]
        for t in range:
            tdisp = model.perturbation_factors(data[t])
            set.perturbate_parameters(tdisp[ct])
            param = set.perturbated_parameters[str(tdisp[ct])]

            if set.mf == Membership.trimf:
                if t == start:
                    line = axes.plot([t, t + 1, t], param, label=set.name)
                    set.metadata['color'] = line[0].get_color()
                else:
                    axes.plot([t, t + 1, t], param, c=set.metadata['color'])

            ticks.extend(["t+" + str(t), ""])

    axes.set_ylabel("Universe of Discourse")
    axes.set_xlabel("Time")
    plt.xticks([k for k in range], ticks, rotation='vertical')

    handles0, labels0 = axes.get_legend_handles_labels()
    lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))

    if data is not None:
        axes.plot(np.arange(start, start + len(data), 1), data, c="black")

    plt.tight_layout()

    Util.show_and_save_image(axes.get_figure(), file, save)  # axes may have been passed in, so take its figure
Example #7
def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
    """
    Plot residuals and statistics

    :param targets: 
    :param models: 
    :param tam: 
    :param save: 
    :param file: 
    :return: 
    """
    fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
    for c, mfts in enumerate(models):
        if len(models) > 1:
            ax = axes[c]
        else:
            ax = axes
        forecasts = mfts.forecast(targets)
        res = residuals(targets, forecasts, mfts.order)
        mu = np.mean(res)
        sig = np.std(res)

        ax[0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
        ax[0].set_ylabel('E')
        ax[0].set_xlabel('T')
        ax[0].plot(res)

        ax[1].set_title("Residuals Autocorrelation")
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)

        ax[2].set_title("Residuals Histogram")
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #8
def plot_residuals_by_model(targets,
                            models,
                            tam=[8, 8],
                            save=False,
                            file=None):
    import scipy as sp

    fig, axes = plt.subplots(nrows=len(models), ncols=4, figsize=tam)

    for c, mfts in enumerate(models, start=0):
        if len(models) > 1:
            ax = axes[c]
        else:
            ax = axes
        forecasts = mfts.predict(targets)
        res = residuals(targets, forecasts, mfts.order + 1)
        mu = np.mean(res)
        sig = np.std(res)

        if c == 0: ax[0].set_title("Residuals", size='large')
        ax[0].set_ylabel(mfts.shortname, size='large')
        ax[0].set_xlabel(' ')
        ax[0].plot(res)

        if c == 0: ax[1].set_title("Autocorrelation", size='large')
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)

        if c == 0: ax[2].set_title("Histogram", size='large')
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)

        if c == 0: ax[3].set_title("QQ Plot", size='large')

        _, (__, ___, r) = sp.stats.probplot(res, plot=ax[3], fit=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #9
def single_plot_residuals(res, order, tam=[10, 7], save=False, file=None):
    import scipy as sp

    fig, ax = plt.subplots(nrows=2, ncols=2, figsize=tam)

    ax[0][0].set_title("Residuals", size='large')
    ax[0][0].plot(res)

    ax[0][1].set_title("Autocorrelation", size='large')
    ax[0][1].set_ylabel('ACF')
    ax[0][1].set_xlabel('Lag')
    ax[0][1].acorr(res)

    ax[1][0].set_title("Histogram", size='large')
    ax[1][0].set_ylabel('Freq')
    ax[1][0].set_xlabel('Bins')
    ax[1][0].hist(res)

    _, (__, ___, r) = sp.stats.probplot(res, plot=ax[1][1], fit=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
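
A sketch of how this residual-only variant might be called, assuming a fitted model exposing the predict/order attributes used in Example #8 and the same residuals helper:

# Usage sketch: res is the residual series of an already fitted model
forecasts = model.predict(test)
res = residuals(test, forecasts, model.order + 1)
single_plot_residuals(res, model.order, tam=[10, 7])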
Example #10
def simpleSearch_RMSE(train,
                      test,
                      model,
                      partitions,
                      orders,
                      save=False,
                      file=None,
                      tam=[10, 15],
                      plotforecasts=False,
                      elev=30,
                      azim=144,
                      intervals=False,
                      parameters=None,
                      partitioner=Grid.GridPartitioner,
                      transformation=None,
                      indexer=None):
    _3d = len(orders) > 1
    ret = []
    if _3d:
        errors = np.zeros((len(orders), len(partitions)))
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    # fig.suptitle("Comparação de modelos ")
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    for pc, p in enumerate(partitions, start=0):

        sets = partitioner(data=train, npart=p,
                           transformation=transformation).sets
        for oc, o in enumerate(orders, start=0):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                if not fts.has_seasonality:
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted))
                for kk in range(o):
                    forecasted.insert(0, None)
                if plotforecasts: ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]),
                                                     np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    if _3d:
        # plot the RMSE surface over (partitions, orders)
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X,
                                Y,
                                errors,
                                rstride=1,
                                cstride=1,
                                antialiased=True)
    else:
        # single order: plot the RMSE curve over the number of partitions
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)
    ret.append(best)
    ret.append(forecasted_best)
    ret.append(min_rmse)

    # plt.tight_layout()

    cUtil.show_and_save_image(fig, file, save)

    return ret
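
A hedged sketch of a grid-search call; the partition and order ranges are arbitrary, and the model class and default partitioner are the ones already used in these examples:

# Usage sketch: search partitions x orders by RMSE on a hold-out split
best, forecasted_best, min_rmse = simpleSearch_RMSE(
    train, test, pwfts.ProbabilisticWeightedFTS,
    partitions=[10, 20, 30, 40], orders=[1, 2, 3],
    plotforecasts=True, save=True, file="search_rmse.png")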
Example #11
def plot_dataframe_interval(
        file_synthetic,
        file_analytic,
        experiments,
        tam,
        save=False,
        file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True],
        save_best=False,
        ignore=None,
        replace=None):

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)

    axes[0].set_title('Sharpness')
    axes[1].set_title('Resolution')
    axes[2].set_title('Coverage')

    dat_syn = pd.read_csv(file_synthetic,
                          sep=";",
                          usecols=interval_dataframe_synthetic_columns())

    bests = find_best(dat_syn, sort_columns, sort_ascend)

    dat_ana = pd.read_csv(
        file_analytic,
        sep=";",
        usecols=interval_dataframe_analytic_columns(experiments))

    data_columns = analytical_data_columns(experiments)

    if save_best:
        dat = pd.DataFrame.from_dict(bests, orient='index')
        dat.to_csv(Util.uniquefilename(
            file_synthetic.replace("synthetic", "best")),
                   sep=";",
                   index=False)

    sharpness = []
    resolution = []
    coverage = []
    times = []
    labels = []
    bounds_shp = []

    for b in sorted(bests.keys()):
        if check_ignore_list(b, ignore):
            continue
        best = bests[b]
        df = dat_ana[(dat_ana.Model == best["Model"])
                     & (dat_ana.Order == best["Order"])
                     & (dat_ana.Scheme == best["Scheme"]) &
                     (dat_ana.Partitions == best["Partitions"])]
        sharpness.append(extract_measure(df, 'Sharpness', data_columns))
        resolution.append(extract_measure(df, 'Resolution', data_columns))
        coverage.append(extract_measure(df, 'Coverage', data_columns))
        times.append(extract_measure(df, 'TIME', data_columns))
        labels.append(
            check_replace_list(best["Model"] + " " + str(best["Order"]),
                               replace))

    axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
    axes[0].set_title("Sharpness")
    axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
    axes[1].set_title("Resolution")
    axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
    axes[2].set_title("Coverage")
    axes[2].set_ylim([0, 1.1])

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
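
A usage sketch, assuming the two CSV files were produced by the corresponding benchmark routines (the paths are placeholders) and that experiments matches the value used to generate the analytic columns:

# Usage sketch: boxplots of sharpness, resolution and coverage for the best models
plot_dataframe_interval("interval_synthetic.csv", "interval_analytic.csv",
                        experiments=30, tam=[8, 10],
                        save=True, file="interval_boxplots.png", save_best=True)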
Example #12
def unified_scaled_point(experiments,
                         tam,
                         save=False,
                         file=None,
                         sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                         sort_ascend=[1, 1, 1, 1],
                         save_best=False,
                         ignore=None,
                         replace=None):

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)

    axes[0].set_title('RMSE')
    axes[1].set_title('SMAPE')
    axes[2].set_title('U Statistic')

    models = {}

    for experiment in experiments:

        mdl = {}

        dat_syn = pd.read_csv(experiment[0],
                              sep=";",
                              usecols=point_dataframe_synthetic_columns())

        bests = find_best(dat_syn, sort_columns, sort_ascend)

        dat_ana = pd.read_csv(experiment[1],
                              sep=";",
                              usecols=point_dataframe_analytic_columns(
                                  experiment[2]))

        rmse = []
        smape = []
        u = []
        times = []

        data_columns = analytical_data_columns(experiment[2])

        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue

            if b not in models:
                models[b] = {}
                models[b]['rmse'] = []
                models[b]['smape'] = []
                models[b]['u'] = []
                models[b]['times'] = []

            if b not in mdl:
                mdl[b] = {}
                mdl[b]['rmse'] = []
                mdl[b]['smape'] = []
                mdl[b]['u'] = []
                mdl[b]['times'] = []

            best = bests[b]
            tmp = dat_ana[(dat_ana.Model == best["Model"])
                          & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"]) &
                          (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'RMSE', data_columns)
            mdl[b]['rmse'].extend(tmpl)
            rmse.extend(tmpl)
            tmpl = extract_measure(tmp, 'SMAPE', data_columns)
            mdl[b]['smape'].extend(tmpl)
            smape.extend(tmpl)
            tmpl = extract_measure(tmp, 'U', data_columns)
            mdl[b]['u'].extend(tmpl)
            u.extend(tmpl)
            tmpl = extract_measure(tmp, 'TIME', data_columns)
            mdl[b]['times'].extend(tmpl)
            times.extend(tmpl)

            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)

        print("GLOBAL")
        rmse_param = scale_params(rmse)
        stats("rmse", rmse)
        smape_param = scale_params(smape)
        stats("smape", smape)
        u_param = scale_params(u)
        stats("u", u)
        times_param = scale_params(times)

        for key in sorted(mdl.keys()):
            models[key]['rmse'].extend(scale(mdl[key]['rmse'], rmse_param))
            models[key]['smape'].extend(scale(mdl[key]['smape'], smape_param))
            models[key]['u'].extend(scale(mdl[key]['u'], u_param))
            models[key]['times'].extend(scale(mdl[key]['times'], times_param))

    rmse = []
    smape = []
    u = []
    times = []
    labels = []
    for key in sorted(models.keys()):
        print(key)
        rmse.append(models[key]['rmse'])
        stats("rmse", models[key]['rmse'])
        smape.append(models[key]['smape'])
        stats("smape", models[key]['smape'])
        u.append(models[key]['u'])
        stats("u", models[key]['u'])
        times.append(models[key]['times'])
        labels.append(models[key]['label'])

    axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
    axes[0].set_title("RMSE")
    axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
    axes[1].set_title("SMAPE")
    axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
    axes[2].set_title("U Statistic")

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
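
Each entry of experiments is indexed as experiment[0] (synthetic CSV), experiment[1] (analytic CSV) and experiment[2] (forwarded to the column helpers), so a hedged call might look like this, with placeholder paths:

# Usage sketch: pool and rescale point-forecast metrics across two benchmark runs
experiments = [("point_synthetic_a.csv", "point_analytic_a.csv", 30),
               ("point_synthetic_b.csv", "point_analytic_b.csv", 30)]
unified_scaled_point(experiments, tam=[10, 12], save=True, file="point_unified.png")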
Example #13
def plot_dataframe_probabilistic(
        file_synthetic,
        file_analytic,
        experiments,
        tam,
        save=False,
        file=None,
        sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
        sort_ascend=[True, True, True, True],
        save_best=False,
        ignore=None,
        replace=None):

    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)

    axes[0].set_title('CRPS Interval')
    axes[1].set_title('CRPS Distribution')

    dat_syn = pd.read_csv(file_synthetic,
                          sep=";",
                          usecols=probabilistic_dataframe_synthetic_columns())

    bests = find_best(dat_syn, sort_columns, sort_ascend)

    dat_ana = pd.read_csv(
        file_analytic,
        sep=";",
        usecols=probabilistic_dataframe_analytic_columns(experiments))

    data_columns = analytical_data_columns(experiments)

    if save_best:
        dat = pd.DataFrame.from_dict(bests, orient='index')
        dat.to_csv(Util.uniquefilename(
            file_synthetic.replace("synthetic", "best")),
                   sep=";",
                   index=False)

    crps1 = []
    crps2 = []
    labels = []

    for b in sorted(bests.keys()):
        if check_ignore_list(b, ignore):
            continue
        best = bests[b]
        df = dat_ana[(dat_ana.Model == best["Model"])
                     & (dat_ana.Order == best["Order"])
                     & (dat_ana.Scheme == best["Scheme"]) &
                     (dat_ana.Partitions == best["Partitions"])]
        crps1.append(extract_measure(df, 'CRPS_Interval', data_columns))
        crps2.append(extract_measure(df, 'CRPS_Distribution', data_columns))
        labels.append(
            check_replace_list(best["Model"] + " " + str(best["Order"]),
                               replace))

    axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
    axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)

    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
Example #14
def unified_scaled_probabilistic(experiments,
                                 tam,
                                 save=False,
                                 file=None,
                                 sort_columns=['CRPSAVG', 'CRPSSTD'],
                                 sort_ascend=[True, True],
                                 save_best=False,
                                 ignore=None,
                                 replace=None):
    fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)

    axes.set_title('CRPS')
    #axes[1].set_title('CRPS Distribution Ahead')

    models = {}

    for experiment in experiments:

        print(experiment)

        mdl = {}

        dat_syn = pd.read_csv(
            experiment[0],
            sep=";",
            usecols=probabilistic_dataframe_synthetic_columns())

        bests = find_best(dat_syn, sort_columns, sort_ascend)

        dat_ana = pd.read_csv(experiment[1],
                              sep=";",
                              usecols=probabilistic_dataframe_analytic_columns(
                                  experiment[2]))

        crps1 = []
        crps2 = []

        data_columns = analytical_data_columns(experiment[2])

        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue

            if b not in models:
                models[b] = {}
                models[b]['crps1'] = []
                models[b]['crps2'] = []

            if b not in mdl:
                mdl[b] = {}
                mdl[b]['crps1'] = []
                mdl[b]['crps2'] = []

            best = bests[b]

            print(best)

            tmp = dat_ana[(dat_ana.Model == best["Model"])
                          & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"]) &
                          (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'CRPS_Interval', data_columns)
            mdl[b]['crps1'].extend(tmpl)
            crps1.extend(tmpl)
            tmpl = extract_measure(tmp, 'CRPS_Distribution', data_columns)
            mdl[b]['crps2'].extend(tmpl)
            crps2.extend(tmpl)

            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)

        crps1_param = scale_params(crps1)
        crps2_param = scale_params(crps2)

        for key in sorted(mdl.keys()):
            print(key)
            models[key]['crps1'].extend(scale(mdl[key]['crps1'], crps1_param))
            models[key]['crps2'].extend(scale(mdl[key]['crps2'], crps2_param))

    crps1 = []
    crps2 = []
    labels = []
    for key in sorted(models.keys()):
        crps1.append(models[key]['crps1'])
        crps2.append(models[key]['crps2'])
        labels.append(models[key]['label'])

    axes.boxplot(crps1, labels=labels, autorange=True, showmeans=True)
    #axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #15
def plot_dataframe_interval_pinball(
        file_synthetic,
        file_analytic,
        experiments,
        tam,
        save=False,
        file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True],
        save_best=False,
        ignore=None,
        replace=None):

    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
    axes[0].set_title(r'$\tau=0.05$')
    axes[1].set_title(r'$\tau=0.25$')
    axes[2].set_title(r'$\tau=0.75$')
    axes[3].set_title(r'$\tau=0.95$')

    dat_syn = pd.read_csv(file_synthetic,
                          sep=";",
                          usecols=interval_dataframe_synthetic_columns())

    bests = find_best(dat_syn, sort_columns, sort_ascend)

    dat_ana = pd.read_csv(
        file_analytic,
        sep=";",
        usecols=interval_dataframe_analytic_columns(experiments))

    data_columns = analytical_data_columns(experiments)

    if save_best:
        dat = pd.DataFrame.from_dict(bests, orient='index')
        dat.to_csv(Util.uniquefilename(
            file_synthetic.replace("synthetic", "best")),
                   sep=";",
                   index=False)

    q05 = []
    q25 = []
    q75 = []
    q95 = []
    labels = []

    for b in sorted(bests.keys()):
        if check_ignore_list(b, ignore):
            continue
        best = bests[b]
        df = dat_ana[(dat_ana.Model == best["Model"])
                     & (dat_ana.Order == best["Order"])
                     & (dat_ana.Scheme == best["Scheme"]) &
                     (dat_ana.Partitions == best["Partitions"])]
        q05.append(extract_measure(df, 'Q05', data_columns))
        q25.append(extract_measure(df, 'Q25', data_columns))
        q75.append(extract_measure(df, 'Q75', data_columns))
        q95.append(extract_measure(df, 'Q95', data_columns))
        labels.append(
            check_replace_list(best["Model"] + " " + str(best["Order"]),
                               replace))

    axes[0].boxplot(q05,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)
    axes[1].boxplot(q25,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)
    axes[2].boxplot(q75,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)
    axes[3].boxplot(q95,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #16
def unified_scaled_interval_pinball(
        experiments,
        tam,
        save=False,
        file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True],
        save_best=False,
        ignore=None,
        replace=None):
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
    axes[0].set_title(r'$\tau=0.05$')
    axes[1].set_title(r'$\tau=0.25$')
    axes[2].set_title(r'$\tau=0.75$')
    axes[3].set_title(r'$\tau=0.95$')
    models = {}

    for experiment in experiments:

        mdl = {}

        dat_syn = pd.read_csv(experiment[0],
                              sep=";",
                              usecols=interval_dataframe_synthetic_columns())

        bests = find_best(dat_syn, sort_columns, sort_ascend)

        dat_ana = pd.read_csv(experiment[1],
                              sep=";",
                              usecols=interval_dataframe_analytic_columns(
                                  experiment[2]))

        q05 = []
        q25 = []
        q75 = []
        q95 = []

        data_columns = analytical_data_columns(experiment[2])

        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue

            if b not in models:
                models[b] = {}
                models[b]['q05'] = []
                models[b]['q25'] = []
                models[b]['q75'] = []
                models[b]['q95'] = []

            if b not in mdl:
                mdl[b] = {}
                mdl[b]['q05'] = []
                mdl[b]['q25'] = []
                mdl[b]['q75'] = []
                mdl[b]['q95'] = []

            best = bests[b]
            print(best)
            tmp = dat_ana[(dat_ana.Model == best["Model"])
                          & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"]) &
                          (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'Q05', data_columns)
            mdl[b]['q05'].extend(tmpl)
            q05.extend(tmpl)
            tmpl = extract_measure(tmp, 'Q25', data_columns)
            mdl[b]['q25'].extend(tmpl)
            q25.extend(tmpl)
            tmpl = extract_measure(tmp, 'Q75', data_columns)
            mdl[b]['q75'].extend(tmpl)
            q75.extend(tmpl)
            tmpl = extract_measure(tmp, 'Q95', data_columns)
            mdl[b]['q95'].extend(tmpl)
            q95.extend(tmpl)

            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)

        q05_param = scale_params(q05)
        q25_param = scale_params(q25)
        q75_param = scale_params(q75)
        q95_param = scale_params(q95)

        for key in sorted(mdl.keys()):
            models[key]['q05'].extend(scale(mdl[key]['q05'], q05_param))
            models[key]['q25'].extend(scale(mdl[key]['q25'], q25_param))
            models[key]['q75'].extend(scale(mdl[key]['q75'], q75_param))
            models[key]['q95'].extend(scale(mdl[key]['q95'], q95_param))

    q05 = []
    q25 = []
    q75 = []
    q95 = []
    labels = []
    for key in sorted(models.keys()):
        q05.append(models[key]['q05'])
        q25.append(models[key]['q25'])
        q75.append(models[key]['q75'])
        q95.append(models[key]['q95'])
        labels.append(models[key]['label'])

    axes[0].boxplot(q05,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)
    axes[1].boxplot(q25,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)
    axes[2].boxplot(q75,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)
    axes[3].boxplot(q95,
                    labels=labels,
                    vert=False,
                    autorange=True,
                    showmeans=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #17
def plot_compared_intervals_ahead(original,
                                  models,
                                  colors,
                                  distributions,
                                  time_from,
                                  time_to,
                                  intervals=True,
                                  save=False,
                                  file=None,
                                  tam=[20, 5],
                                  resolution=None,
                                  cmap='Blues',
                                  linewidth=1.5):
    """
    Plot the forecasts of several one step ahead models, by point or by interval 
    :param original: Original time series data (list)
    :param models: List of models to compare
    :param colors: List of models colors
    :param distributions: True to plot a distribution
    :param time_from: index of data poit to start the ahead forecasting
    :param time_to: number of steps ahead to forecast
    :param interpol: Fill space between distribution plots
    :param save: Save the picture on file
    :param file: Filename to save the picture
    :param tam: Size of the picture
    :param resolution: 
    :param cmap: Color map to be used on distribution plot 
    :param option: Distribution type to be passed for models
    :return: 
    """
    fig = plt.figure(figsize=tam)
    ax = fig.add_subplot(111)

    cm = plt.get_cmap(cmap)
    cNorm = pltcolors.Normalize(vmin=0, vmax=1)
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)

    if resolution is None: resolution = (max(original) - min(original)) / 100

    mi = []
    ma = []

    for count, fts in enumerate(models, start=0):
        if fts.has_probability_forecasting and distributions[count]:
            density = fts.forecast_ahead_distribution(
                original[time_from - fts.order:time_from],
                time_to,
                resolution=resolution)

            #plot_density_scatter(ax, cmap, density, fig, resolution, time_from, time_to)
            plot_density_rectange(ax, cm, density, fig, resolution, time_from,
                                  time_to)

        if fts.has_interval_forecasting and intervals:
            forecasts = fts.forecast_ahead_interval(
                original[time_from - fts.order:time_from], time_to)
            lower = [kk[0] for kk in forecasts]
            upper = [kk[1] for kk in forecasts]
            mi.append(min(lower))
            ma.append(max(upper))
            for k in np.arange(0, time_from - fts.order):
                lower.insert(0, None)
                upper.insert(0, None)
            ax.plot(lower,
                    color=colors[count],
                    label=fts.shortname,
                    linewidth=linewidth)
            ax.plot(upper, color=colors[count], linewidth=linewidth * 1.5)

    ax.plot(original,
            color='black',
            label="Original",
            linewidth=linewidth * 1.5)
    handles0, labels0 = ax.get_legend_handles_labels()
    if True in distributions:
        lgd = ax.legend(handles0, labels0, loc=2)
    else:
        lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
    _mi = min(mi)
    if _mi < 0:
        _mi *= 1.1
    else:
        _mi *= 0.9
    _ma = max(ma)
    if _ma < 0:
        _ma *= 0.9
    else:
        _ma *= 1.1

    ax.set_ylim([_mi, _ma])
    ax.set_ylabel('F(T)')
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])

    cUtil.show_and_save_image(fig, file, save, lgd=lgd)
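
A hedged sketch of an ahead-forecasting comparison, assuming a model trained as in the earlier examples and exposing the has_interval_forecasting / has_probability_forecasting flags (the indices are illustrative):

# Usage sketch: interval and distribution forecasts 20 steps ahead, starting at index 100
fs = Grid.GridPartitioner(data=data, npart=10)
model = pwfts.ProbabilisticWeightedFTS("PWFTS")
model.train(data, sets=fs.sets, order=2)

plot_compared_intervals_ahead(data, [model], colors=["blue"], distributions=[True],
                              time_from=100, time_to=20, intervals=True,
                              tam=[20, 5], cmap="Blues")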
Example #18
def plot_dataframe_point(file_synthetic,
                         file_analytic,
                         experiments,
                         tam,
                         save=False,
                         file=None,
                         sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                         sort_ascend=[1, 1, 1, 1],
                         save_best=False,
                         ignore=None,
                         replace=None):

    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)

    axes[0].set_title('RMSE')
    axes[1].set_title('SMAPE')
    axes[2].set_title('U Statistic')

    dat_syn = pd.read_csv(file_synthetic,
                          sep=";",
                          usecols=point_dataframe_synthetic_columns())

    bests = find_best(dat_syn, sort_columns, sort_ascend)

    dat_ana = pd.read_csv(
        file_analytic,
        sep=";",
        usecols=point_dataframe_analytic_columns(experiments))

    data_columns = analytical_data_columns(experiments)

    if save_best:
        dat = pd.DataFrame.from_dict(bests, orient='index')
        dat.to_csv(Util.uniquefilename(
            file_synthetic.replace("synthetic", "best")),
                   sep=";",
                   index=False)

    rmse = []
    smape = []
    u = []
    times = []
    labels = []

    for b in sorted(bests.keys()):
        if check_ignore_list(b, ignore):
            continue

        best = bests[b]
        tmp = dat_ana[(dat_ana.Model == best["Model"])
                      & (dat_ana.Order == best["Order"])
                      & (dat_ana.Scheme == best["Scheme"]) &
                      (dat_ana.Partitions == best["Partitions"])]
        rmse.append(extract_measure(tmp, 'RMSE', data_columns))
        smape.append(extract_measure(tmp, 'SMAPE', data_columns))
        u.append(extract_measure(tmp, 'U', data_columns))
        times.append(extract_measure(tmp, 'TIME', data_columns))

        labels.append(
            check_replace_list(best["Model"] + " " + str(best["Order"]),
                               replace))

    axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
    axes[0].set_title("RMSE")
    axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
    axes[1].set_title("SMAPE")
    axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
    axes[2].set_title("U Statistic")

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)
Example #19
def sliding_window_simple_search(data, windowsize, model, partitions, orders,
                                 **kwargs):

    _3d = len(orders) > 1
    ret = []
    errors = np.zeros((len(orders), len(partitions)))
    forecasted_best = []

    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)

    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None

    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)

    progressbar = kwargs.get('progressbar', None)

    rng1 = enumerate(partitions, start=0)

    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)

    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)

        rng2 = enumerate(orders, start=0)

        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)

        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8,
                                                       **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted[:-1])))
                    else:
                        _error.append(
                            Measures.rmse(np.array(test[o:]),
                                          np.array(forecasted)))
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts: ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(
                        np.array(test[o:]), np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted

    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)

    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")

    if _3d:
        # plot the RMSE surface over (partitions, orders)
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X,
                                Y,
                                errors,
                                rstride=1,
                                cstride=1,
                                antialiased=True)
    else:
        # single order: plot the RMSE curve over the number of partitions
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors[0, :])
    ret.append(best)
    ret.append(forecasted_best)

    # plt.tight_layout()

    file = kwargs.get('file', None)
    save = kwargs.get('save', False)

    Util.show_and_save_image(fig, file, save)

    return ret
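
A hedged sliding-window search call; the window size and the partition/order ranges are illustrative, and the model class is the one used throughout these examples:

# Usage sketch: cross-validated search over partitions x orders with sliding windows
best, forecasted_best = sliding_window_simple_search(
    data, 200, pwfts.ProbabilisticWeightedFTS,
    partitions=[10, 20, 30], orders=[1, 2],
    progressbar=False, save=True, file="sliding_search.png")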
Example #20
def unified_scaled_interval(
        experiments,
        tam,
        save=False,
        file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True],
        save_best=False,
        ignore=None,
        replace=None):
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)

    axes[0].set_title('Sharpness')
    axes[1].set_title('Resolution')
    axes[2].set_title('Coverage')

    models = {}

    for experiment in experiments:

        mdl = {}

        dat_syn = pd.read_csv(experiment[0],
                              sep=";",
                              usecols=interval_dataframe_synthetic_columns())

        bests = find_best(dat_syn, sort_columns, sort_ascend)

        dat_ana = pd.read_csv(experiment[1],
                              sep=";",
                              usecols=interval_dataframe_analytic_columns(
                                  experiment[2]))

        sharpness = []
        resolution = []
        coverage = []
        times = []

        data_columns = analytical_data_columns(experiment[2])

        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue

            if b not in models:
                models[b] = {}
                models[b]['sharpness'] = []
                models[b]['resolution'] = []
                models[b]['coverage'] = []
                models[b]['times'] = []

            if b not in mdl:
                mdl[b] = {}
                mdl[b]['sharpness'] = []
                mdl[b]['resolution'] = []
                mdl[b]['coverage'] = []
                mdl[b]['times'] = []

            best = bests[b]
            print(best)
            tmp = dat_ana[(dat_ana.Model == best["Model"])
                          & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"]) &
                          (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'Sharpness', data_columns)
            mdl[b]['sharpness'].extend(tmpl)
            sharpness.extend(tmpl)
            tmpl = extract_measure(tmp, 'Resolution', data_columns)
            mdl[b]['resolution'].extend(tmpl)
            resolution.extend(tmpl)
            tmpl = extract_measure(tmp, 'Coverage', data_columns)
            mdl[b]['coverage'].extend(tmpl)
            coverage.extend(tmpl)
            tmpl = extract_measure(tmp, 'TIME', data_columns)
            mdl[b]['times'].extend(tmpl)
            times.extend(tmpl)

            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)

        sharpness_param = scale_params(sharpness)
        resolution_param = scale_params(resolution)
        coverage_param = scale_params(coverage)
        times_param = scale_params(times)

        for key in sorted(mdl.keys()):
            models[key]['sharpness'].extend(
                scale(mdl[key]['sharpness'], sharpness_param))
            models[key]['resolution'].extend(
                scale(mdl[key]['resolution'], resolution_param))
            models[key]['coverage'].extend(
                scale(mdl[key]['coverage'], coverage_param))
            models[key]['times'].extend(scale(mdl[key]['times'], times_param))

    sharpness = []
    resolution = []
    coverage = []
    times = []
    labels = []
    for key in sorted(models.keys()):
        sharpness.append(models[key]['sharpness'])
        resolution.append(models[key]['resolution'])
        coverage.append(models[key]['coverage'])
        times.append(models[key]['times'])
        labels.append(models[key]['label'])

    axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
    axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
    axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)

    plt.tight_layout()

    Util.show_and_save_image(fig, file, save)