def plot_sets(data, sets, titles, tam=[12, 10], save=False, file=None):
    """
    Plot the membership functions of several fuzzy partitions, one subplot per
    partition, stacked vertically.

    :param data: time series data (kept for interface compatibility; only the
        fuzzy sets themselves are drawn)
    :param sets: list of partitions, each a dict mapping set name -> fuzzy set
    :param titles: one subplot title per partition, same length as sets
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    """
    num = len(sets)
    # fix: removed dead code (max/min of data were computed but never used,
    # plus several commented-out leftovers)
    fig, axes = plt.subplots(nrows=num, ncols=1, figsize=tam)
    for k in np.arange(0, num):
        ticks = []
        x = []
        ax = axes[k]
        ax.set_title(titles[k])
        ax.set_ylim([0, 1.1])
        for key in sets[k].keys():
            s = sets[k][key]
            if s.mf == Membership.trimf:
                # triangular set: the three parameters have memberships 0, 1, 0
                ax.plot(s.parameters, [0, 1, 0])
            elif s.mf == Membership.gaussmf:
                # gaussian set: sample the membership over [lower, upper)
                tmpx = [kk for kk in np.arange(s.lower, s.upper)]
                tmpy = [s.membership(kk) for kk in np.arange(s.lower, s.upper)]
                ax.plot(tmpx, tmpy)
            elif s.mf == Membership.trapmf:
                # trapezoidal set: the four parameters have memberships 0, 1, 1, 0
                ax.plot(s.parameters, [0, 1, 1, 0])
            ticks.append(str(round(s.centroid, 0)) + '\n' + s.name)
            x.append(s.centroid)
        # label the x axis with each set's centroid value and name
        ax.xaxis.set_ticklabels(ticks)
        ax.xaxis.set_ticks(x)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def plot_residuals(targets, models, tam=[8, 8], save=False, file=None):
    """
    Plot residual diagnostics (series, autocorrelation, histogram) for several
    models, one row of three panels per model.

    :param targets: observed values the forecasts are compared against
    :param models: list of fitted models exposing forecast(), order and shortname
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    """
    fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
    single = len(models) <= 1
    for row_ix, model in enumerate(models, start=0):
        row = axes if single else axes[row_ix]
        predicted = model.forecast(targets)
        resid = residuals(targets, predicted, model.order)
        mu = np.mean(resid)
        sig = np.std(resid)
        top_row = row_ix == 0
        if top_row:
            row[0].set_title("Residuals", size='large')
        row[0].set_ylabel(model.shortname, size='large')
        row[0].set_xlabel(' ')
        row[0].plot(resid)
        if top_row:
            row[1].set_title("Residuals Autocorrelation", size='large')
        row[1].set_ylabel('ACS')
        row[1].set_xlabel('Lag')
        row[1].acorr(resid)
        if top_row:
            row[2].set_title("Residuals Histogram", size='large')
        row[2].set_ylabel('Freq')
        row[2].set_xlabel('Bins')
        row[2].hist(resid)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def single_plot_residuals(targets, forecasts, order, tam=[8, 8], save=False, file=None):
    """
    Plot residual diagnostics (series, autocorrelation, histogram) for a single
    forecast series, in one row of three panels.

    :param targets: observed values the forecasts are compared against
    :param forecasts: forecasted values
    :param order: model order, used to align forecasts with targets
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    """
    fig, panels = plt.subplots(nrows=1, ncols=3, figsize=tam)
    resid = residuals(targets, forecasts, order)
    panel = panels[0]
    panel.set_title("Residuals", size='large')
    panel.set_ylabel("Model", size='large')
    panel.set_xlabel(' ')
    panel.plot(resid)
    panel = panels[1]
    panel.set_title("Residuals Autocorrelation", size='large')
    panel.set_ylabel('ACS')
    panel.set_xlabel('Lag')
    panel.acorr(resid)
    panel = panels[2]
    panel.set_title("Residuals Histogram", size='large')
    panel.set_ylabel('Freq')
    panel.set_xlabel('Bins')
    panel.hist(resid)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def pftsExploreOrderAndPartitions(data, save=False, file=None):
    """
    Explore the sensitivity of a Probabilistic Weighted FTS to its order and to
    the number of partitions, plotting point and interval forecasts in a 4-row
    figure (rows 0/2: varying order; rows 1/3: varying partitions).

    :param data: the time series used both for training and in-sample forecasting
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    """
    fig, axes = plt.subplots(nrows=4, ncols=1, figsize=[6, 8])
    # fix: keep the partitioner object -- the original stored ".sets" here and
    # then accessed ".sets" on it AGAIN below, raising AttributeError
    data_fs1 = Grid.GridPartitioner(data=data, npart=10)
    mi = []  # lower bounds seen, for the shared y-limit
    ma = []  # upper bounds seen, for the shared y-limit
    axes[0].set_title('Point Forecasts by Order')
    axes[2].set_title('Interval Forecasts by Order')
    for order in np.arange(1, 6):
        fts = pwfts.ProbabilisticWeightedFTS("")
        fts.shortname = "n = " + str(order)
        fts.train(data, sets=data_fs1.sets, order=order)
        point_forecasts = fts.forecast(data)
        interval_forecasts = fts.forecast_interval(data)
        lower = [kk[0] for kk in interval_forecasts]
        upper = [kk[1] for kk in interval_forecasts]
        mi.append(min(lower) * 0.95)
        ma.append(max(upper) * 1.05)
        # shift the forecasts right by 'order' so they align with the originals
        for k in np.arange(0, order):
            point_forecasts.insert(0, None)
            lower.insert(0, None)
            upper.insert(0, None)
        axes[0].plot(point_forecasts, label=fts.shortname)
        axes[2].plot(lower, label=fts.shortname)
        axes[2].plot(upper)
    axes[1].set_title('Point Forecasts by Number of Partitions')
    axes[3].set_title('Interval Forecasts by Number of Partitions')
    for partitions in np.arange(5, 11):
        # fix: same double-".sets" bug as above -- keep the partitioner object
        data_fs = Grid.GridPartitioner(data=data, npart=partitions)
        fts = pwfts.ProbabilisticWeightedFTS("")
        fts.shortname = "q = " + str(partitions)
        fts.train(data, sets=data_fs.sets, order=1)
        point_forecasts = fts.forecast(data)
        interval_forecasts = fts.forecast_interval(data)
        lower = [kk[0] for kk in interval_forecasts]
        upper = [kk[1] for kk in interval_forecasts]
        mi.append(min(lower) * 0.95)
        ma.append(max(upper) * 1.05)
        point_forecasts.insert(0, None)
        lower.insert(0, None)
        upper.insert(0, None)
        axes[1].plot(point_forecasts, label=fts.shortname)
        axes[3].plot(lower, label=fts.shortname)
        axes[3].plot(upper)
    for ax in axes:
        ax.set_ylabel('F(T)')
        ax.set_xlabel('T')
        ax.plot(data, label="Original", color="black", linewidth=1.5)
        handles, labels = ax.get_legend_handles_labels()
        ax.legend(handles, labels, loc=2, bbox_to_anchor=(1, 1))
        ax.set_ylim([min(mi), max(ma)])
        ax.set_xlim([0, len(data)])
    plt.tight_layout()
    cUtil.show_and_save_image(fig, file, save)
def plot_sets(partitioner, start=0, end=10, step=1, tam=[5, 5], colors=None,
              save=False, file=None, axes=None, data=None, window_size=1,
              only_lines=False, legend=True):
    """
    Plot the (possibly time-perturbated) fuzzy sets of a partitioner over a
    time window, either as full membership shapes or as centroid lines.

    :param partitioner: partitioner exposing ordered_sets and sets
    :param start: first time index to plot
    :param end: last time index (exclusive)
    :param step: stride between plotted time indices
    :param tam: figure size when a new figure is created
    :param colors: unused; kept for interface compatibility
    :param save: True to persist the figure to disk
    :param file: filename used when save is True; saving only happens if given
    :param axes: optional existing Axes to draw on
    :param data: optional original series overlaid in black
    :param window_size: perturbation window; t is snapped to its window start
    :param only_lines: True to draw only a line through each set's value
    :param legend: True to draw the legend
    """
    rng = np.arange(start, end, step)  # renamed: don't shadow the builtin 'range'
    ticks = []
    if axes is None:
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)
    else:
        # fix: the original left 'fig' undefined when the caller supplied axes,
        # raising NameError at show_and_save_image when file was given
        fig = axes.get_figure()
    for ct, key in enumerate(partitioner.ordered_sets):
        fset = partitioner.sets[key]
        if not only_lines:
            for t in rng:
                tdisp = t - (t % window_size)
                # side effect: membership() populates perturbated_parameters
                fset.membership(0, tdisp)
                param = fset.perturbated_parameters[str(tdisp)]
                if fset.mf == Membership.trimf:
                    if t == start:
                        line = axes.plot([t, t + 1, t], param, label=fset.name)
                        # remember the color so later windows reuse it
                        fset.metadata['color'] = line[0].get_color()
                    else:
                        axes.plot([t, t + 1, t], param, c=fset.metadata['color'])
                ticks.extend(["t+" + str(t), ""])
        else:
            tmp = []
            for t in rng:
                tdisp = t - (t % window_size)
                fset.membership(0, tdisp)
                param = fset.perturbated_parameters[str(tdisp)]
                tmp.append(np.polyval(param, tdisp))
            axes.plot(rng, tmp, ls="--", c="blue")
    axes.set_ylabel("Universe of Discourse")
    axes.set_xlabel("Time")
    plt.xticks([k for k in rng], ticks, rotation='vertical')
    if legend:
        handles0, labels0 = axes.get_legend_handles_labels()
        lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
    if data is not None:
        axes.plot(np.arange(start, start + len(data), 1), data, c="black")
    if file is not None:
        plt.tight_layout()
        Util.show_and_save_image(fig, file, save)
def plot_sets_conditional(model, data, start=0, end=10, step=1, tam=[5, 5],
                          colors=None, save=False, file=None, axes=None):
    """
    Plot the fuzzy sets of a conditional-variance model, perturbated at each
    time step by the model's data-dependent perturbation factors.

    :param model: model exposing partitioner and perturbation_factors()
    :param data: series driving the perturbation (data[t] per time step)
    :param start: first time index to plot
    :param end: last time index (exclusive)
    :param step: stride between plotted time indices
    :param tam: figure size when a new figure is created
    :param colors: unused; kept for interface compatibility
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param axes: optional existing Axes to draw on
    """
    rng = np.arange(start, end, step)  # renamed: don't shadow the builtin 'range'
    ticks = []
    if axes is None:
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=tam)
    else:
        # fix: the original left 'fig' undefined when the caller supplied axes,
        # crashing at show_and_save_image
        fig = axes.get_figure()
    for ct, key in enumerate(model.partitioner.ordered_sets):
        # renamed from 'set': don't shadow the builtin
        fset = model.partitioner.sets[key]
        for t in rng:
            tdisp = model.perturbation_factors(data[t])
            fset.perturbate_parameters(tdisp[ct])
            param = fset.perturbated_parameters[str(tdisp[ct])]
            if fset.mf == Membership.trimf:
                if t == start:
                    line = axes.plot([t, t + 1, t], param, label=fset.name)
                    # remember the color so later time steps reuse it
                    fset.metadata['color'] = line[0].get_color()
                else:
                    axes.plot([t, t + 1, t], param, c=fset.metadata['color'])
            ticks.extend(["t+" + str(t), ""])
    axes.set_ylabel("Universe of Discourse")
    axes.set_xlabel("Time")
    plt.xticks([k for k in rng], ticks, rotation='vertical')
    handles0, labels0 = axes.get_legend_handles_labels()
    lgd = axes.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
    if data is not None:
        axes.plot(np.arange(start, start + len(data), 1), data, c="black")
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def plotResiduals(targets, models, tam=[8, 8], save=False, file=None):
    """
    Plot residuals and statistics

    :param targets: observed values the forecasts are compared against
    :param models: list of fitted models exposing forecast() and order
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :return:
    """
    fig, axes = plt.subplots(nrows=len(models), ncols=3, figsize=tam)
    for c, mfts in enumerate(models):
        if len(models) > 1:
            ax = axes[c]
        else:
            ax = axes
        forecasts = mfts.forecast(targets)
        res = residuals(targets, forecasts, mfts.order)
        mu = np.mean(res)
        sig = np.std(res)
        ax[0].set_title("Residuals Mean=" + str(mu) + " STD = " + str(sig))
        ax[0].set_ylabel('E')
        ax[0].set_xlabel('T')
        ax[0].plot(res)
        ax[1].set_title("Residuals Autocorrelation")
        ax[1].set_ylabel('ACS')
        ax[1].set_xlabel('Lag')
        ax[1].acorr(res)
        ax[2].set_title("Residuals Histogram")
        ax[2].set_ylabel('Freq')
        ax[2].set_xlabel('Bins')
        ax[2].hist(res)
        # fix: removed the dead "c += 1" -- enumerate() already advances c and
        # the manual increment was discarded on the next iteration anyway
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def plot_residuals_by_model(targets, models, tam=[8, 8], save=False, file=None):
    """
    Plot residual diagnostics (series, autocorrelation, histogram, QQ plot)
    for several models, one row of four panels per model.

    :param targets: observed values the predictions are compared against
    :param models: list of fitted models exposing predict(), order and shortname
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    """
    import scipy as sp
    fig, axes = plt.subplots(nrows=len(models), ncols=4, figsize=tam)
    single = len(models) <= 1
    for row_ix, model in enumerate(models, start=0):
        row = axes if single else axes[row_ix]
        predictions = model.predict(targets)
        resid = residuals(targets, predictions, model.order + 1)
        mu = np.mean(resid)
        sig = np.std(resid)
        top_row = row_ix == 0
        if top_row:
            row[0].set_title("Residuals", size='large')
        row[0].set_ylabel(model.shortname, size='large')
        row[0].set_xlabel(' ')
        row[0].plot(resid)
        if top_row:
            row[1].set_title("Autocorrelation", size='large')
        row[1].set_ylabel('ACS')
        row[1].set_xlabel('Lag')
        row[1].acorr(resid)
        if top_row:
            row[2].set_title("Histogram", size='large')
        row[2].set_ylabel('Freq')
        row[2].set_xlabel('Bins')
        row[2].hist(resid)
        if top_row:
            row[3].set_title("QQ Plot", size='large')
        _, (__, ___, r) = sp.stats.probplot(resid, plot=row[3], fit=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def single_plot_residuals(res, order, tam=[10, 7], save=False, file=None):
    """
    Plot residual diagnostics (series, autocorrelation, histogram, QQ plot)
    for a single residual series, on a 2x2 grid of panels.

    :param res: the residual series to analyse
    :param order: model order (unused here; kept for interface compatibility)
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    """
    import scipy as sp
    fig, grid = plt.subplots(nrows=2, ncols=2, figsize=tam)
    top_left = grid[0][0]
    top_left.set_title("Residuals", size='large')
    top_left.plot(res)
    top_right = grid[0][1]
    top_right.set_title("Autocorrelation", size='large')
    top_right.set_ylabel('ACF')
    top_right.set_xlabel('Lag')
    top_right.acorr(res)
    bottom_left = grid[1][0]
    bottom_left.set_title("Histogram", size='large')
    bottom_left.set_ylabel('Freq')
    bottom_left.set_xlabel('Bins')
    bottom_left.hist(res)
    _, (__, ___, r) = sp.stats.probplot(res, plot=grid[1][1], fit=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def simpleSearch_RMSE(train, test, model, partitions, orders, save=False, file=None,
                      tam=[10, 15], plotforecasts=False, elev=30, azim=144,
                      intervals=False, parameters=None,
                      partitioner=Grid.GridPartitioner, transformation=None,
                      indexer=None):
    """
    Grid-search the (partitions x orders) hyperparameter space of a model and
    plot the resulting error surface (3D when several orders are given) or
    error curve, optionally overlaying every forecast.

    :param train: training data
    :param test: test data used to measure the error
    :param model: model class (called with a descriptive name string)
    :param partitions: iterable of partition counts to try
    :param orders: iterable of model orders to try
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param tam: figure size, [width, height]
    :param plotforecasts: True to plot every candidate's forecasts
    :param elev: 3D plot elevation angle
    :param azim: 3D plot azimuth angle
    :param intervals: True to evaluate interval forecasts instead of points
    :param parameters: extra parameters forwarded to model.train()
    :param partitioner: partitioner class used to build the fuzzy sets
    :param transformation: optional data transformation appended to the model
    :param indexer: unused; kept for interface compatibility
    :return: [best_model, best_forecasts, min_error]
    """
    _3d = len(orders) > 1
    ret = []
    if _3d:
        errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
    else:
        errors = []
    forecasted_best = []
    fig = plt.figure(figsize=tam)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(train)])
        ax0.set_ylim([min(train) * 0.9, max(train) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    min_rmse = 1000000.0
    best = None
    for pc, p in enumerate(partitions, start=0):
        sets = partitioner(data=train, npart=p, transformation=transformation).sets
        for oc, o in enumerate(orders, start=0):
            fts = model("q = " + str(p) + " n = " + str(o))
            fts.append_transformation(transformation)
            fts.train(train, sets=sets, order=o, parameters=parameters)
            if not intervals:
                forecasted = fts.forecast(test)
                if not fts.has_seasonality:
                    error = Measures.rmse(np.array(test[o:]), np.array(forecasted[:-1]))
                else:
                    error = Measures.rmse(np.array(test[o:]), np.array(forecasted))
                # left-pad so the forecast aligns with the original series
                for kk in range(o):
                    forecasted.insert(0, None)
                if plotforecasts:
                    ax0.plot(forecasted, label=fts.name)
            else:
                forecasted = fts.forecast_interval(test)
                error = 1.0 - Measures.rmse_interval(np.array(test[o:]), np.array(forecasted[:-1]))
            if _3d:
                errors[oc, pc] = error
            else:
                errors.append(error)
            if error < min_rmse:
                min_rmse = error
                best = fts
                forecasted_best = forecasted
    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
    if _3d:
        # fix: the original constructed the 3D axes twice (once under "if _3d",
        # then again under "if _3d and not plotforecasts"); build it once
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        ax1.plot(partitions, errors)
    ret.append(best)
    ret.append(forecasted_best)
    ret.append(min_rmse)
    cUtil.show_and_save_image(fig, file, save)
    return ret
def plot_dataframe_interval(
        file_synthetic, file_analytic, experiments, tam, save=False, file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True], save_best=False,
        ignore=None, replace=None):
    """
    Boxplot the sharpness, resolution and coverage of the best interval-forecast
    model of each family, selected from the synthetic results file.

    :param file_synthetic: CSV of aggregated (synthetic) results, ';'-separated
    :param file_analytic: CSV of per-experiment (analytic) results, ';'-separated
    :param experiments: number of experiments in the analytic file
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in the synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: True to write the selected bests to a '...best...' CSV
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
    axes[0].set_title('Sharpness')
    axes[1].set_title('Resolution')
    axes[2].set_title('Coverage')
    synthetic = pd.read_csv(file_synthetic, sep=";",
                            usecols=interval_dataframe_synthetic_columns())
    bests = find_best(synthetic, sort_columns, sort_ascend)
    analytic = pd.read_csv(file_analytic, sep=";",
                           usecols=interval_dataframe_analytic_columns(experiments))
    data_columns = analytical_data_columns(experiments)
    if save_best:
        summary = pd.DataFrame.from_dict(bests, orient='index')
        summary.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic", "best")),
                       sep=";", index=False)
    sharpness = []
    resolution = []
    coverage = []
    times = []
    labels = []
    bounds_shp = []
    for model_key in sorted(bests.keys()):
        if check_ignore_list(model_key, ignore):
            continue
        champion = bests[model_key]
        rows = analytic[(analytic.Model == champion["Model"])
                        & (analytic.Order == champion["Order"])
                        & (analytic.Scheme == champion["Scheme"])
                        & (analytic.Partitions == champion["Partitions"])]
        sharpness.append(extract_measure(rows, 'Sharpness', data_columns))
        resolution.append(extract_measure(rows, 'Resolution', data_columns))
        coverage.append(extract_measure(rows, 'Coverage', data_columns))
        times.append(extract_measure(rows, 'TIME', data_columns))
        labels.append(check_replace_list(champion["Model"] + " " + str(champion["Order"]),
                                         replace))
    axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
    axes[0].set_title("Sharpness")
    axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
    axes[1].set_title("Resolution")
    axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
    axes[2].set_title("Coverage")
    axes[2].set_ylim([0, 1.1])
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def unified_scaled_point(experiments, tam, save=False, file=None,
                         sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                         sort_ascend=[1, 1, 1, 1], save_best=False,
                         ignore=None, replace=None):
    """
    Boxplot RMSE, SMAPE and Theil's U of the best point-forecast model of each
    family, pooled across several experiments after min-max scaling every
    experiment's measurements to a common range.

    :param experiments: list of (synthetic_csv, analytic_csv, experiment_count)
        tuples, one per experiment
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in each synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: unused in this function -- TODO confirm; sibling plotters
        persist the selected bests when True
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
    axes[0].set_title('RMSE')
    axes[1].set_title('SMAPE')
    axes[2].set_title('U Statistic')
    models = {}  # scaled measurements per model, accumulated across experiments
    for experiment in experiments:
        mdl = {}  # raw measurements per model for THIS experiment only
        dat_syn = pd.read_csv(experiment[0], sep=";",
                              usecols=point_dataframe_synthetic_columns())
        bests = find_best(dat_syn, sort_columns, sort_ascend)
        dat_ana = pd.read_csv(experiment[1], sep=";",
                              usecols=point_dataframe_analytic_columns(experiment[2]))
        rmse = []
        smape = []
        u = []
        times = []
        data_columns = analytical_data_columns(experiment[2])
        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue
            if b not in models:
                models[b] = {}
                models[b]['rmse'] = []
                models[b]['smape'] = []
                models[b]['u'] = []
                models[b]['times'] = []
            if b not in mdl:
                mdl[b] = {}
                mdl[b]['rmse'] = []
                mdl[b]['smape'] = []
                mdl[b]['u'] = []
                mdl[b]['times'] = []
            best = bests[b]
            # analytic rows matching this model's best configuration
            tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"])
                          & (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'RMSE', data_columns)
            mdl[b]['rmse'].extend(tmpl)
            rmse.extend(tmpl)
            tmpl = extract_measure(tmp, 'SMAPE', data_columns)
            mdl[b]['smape'].extend(tmpl)
            smape.extend(tmpl)
            tmpl = extract_measure(tmp, 'U', data_columns)
            mdl[b]['u'].extend(tmpl)
            u.extend(tmpl)
            tmpl = extract_measure(tmp, 'TIME', data_columns)
            mdl[b]['times'].extend(tmpl)
            times.extend(tmpl)
            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)
        print("GLOBAL")
        # scaling parameters are computed over ALL models of this experiment so
        # measurements become comparable across experiments
        rmse_param = scale_params(rmse)
        stats("rmse", rmse)
        smape_param = scale_params(smape)
        stats("smape", smape)
        u_param = scale_params(u)
        stats("u", u)
        times_param = scale_params(times)
        # NOTE(review): iterates models.keys() but indexes mdl[key]; a model seen
        # in an earlier experiment but absent from this one would raise KeyError
        # (the probabilistic variant iterates mdl.keys()) -- confirm with real data
        for key in sorted(models.keys()):
            models[key]['rmse'].extend(scale(mdl[key]['rmse'], rmse_param))
            models[key]['smape'].extend(scale(mdl[key]['smape'], smape_param))
            models[key]['u'].extend(scale(mdl[key]['u'], u_param))
            models[key]['times'].extend(scale(mdl[key]['times'], times_param))
    # rebuild the per-measure lists in label order for the boxplots
    rmse = []
    smape = []
    u = []
    times = []
    labels = []
    for key in sorted(models.keys()):
        print(key)
        rmse.append(models[key]['rmse'])
        stats("rmse", models[key]['rmse'])
        smape.append(models[key]['smape'])
        stats("smape", models[key]['smape'])
        u.append(models[key]['u'])
        stats("u", models[key]['u'])
        times.append(models[key]['times'])
        labels.append(models[key]['label'])
    axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
    axes[0].set_title("RMSE")
    axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
    axes[1].set_title("SMAPE")
    axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
    axes[2].set_title("U Statistic")
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def plot_dataframe_probabilistic(
        file_synthetic, file_analytic, experiments, tam, save=False, file=None,
        sort_columns=['CRPS1AVG', 'CRPS2AVG', 'CRPS1STD', 'CRPS2STD'],
        sort_ascend=[True, True, True, True], save_best=False,
        ignore=None, replace=None):
    """
    Boxplot the interval CRPS and distribution CRPS of the best probabilistic
    model of each family, selected from the synthetic results file.

    :param file_synthetic: CSV of aggregated (synthetic) results, ';'-separated
    :param file_analytic: CSV of per-experiment (analytic) results, ';'-separated
    :param experiments: number of experiments in the analytic file
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in the synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: True to write the selected bests to a '...best...' CSV
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
    axes[0].set_title('CRPS')
    axes[1].set_title('CRPS')
    synthetic = pd.read_csv(file_synthetic, sep=";",
                            usecols=probabilistic_dataframe_synthetic_columns())
    bests = find_best(synthetic, sort_columns, sort_ascend)
    analytic = pd.read_csv(file_analytic, sep=";",
                           usecols=probabilistic_dataframe_analytic_columns(experiments))
    data_columns = analytical_data_columns(experiments)
    if save_best:
        summary = pd.DataFrame.from_dict(bests, orient='index')
        summary.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic", "best")),
                       sep=";", index=False)
    crps1 = []
    crps2 = []
    labels = []
    for model_key in sorted(bests.keys()):
        if check_ignore_list(model_key, ignore):
            continue
        champion = bests[model_key]
        rows = analytic[(analytic.Model == champion["Model"])
                        & (analytic.Order == champion["Order"])
                        & (analytic.Scheme == champion["Scheme"])
                        & (analytic.Partitions == champion["Partitions"])]
        crps1.append(extract_measure(rows, 'CRPS_Interval', data_columns))
        crps2.append(extract_measure(rows, 'CRPS_Distribution', data_columns))
        labels.append(check_replace_list(champion["Model"] + " " + str(champion["Order"]),
                                         replace))
    axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
    axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def unified_scaled_probabilistic(experiments, tam, save=False, file=None,
                                 sort_columns=['CRPSAVG', 'CRPSSTD'],
                                 sort_ascend=[True, True], save_best=False,
                                 ignore=None, replace=None):
    """
    Boxplot the interval CRPS and distribution CRPS of the best probabilistic
    model of each family, pooled across several experiments after min-max
    scaling every experiment's measurements to a common range.

    :param experiments: list of (synthetic_csv, analytic_csv, experiment_count)
        tuples, one per experiment
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in each synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: unused in this function; kept for interface compatibility
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    # fix: the original created a single Axes (nrows=1, ncols=1) but indexed
    # axes[0] and axes[1] for the boxplots below, which raised a TypeError;
    # two stacked axes are needed (the commented-out second title confirmed it)
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=tam)
    axes[0].set_title('CRPS')
    axes[1].set_title('CRPS Distribution Ahead')
    models = {}  # scaled measurements per model, accumulated across experiments
    for experiment in experiments:
        print(experiment)
        mdl = {}  # raw measurements per model for THIS experiment only
        dat_syn = pd.read_csv(experiment[0], sep=";",
                              usecols=probabilistic_dataframe_synthetic_columns())
        bests = find_best(dat_syn, sort_columns, sort_ascend)
        dat_ana = pd.read_csv(experiment[1], sep=";",
                              usecols=probabilistic_dataframe_analytic_columns(experiment[2]))
        crps1 = []
        crps2 = []
        data_columns = analytical_data_columns(experiment[2])
        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue
            if b not in models:
                models[b] = {}
                models[b]['crps1'] = []
                models[b]['crps2'] = []
            if b not in mdl:
                mdl[b] = {}
                mdl[b]['crps1'] = []
                mdl[b]['crps2'] = []
            best = bests[b]
            print(best)
            # analytic rows matching this model's best configuration
            tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"])
                          & (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'CRPS_Interval', data_columns)
            mdl[b]['crps1'].extend(tmpl)
            crps1.extend(tmpl)
            tmpl = extract_measure(tmp, 'CRPS_Distribution', data_columns)
            mdl[b]['crps2'].extend(tmpl)
            crps2.extend(tmpl)
            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)
        # scaling parameters over all models of this experiment
        crps1_param = scale_params(crps1)
        crps2_param = scale_params(crps2)
        for key in sorted(mdl.keys()):
            print(key)
            models[key]['crps1'].extend(scale(mdl[key]['crps1'], crps1_param))
            models[key]['crps2'].extend(scale(mdl[key]['crps2'], crps2_param))
    crps1 = []
    crps2 = []
    labels = []
    for key in sorted(models.keys()):
        crps1.append(models[key]['crps1'])
        crps2.append(models[key]['crps2'])
        labels.append(models[key]['label'])
    axes[0].boxplot(crps1, labels=labels, autorange=True, showmeans=True)
    axes[1].boxplot(crps2, labels=labels, autorange=True, showmeans=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def plot_dataframe_interval_pinball(
        file_synthetic, file_analytic, experiments, tam, save=False, file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True], save_best=False,
        ignore=None, replace=None):
    """
    Boxplot the pinball losses at quantiles 0.05, 0.25, 0.75 and 0.95 of the
    best interval-forecast model of each family, one horizontal panel per
    quantile.

    :param file_synthetic: CSV of aggregated (synthetic) results, ';'-separated
    :param file_analytic: CSV of per-experiment (analytic) results, ';'-separated
    :param experiments: number of experiments in the analytic file
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in the synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: True to write the selected bests to a '...best...' CSV
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
    axes[0].set_title(r'$\tau=0.05$')
    axes[1].set_title(r'$\tau=0.25$')
    axes[2].set_title(r'$\tau=0.75$')
    axes[3].set_title(r'$\tau=0.95$')
    synthetic = pd.read_csv(file_synthetic, sep=";",
                            usecols=interval_dataframe_synthetic_columns())
    bests = find_best(synthetic, sort_columns, sort_ascend)
    analytic = pd.read_csv(file_analytic, sep=";",
                           usecols=interval_dataframe_analytic_columns(experiments))
    data_columns = analytical_data_columns(experiments)
    if save_best:
        summary = pd.DataFrame.from_dict(bests, orient='index')
        summary.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic", "best")),
                       sep=";", index=False)
    quantiles = {'Q05': [], 'Q25': [], 'Q75': [], 'Q95': []}
    labels = []
    for model_key in sorted(bests.keys()):
        if check_ignore_list(model_key, ignore):
            continue
        champion = bests[model_key]
        rows = analytic[(analytic.Model == champion["Model"])
                        & (analytic.Order == champion["Order"])
                        & (analytic.Scheme == champion["Scheme"])
                        & (analytic.Partitions == champion["Partitions"])]
        for measure in ('Q05', 'Q25', 'Q75', 'Q95'):
            quantiles[measure].append(extract_measure(rows, measure, data_columns))
        labels.append(check_replace_list(champion["Model"] + " " + str(champion["Order"]),
                                         replace))
    for panel, measure in zip(axes, ('Q05', 'Q25', 'Q75', 'Q95')):
        panel.boxplot(quantiles[measure], labels=labels, vert=False,
                      autorange=True, showmeans=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def unified_scaled_interval_pinball(
        experiments, tam, save=False, file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True], save_best=False,
        ignore=None, replace=None):
    """
    Boxplot the pinball losses at quantiles 0.05, 0.25, 0.75 and 0.95 of the
    best interval-forecast model of each family, pooled across several
    experiments after min-max scaling every experiment's measurements.

    :param experiments: list of (synthetic_csv, analytic_csv, experiment_count)
        tuples, one per experiment
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in each synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: unused in this function -- TODO confirm; sibling plotters
        persist the selected bests when True
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=1, ncols=4, figsize=tam)
    axes[0].set_title(r'$\tau=0.05$')
    axes[1].set_title(r'$\tau=0.25$')
    axes[2].set_title(r'$\tau=0.75$')
    axes[3].set_title(r'$\tau=0.95$')
    models = {}  # scaled measurements per model, accumulated across experiments
    for experiment in experiments:
        mdl = {}  # raw measurements per model for THIS experiment only
        dat_syn = pd.read_csv(experiment[0], sep=";",
                              usecols=interval_dataframe_synthetic_columns())
        bests = find_best(dat_syn, sort_columns, sort_ascend)
        dat_ana = pd.read_csv(experiment[1], sep=";",
                              usecols=interval_dataframe_analytic_columns(experiment[2]))
        q05 = []
        q25 = []
        q75 = []
        q95 = []
        data_columns = analytical_data_columns(experiment[2])
        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue
            if b not in models:
                models[b] = {}
                models[b]['q05'] = []
                models[b]['q25'] = []
                models[b]['q75'] = []
                models[b]['q95'] = []
            if b not in mdl:
                mdl[b] = {}
                mdl[b]['q05'] = []
                mdl[b]['q25'] = []
                mdl[b]['q75'] = []
                mdl[b]['q95'] = []
            best = bests[b]
            print(best)
            # analytic rows matching this model's best configuration
            tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"])
                          & (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'Q05', data_columns)
            mdl[b]['q05'].extend(tmpl)
            q05.extend(tmpl)
            tmpl = extract_measure(tmp, 'Q25', data_columns)
            mdl[b]['q25'].extend(tmpl)
            q25.extend(tmpl)
            tmpl = extract_measure(tmp, 'Q75', data_columns)
            mdl[b]['q75'].extend(tmpl)
            q75.extend(tmpl)
            tmpl = extract_measure(tmp, 'Q95', data_columns)
            mdl[b]['q95'].extend(tmpl)
            q95.extend(tmpl)
            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)
        # scaling parameters are computed over all models of this experiment so
        # measurements become comparable across experiments
        q05_param = scale_params(q05)
        q25_param = scale_params(q25)
        q75_param = scale_params(q75)
        q95_param = scale_params(q95)
        # NOTE(review): iterates models.keys() but indexes mdl[key]; a model seen
        # in an earlier experiment but absent from this one would raise KeyError
        # -- confirm against real experiment files
        for key in sorted(models.keys()):
            models[key]['q05'].extend(scale(mdl[key]['q05'], q05_param))
            models[key]['q25'].extend(scale(mdl[key]['q25'], q25_param))
            models[key]['q75'].extend(scale(mdl[key]['q75'], q75_param))
            models[key]['q95'].extend(scale(mdl[key]['q95'], q95_param))
    # rebuild the per-quantile lists in label order for the boxplots
    q05 = []
    q25 = []
    q75 = []
    q95 = []
    labels = []
    for key in sorted(models.keys()):
        q05.append(models[key]['q05'])
        q25.append(models[key]['q25'])
        q75.append(models[key]['q75'])
        q95.append(models[key]['q95'])
        labels.append(models[key]['label'])
    axes[0].boxplot(q05, labels=labels, vert=False, autorange=True, showmeans=True)
    axes[1].boxplot(q25, labels=labels, vert=False, autorange=True, showmeans=True)
    axes[2].boxplot(q75, labels=labels, vert=False, autorange=True, showmeans=True)
    axes[3].boxplot(q95, labels=labels, vert=False, autorange=True, showmeans=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def plot_compared_intervals_ahead(original, models, colors, distributions, time_from, time_to,
                                  intervals=True, save=False, file=None, tam=[20, 5],
                                  resolution=None, cmap='Blues', linewidth=1.5):
    """
    Plot the forecasts of several one step ahead models, by point or by interval

    :param original: Original time series data (list)
    :param models: List of models to compare
    :param colors: List of models colors
    :param distributions: list of booleans, one per model: True to plot that
        model's forecast distribution
    :param time_from: index of data point to start the ahead forecasting
    :param time_to: number of steps ahead to forecast
    :param intervals: True to plot interval forecasts
    :param save: Save the picture on file
    :param file: Filename to save the picture
    :param tam: Size of the picture
    :param resolution: density grid resolution; defaults to 1% of the data range
    :param cmap: Color map to be used on distribution plot
    :param linewidth: base line width for the interval plots
    :return:
    """
    fig = plt.figure(figsize=tam)
    ax = fig.add_subplot(111)
    cm = plt.get_cmap(cmap)
    cNorm = pltcolors.Normalize(vmin=0, vmax=1)
    scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=cm)
    if resolution is None:
        resolution = (max(original) - min(original)) / 100
    mi = []  # lower bounds seen, for the y-limit
    ma = []  # upper bounds seen, for the y-limit
    for count, fts in enumerate(models, start=0):
        if fts.has_probability_forecasting and distributions[count]:
            density = fts.forecast_ahead_distribution(
                original[time_from - fts.order:time_from],
                time_to, resolution=resolution)
            plot_density_rectange(ax, cm, density, fig, resolution, time_from, time_to)
        if fts.has_interval_forecasting and intervals:
            forecasts = fts.forecast_ahead_interval(
                original[time_from - fts.order:time_from], time_to)
            lower = [kk[0] for kk in forecasts]
            upper = [kk[1] for kk in forecasts]
            mi.append(min(lower))
            ma.append(max(upper))
            # left-pad so the forecast aligns with the original series
            for k in np.arange(0, time_from - fts.order):
                lower.insert(0, None)
                upper.insert(0, None)
            ax.plot(lower, color=colors[count], label=fts.shortname, linewidth=linewidth)
            ax.plot(upper, color=colors[count], linewidth=linewidth * 1.5)
    ax.plot(original, color='black', label="Original", linewidth=linewidth * 1.5)
    handles0, labels0 = ax.get_legend_handles_labels()
    if True in distributions:
        lgd = ax.legend(handles0, labels0, loc=2)
    else:
        lgd = ax.legend(handles0, labels0, loc=2, bbox_to_anchor=(1, 1))
    if mi and ma:
        # fix: the original called min()/max() unconditionally, raising
        # ValueError when no model produced interval forecasts
        _mi = min(mi)
        if _mi < 0:
            _mi *= 1.1
        else:
            _mi *= 0.9
        _ma = max(ma)
        if _ma < 0:
            _ma *= 0.9
        else:
            _ma *= 1.1
        ax.set_ylim([_mi, _ma])
    ax.set_ylabel('F(T)')
    ax.set_xlabel('T')
    ax.set_xlim([0, len(original)])
    cUtil.show_and_save_image(fig, file, save, lgd=lgd)
def plot_dataframe_point(file_synthetic, file_analytic, experiments, tam,
                         save=False, file=None,
                         sort_columns=['UAVG', 'RMSEAVG', 'USTD', 'RMSESTD'],
                         sort_ascend=[1, 1, 1, 1], save_best=False,
                         ignore=None, replace=None):
    """
    Boxplot the RMSE, SMAPE and Theil's U of the best point-forecast model of
    each family, selected from the synthetic results file.

    :param file_synthetic: CSV of aggregated (synthetic) results, ';'-separated
    :param file_analytic: CSV of per-experiment (analytic) results, ';'-separated
    :param experiments: number of experiments in the analytic file
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in the synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: True to write the selected bests to a '...best...' CSV
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
    axes[0].set_title('RMSE')
    axes[1].set_title('SMAPE')
    axes[2].set_title('U Statistic')
    synthetic = pd.read_csv(file_synthetic, sep=";",
                            usecols=point_dataframe_synthetic_columns())
    bests = find_best(synthetic, sort_columns, sort_ascend)
    analytic = pd.read_csv(file_analytic, sep=";",
                           usecols=point_dataframe_analytic_columns(experiments))
    data_columns = analytical_data_columns(experiments)
    if save_best:
        summary = pd.DataFrame.from_dict(bests, orient='index')
        summary.to_csv(Util.uniquefilename(file_synthetic.replace("synthetic", "best")),
                       sep=";", index=False)
    rmse = []
    smape = []
    u = []
    times = []
    labels = []
    for model_key in sorted(bests.keys()):
        if check_ignore_list(model_key, ignore):
            continue
        champion = bests[model_key]
        rows = analytic[(analytic.Model == champion["Model"])
                        & (analytic.Order == champion["Order"])
                        & (analytic.Scheme == champion["Scheme"])
                        & (analytic.Partitions == champion["Partitions"])]
        rmse.append(extract_measure(rows, 'RMSE', data_columns))
        smape.append(extract_measure(rows, 'SMAPE', data_columns))
        u.append(extract_measure(rows, 'U', data_columns))
        times.append(extract_measure(rows, 'TIME', data_columns))
        labels.append(check_replace_list(champion["Model"] + " " + str(champion["Order"]),
                                         replace))
    axes[0].boxplot(rmse, labels=labels, autorange=True, showmeans=True)
    axes[0].set_title("RMSE")
    axes[1].boxplot(smape, labels=labels, autorange=True, showmeans=True)
    axes[1].set_title("SMAPE")
    axes[2].boxplot(u, labels=labels, autorange=True, showmeans=True)
    axes[2].set_title("U Statistic")
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)
def sliding_window_simple_search(data, windowsize, model, partitions, orders, **kwargs):
    """
    Grid-search the (partitions x orders) hyperparameter space of a model using
    sliding-window cross validation, then plot the error surface (3D when
    several orders are given) or error curve.

    :param data: the full time series
    :param windowsize: size of each sliding window
    :param model: model class (called with a descriptive name and partitioner)
    :param partitions: iterable of partition counts to try
    :param orders: iterable of model orders to try
    :keyword figsize: figure size, default [10, 15]
    :keyword plotforecasts: True to plot every candidate's forecasts
    :keyword intervals: True to evaluate interval forecasts instead of points
    :keyword threshold: improvement needed before adopting a new best model
    :keyword progressbar: truthy to wrap the search loops with tqdm
    :keyword elev, azim: 3D view angles, defaults 30 / 144
    :keyword file, save: forwarded to Util.show_and_save_image
    :return: [best_model, best_forecasts]
    """
    _3d = len(orders) > 1
    ret = []
    errors = np.array([[0 for k in range(len(partitions))] for kk in range(len(orders))])
    forecasted_best = []
    figsize = kwargs.get('figsize', [10, 15])
    fig = plt.figure(figsize=figsize)
    plotforecasts = kwargs.get('plotforecasts', False)
    if plotforecasts:
        ax0 = fig.add_axes([0, 0.4, 0.9, 0.5])  # left, bottom, width, height
        ax0.set_xlim([0, len(data)])
        ax0.set_ylim([min(data) * 0.9, max(data) * 1.1])
        ax0.set_title('Forecasts')
        ax0.set_ylabel('F(T)')
        ax0.set_xlabel('T')
    # fix: read elev/azim unconditionally -- the original fetched them inside
    # the "if plotforecasts" branch, raising NameError when plotforecasts=False
    elev = kwargs.get('elev', 30)
    azim = kwargs.get('azim', 144)
    min_rmse = 1000000.0
    best = None
    intervals = kwargs.get('intervals', False)
    threshold = kwargs.get('threshold', 0.5)
    progressbar = kwargs.get('progressbar', None)
    rng1 = enumerate(partitions, start=0)
    if progressbar:
        from tqdm import tqdm
        rng1 = enumerate(tqdm(partitions), start=0)
    for pc, p in rng1:
        fs = Grid.GridPartitioner(data=data, npart=p)
        rng2 = enumerate(orders, start=0)
        if progressbar:
            rng2 = enumerate(tqdm(orders), start=0)
        for oc, o in rng2:
            _error = []
            for ct, train, test in Util.sliding_window(data, windowsize, 0.8, **kwargs):
                fts = model("q = " + str(p) + " n = " + str(o), partitioner=fs)
                fts.fit(train, order=o)
                if not intervals:
                    forecasted = fts.forecast(test)
                    if not fts.has_seasonality:
                        _error.append(Measures.rmse(np.array(test[o:]),
                                                    np.array(forecasted[:-1])))
                    else:
                        _error.append(Measures.rmse(np.array(test[o:]),
                                                    np.array(forecasted)))
                    # left-pad so the forecast aligns with the original series
                    for kk in range(o):
                        forecasted.insert(0, None)
                    if plotforecasts:
                        ax0.plot(forecasted, label=fts.name)
                else:
                    forecasted = fts.forecast_interval(test)
                    _error.append(1.0 - Measures.rmse_interval(np.array(test[o:]),
                                                               np.array(forecasted[:-1])))
            error = np.nanmean(_error)
            errors[oc, pc] = error
            if (min_rmse - error) > threshold:
                min_rmse = error
                best = fts
                forecasted_best = forecasted
    if plotforecasts:
        ax0.plot(test, label="Original", linewidth=3.0, color="black")
    if _3d:
        # fix: the original constructed the 3D axes twice; build it once
        ax1 = Axes3D(fig, rect=[0, 1, 0.9, 0.9], elev=elev, azim=azim)
        ax1.set_title('Error Surface')
        ax1.set_ylabel('Model order')
        ax1.set_xlabel('Number of partitions')
        ax1.set_zlabel('RMSE')
        X, Y = np.meshgrid(partitions, orders)
        surf = ax1.plot_surface(X, Y, errors, rstride=1, cstride=1, antialiased=True)
    else:
        ax1 = fig.add_axes([0, 1, 0.9, 0.9])
        ax1.set_title('Error Curve')
        ax1.set_xlabel('Number of partitions')
        ax1.set_ylabel('RMSE')
        # fix: the original plotted on ax0 (which does not exist when
        # plotforecasts=False) with swapped axis labels; plot the single
        # row of the error matrix against the partition counts instead
        ax1.plot(partitions, errors.flatten())
    ret.append(best)
    ret.append(forecasted_best)
    file = kwargs.get('file', None)
    save = kwargs.get('save', False)
    Util.show_and_save_image(fig, file, save)
    return ret
def unified_scaled_interval(
        experiments, tam, save=False, file=None,
        sort_columns=['COVAVG', 'SHARPAVG', 'COVSTD', 'SHARPSTD'],
        sort_ascend=[True, False, True, True], save_best=False,
        ignore=None, replace=None):
    """
    Boxplot the sharpness, resolution and coverage of the best interval-forecast
    model of each family, pooled across several experiments after min-max
    scaling every experiment's measurements to a common range.

    :param experiments: list of (synthetic_csv, analytic_csv, experiment_count)
        tuples, one per experiment
    :param tam: figure size, [width, height]
    :param save: True to persist the figure to disk
    :param file: filename used when save is True
    :param sort_columns: columns used to rank models in each synthetic file
    :param sort_ascend: sort directions matching sort_columns
    :param save_best: unused in this function -- TODO confirm; sibling plotters
        persist the selected bests when True
    :param ignore: optional list of model keys to skip
    :param replace: optional replacement list applied to the model labels
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=tam)
    axes[0].set_title('Sharpness')
    axes[1].set_title('Resolution')
    axes[2].set_title('Coverage')
    models = {}  # scaled measurements per model, accumulated across experiments
    for experiment in experiments:
        mdl = {}  # raw measurements per model for THIS experiment only
        dat_syn = pd.read_csv(experiment[0], sep=";",
                              usecols=interval_dataframe_synthetic_columns())
        bests = find_best(dat_syn, sort_columns, sort_ascend)
        dat_ana = pd.read_csv(experiment[1], sep=";",
                              usecols=interval_dataframe_analytic_columns(experiment[2]))
        sharpness = []
        resolution = []
        coverage = []
        times = []
        data_columns = analytical_data_columns(experiment[2])
        for b in sorted(bests.keys()):
            if check_ignore_list(b, ignore):
                continue
            if b not in models:
                models[b] = {}
                models[b]['sharpness'] = []
                models[b]['resolution'] = []
                models[b]['coverage'] = []
                models[b]['times'] = []
            if b not in mdl:
                mdl[b] = {}
                mdl[b]['sharpness'] = []
                mdl[b]['resolution'] = []
                mdl[b]['coverage'] = []
                mdl[b]['times'] = []
            best = bests[b]
            print(best)
            # analytic rows matching this model's best configuration
            tmp = dat_ana[(dat_ana.Model == best["Model"]) & (dat_ana.Order == best["Order"])
                          & (dat_ana.Scheme == best["Scheme"])
                          & (dat_ana.Partitions == best["Partitions"])]
            tmpl = extract_measure(tmp, 'Sharpness', data_columns)
            mdl[b]['sharpness'].extend(tmpl)
            sharpness.extend(tmpl)
            tmpl = extract_measure(tmp, 'Resolution', data_columns)
            mdl[b]['resolution'].extend(tmpl)
            resolution.extend(tmpl)
            tmpl = extract_measure(tmp, 'Coverage', data_columns)
            mdl[b]['coverage'].extend(tmpl)
            coverage.extend(tmpl)
            tmpl = extract_measure(tmp, 'TIME', data_columns)
            mdl[b]['times'].extend(tmpl)
            times.extend(tmpl)
            models[b]['label'] = check_replace_list(
                best["Model"] + " " + str(best["Order"]), replace)
        # scaling parameters are computed over all models of this experiment so
        # measurements become comparable across experiments
        sharpness_param = scale_params(sharpness)
        resolution_param = scale_params(resolution)
        coverage_param = scale_params(coverage)
        times_param = scale_params(times)
        # NOTE(review): iterates models.keys() but indexes mdl[key]; a model seen
        # in an earlier experiment but absent from this one would raise KeyError
        # (the probabilistic variant iterates mdl.keys()) -- confirm with real data
        for key in sorted(models.keys()):
            models[key]['sharpness'].extend(
                scale(mdl[key]['sharpness'], sharpness_param))
            models[key]['resolution'].extend(
                scale(mdl[key]['resolution'], resolution_param))
            models[key]['coverage'].extend(
                scale(mdl[key]['coverage'], coverage_param))
            models[key]['times'].extend(scale(mdl[key]['times'], times_param))
    # rebuild the per-measure lists in label order for the boxplots
    sharpness = []
    resolution = []
    coverage = []
    times = []
    labels = []
    for key in sorted(models.keys()):
        sharpness.append(models[key]['sharpness'])
        resolution.append(models[key]['resolution'])
        coverage.append(models[key]['coverage'])
        times.append(models[key]['times'])
        labels.append(models[key]['label'])
    axes[0].boxplot(sharpness, labels=labels, autorange=True, showmeans=True)
    axes[1].boxplot(resolution, labels=labels, autorange=True, showmeans=True)
    axes[2].boxplot(coverage, labels=labels, autorange=True, showmeans=True)
    plt.tight_layout()
    Util.show_and_save_image(fig, file, save)