def set_xylabels(axes, list_timeseries):
    """Set y-axis labels from the time series and choose an x-axis date
    format suited to the width of the current x window.

    The window width is measured in x-axis units (matplotlib date
    numbers, presumably days -- confirm against callers).
    """
    pd.auto_ylabels(axes, list_timeseries)
    left, right = axes.get_xlim()
    span = right - left
    if span < 5.:
        # Very short window: include hours and minutes in the ticks.
        pd.set_xaxis_dateformat(axes, date_format="%m/%d/%y %H:%M", pad=10)
        pd.rotate_xticks(axes, angle=20)
        return
    if span < 15.:
        # Up to two weeks: tick by day.
        pd.set_xaxis_day(axes, rotate=20, pad=10)
        return
    if span > 160.:
        # More than roughly five months: tick by month.
        pd.set_xaxis_month(axes, rotate=20, pad=10)
        return
    # Intermediate window: plain date labels.
    pd.set_xaxis_dateformat(axes, date_format="%m/%d/%y", pad=10)
    pd.rotate_xticks(axes, angle=20)
def plot_scatter(ax, tss):
    """Scatter-plot the first two time series in tss against each other.

    The axes are hidden entirely when fewer than two usable (non-None)
    series are supplied.  NaN pairs are dropped before plotting.
    """
    # Nothing meaningful to draw without two non-None series.
    if tss is None or len(tss) < 2 or tss[0] is None or tss[1] is None:
        ax.set_visible(False)
        return
    base_ts = tss[0]
    target_ts = tss[1]
    # Keep only positions where both series have data.
    both_valid = ~(np.isnan(base_ts.data) | np.isnan(target_ts.data))
    base = base_ts.data[both_valid]
    target = target_ts.data[both_valid]
    ax.grid(True, linestyle='-', linewidth=0.1, color='0.5')
    dots = ax.scatter(base, target)
    add_regression_line(ax, base, target)
    set_scatter_color(dots)
    make_plot_isometric(ax)
    # Both axes are labeled with the first series' unit.
    unit = base_ts.props.get('unit')
    axis_labels = [l + " (%s)" % unit for l in ['Obs', 'Sim']]
    ax.set_xlabel(axis_labels[0])
    ax.set_ylabel(axis_labels[1])
    rotate_xticks(ax, 25)
def plot_scatter(axes, ts1, ts2, labels=None):
    """Scatter ts2 against ts1 and overlay a least-squares regression line.

    NaN pairs are removed first.  Axis limits are forced to a common,
    square range, and both axes are labeled with ts1's unit when one is
    present in its props.
    """
    x_raw = ts1.data
    y_raw = ts2.data
    # Drop every position where either series is missing.
    paired = ~(np.isnan(x_raw) | np.isnan(y_raw))
    xs = x_raw[paired]
    ys = y_raw[paired]
    # Scatter cloud, lightly shaded in the first cycle color.
    cloud = axes.scatter(xs, ys)
    plt.setp(cloud, alpha=0.15, edgecolor='grey',
             facecolor=mpl.rcParams['axes.color_cycle'][0])
    if len(xs) < 2:
        # Too few points for a regression line.
        return
    # Least-squares fit: ys ~ coeffs[0] * xs + coeffs[1].
    coeffs = np.linalg.lstsq(np.vstack([xs, np.ones(len(xs))]).T, ys)[0]
    line_x = np.array([min(xs), max(xs)])
    line_y = coeffs[0] * line_x + coeffs[1]
    fit_line, = axes.plot(line_x, line_y,
                          color=mpl.rcParams['axes.color_cycle'][1])
    # Legend text shows the fitted equation with an explicit sign.
    if coeffs[1] >= 0.:
        eqn = "Y=%.3f*X+%.3f" % (coeffs[0], coeffs[1])
    else:
        eqn = "Y=%.3f*X-%.3f" % (coeffs[0], -coeffs[1])
    axes.legend([fit_line, ], [eqn, ], loc='upper left', prop={'size': 10})
    # Force identical, square limits on both axes.
    x_range = axes.get_xlim()
    y_range = axes.get_ylim()
    shared = (min(x_range[0], y_range[0]), max(x_range[1], y_range[1]))
    axes.set_xlim(*shared)
    axes.set_ylim(*shared)
    axes.set_aspect('equal')
    pd.rotate_xticks(axes, 20)
    axes.tick_params(axis='x', pad=10)
    if labels is None:
        labels = ['Obs', 'Sim']
    unit_x = ts1.props.get('unit')
    if unit_x == 'meter':
        unit_x = 'm'
    if unit_x is not None:
        axes.set_xlabel(labels[0] + " (%s)" % unit_x)
        axes.set_ylabel(labels[1] + " (%s)" % unit_x)
def metrics(params):
    """Produce metrics plots comparing simulated and observed time series.

    For each station in the simulation output, the matching observation
    file is located, unit-converted, optionally datum-adjusted, plotted
    against the simulation via a metrics_plot, and saved to one PNG per
    station in the current directory.

    params is a dict-like configuration read via the keys below:
    "data dir", "time basis", "start avg"/"end avg" (averaging window),
    "start inst"/"end inst" (instantaneous window), "labels",
    "stations csv", "obs links csv", and "variable" ('flow', 'elev' or
    'salt').
    """
    init_logger()
    logger = logging.getLogger("metrics")
    # Unpack configuration.
    working_dir = params["data dir"]
    time_basis = params["time basis"]
    start_avg = params["start avg"]
    end_avg = params["end avg"]
    start_inst = params["start inst"]
    end_inst = params["end inst"]
    labels_base = params["labels"]
    db_stations = station_db.StationDB(params["stations csv"])
    list_obs_fname = params["obs links csv"]
    db_obs = obs_links.ObsLinks(list_obs_fname)
    # Extra margin read around the averaging window when loading obs.
    pad = datetime.timedelta(days=3)
    max_gap_to_fill = vtools.data.vtime.time_interval(hours=1)
    variable = params["variable"]
    # Flow output comes from a flux file; everything else from station.in.
    if variable == 'flow':
        station_in = os.path.join(working_dir, "flowlines.yaml")
        staout = station_extractor.flow_extractor(station_in,
                                                  working_dir,
                                                  time_basis)
        staout.flow_fname = "flux.dat"
    else:
        station_in = os.path.join(working_dir, "station.in")
        staout = station_extractor.station_extractor(station_in,
                                                     working_dir,
                                                     time_basis)
    # NOTE(review): alias fallback is disabled here; the
    # "obs_fname is None and use_alias" branch below is currently dead.
    use_alias = False
    pd.set_color_cycle_dark2()
    mp = schism_postprocess.metrics_plot()
    for station in staout.stations:
        station_id = station["name"]
        logger.info("Processing station: %s" % station_id)
        alias = db_stations.alias(station_id)
        station_work = station_id
        if alias is None:
            logger.error("Station is not in the station database: %s"
                         % station_id)
            continue
        long_name = db_stations.name(station_id)
        # Simulation
        if variable == 'flow':
            ts_sim = staout.retrieve_ts(station_id)
            data_expected = db_stations.station_attribute(station_id,"Flow")
        else:
            if variable == 'elev':
                ts_sim = staout.retrieve_ts(variable, name=station_id)
                data_expected = db_stations.station_attribute(station_id,"Stage")
            elif variable in ['salt', ]:
                # Salinity is extracted at a specific vertical position.
                vert_pos = station["vert_pos"]
                data_expected = db_stations.station_attribute(station_id,"WQ")
                ts_sim = staout.retrieve_ts(variable,
                                            name=station_id,
                                            depth=vert_pos)
        # NOTE(review): if variable is none of flow/elev/salt, ts_sim and
        # data_expected are unbound here -- presumably callers only pass
        # those three values; confirm.
        if ts_sim is None:
            logger.warning("This station is not in staout: %s %s: "
                           %(station_id, long_name))
            continue
        if np.all(np.isnan(ts_sim.data)):
            logger.warning("All simulated values are nan.")
            continue
        # Observation
        if variable == "salt":
            obs_fname = db_obs.filename(station_id, variable,
                                        vert_pos=vert_pos)
        else:
            obs_fname = db_obs.filename(station_id, variable)
        # Fall back to the alias id (disabled while use_alias is False).
        if obs_fname is None and use_alias:
            if variable == 'salt':
                obs_fname = db_obs.filename(alias, variable,
                                            vert_pos=vert_pos)
            else:
                obs_fname = db_obs.filename(alias, variable)
            station_work = alias
        # Human-readable identifier used in log messages.
        if alias != station_id:
            alias_use = alias + " " + long_name[:32]
        else:
            alias_use = long_name[:32]
        if obs_fname is None:
            # Missing link is only a warning when data was expected.
            expectstr = "(Omission)" if data_expected else "(Data not expected)"
            level = logging.WARNING if data_expected else logging.DEBUG
            logger.log(level,"%s No %s data link listing for: %s (%s)"
                       % (expectstr,variable,station_id,alias_use))
            continue
        else:
            if not data_expected:
                logger.warning("File link %s found for station %s but station not expected to have data for variable %s"
                               % (obs_fname,station_id,variable))
            logger.info("Observed file for id %s: %s"
                        % (station_id,obs_fname))
        if not os.path.exists(obs_fname):
            logger.error("Observation file path not found on file system: %s"
                         % obs_fname)
            continue
        ts_obs = read_ts.read_ts(obs_fname, start_avg - pad, end_avg + pad)
        if ts_obs is None:
            logger.warning("This obs file does not cover the period: %s"
                           % obs_fname)
            continue
        # Fill gaps
        if np.any(np.isnan(ts_obs.data)):
            logger.debug("Filling gaps ...")
            # Convert the max fill interval into a number of samples.
            max_gap = int(max_gap_to_fill.total_seconds()
                          / ts_obs.interval.total_seconds())
            if max_gap == 0:
                max_gap += 1
            ts_obs = vtools.functions.api.interpolate_ts_nan(ts_obs,
                                                             max_gap=max_gap)
        # agency = db_obs.get_agency(station_id, variable)
        # Normalize observed units to metric / psu.
        obs_unit = db_obs.unit(station_work, variable)
        logger.debug("obs_unit %s" % obs_unit)
        ts_obs.props["unit"] = obs_unit
        if ts_obs.props["unit"] == 'ft':
            unit_conversions.ft_to_m(ts_obs)
        elif ts_obs.props["unit"] == "cfs":
            logger.debug("Convert cfs to cms....")
            unit_conversions.cfs_to_cms(ts_obs)
        elif ts_obs.props["unit"] == "ec":
            logger.debug("Convert ec to psu...")
            unit_conversions.ec_psu_25c(ts_obs)
        # Fixed datum correction from the obs-link database.
        datum_adj = db_obs.datum_adjust(station_work, variable)
        if datum_adj != 0.:
            ts_obs += datum_adj
        datum = db_obs.vdatum(station_work, variable)
        adj = 0.
        # STND-datum stage records have no absolute reference: shift obs
        # so its mean matches the simulation's over the common window.
        if variable == 'elev' and datum == 'STND':
            tss = schism_postprocess.window_list_timeseries((ts_obs, ts_sim))
            adj = np.average(tss[1].data) - np.average(tss[0].data)
            ts_obs += adj
        # Record any shift in the observed-series legend label.
        label_obs = labels_base[0]
        if adj != 0.:
            if adj > 0.:
                label_obs += " + %g" % adj
            else:
                label_obs += u" \u2212 %g" % (-adj)
        labels = [label_obs,]
        labels.extend(labels_base[1:])
        mp.plot_metrics((ts_obs, ts_sim), labels=labels)
        # Window and format the instantaneous and filtered axes.
        mp.axes_inst.set_xlim(start_inst, end_inst)
        pd.set_xaxis_dateformat(mp.axes_inst, "%m/%d/%y")
        pd.rotate_xticks(mp.axes_inst, 30)
        mp.axes_filtered.set_xlim(start_avg, end_avg)
        pd.set_xaxis_dateformat(mp.axes_filtered, "%m/%d/%y")
        pd.rotate_xticks(mp.axes_filtered, 30)
        mp.axes_inst.legend(prop={'size': 12})
        # Title and output file name; salinity gets a surface/bottom tag.
        if variable in ["salt",]:
            if vert_pos == 0:
                title = "%s, Surface, (%s)" % (long_name, alias)
                fout_name = "%s_%s_surface.png" % (variable, alias)
            elif vert_pos == 1:
                title = "%s, Bottom, (%s)" % (long_name, alias)
                fout_name = "%s_%s_bottom.png" % (variable, alias)
        else:
            title = "%s (%s)" % (long_name, alias)
            fout_name = "%s_%s.png" % (variable, alias)
        mp.set_title(title)
        plt.savefig(fout_name, dpi=300)
    print "See metrics_errors.log for errors such as missed stations"