def get_station_objects(start_year=1980, end_year=2010, sel_names=None):
    """Build ``Station`` objects for the ice depth observation sites.

    Station coordinates are taken from the placemarks of the document stored
    in ``STATION_COORDS_FILE``; the observations come from ``get_obs_data()``.

    A placemark yields a station only when all of the following hold:

    * ``sel_names`` is None, or at least one of its entries is a
      case-insensitive substring of the placemark name;
    * the placemark lies in the window -100 <= lon <= 0, 40 <= lat <= 90;
    * at least 100 observation rows whose ``station_name`` starts with the
      placemark name (case-insensitive) have a ``year`` within
      ``[start_year, end_year]``.

    :param start_year: first year of the selected period (inclusive)
    :param end_year: last year of the selected period (inclusive)
    :param sel_names: optional iterable of name fragments used to select
        placemarks; None selects all of them
    :return: list of ``Station`` objects, each created with the first
        matching ``station_name`` as id, the placemark lon/lat and a
        ``Date -> ice_depth`` dictionary
    """
    # read ice depth values
    df = get_obs_data()

    lon_min, lon_max = -100, 0
    lat_min, lat_max = 40, 90

    # stations with fewer rows than this in the selected period are skipped
    nvals_min = 100

    # Parse inside a context manager so the coordinates file is closed as
    # soon as the document has been read (the handle used to be leaked).
    with STATION_COORDS_FILE.open() as coords_file:
        root = parser.parse(coords_file).getroot()

    station_elts = root.Document.Placemark

    # Loop invariant: lower-cased observation station names.
    obs_names_lower = df.station_name.str.lower()

    stations = []
    for el in station_elts:
        # coordinates text is "lon,lat,<third value>"; the third value is unused
        lon, lat, _ = [float(c.strip()) for c in el.Point.coordinates.text.split(",")]

        # Keep only the placemarks matching one of the requested names
        if sel_names is not None:
            placemark_name = el.name.text.lower()
            if not any(sel_name.lower() in placemark_name for sel_name in sel_names):
                continue

        # select points based on the lat/lon limits
        if not ((lon_min <= lon <= lon_max) and (lat_min <= lat <= lat_max)):
            continue

        print("{}: {}".format(el.name, el.Point.coordinates))

        df_s = df.loc[obs_names_lower.str.startswith(el.name.text.lower())]
        df_s = df_s.loc[(df_s.year >= start_year) & (df_s.year <= end_year)]

        if len(df_s) < nvals_min:
            continue

        print(len(df_s))
        d_to_v = dict(zip(df_s["Date"], df_s["ice_depth"]))

        stations.append(Station(st_id=df_s.station_name.iloc[0], lon=lon, lat=lat, date_to_value=d_to_v))

    return stations
def load_stations_from_csv(
        index_file="mh/obs_data/streamflow_data_organized/station_index.txt",
        selected_ids=None):
    """Create ``Station`` objects from an index file and per-station csv files.

    The first line of ``index_file`` is skipped. Every other non-blank line
    is split on whitespace into::

        <id> <lon> <lat> <drainage area> <data units> <name ...>

    The station name is everything from the 6th token up to the first comma.
    A drainage area that is missing or not a number is stored as None.
    The data of each station are read from ``<id>.csv`` located in the
    directory of the index file.

    :param index_file: path to the station index file
    :param selected_ids: optional container of station ids; when given, only
        the stations whose id is in it are loaded
    :return: list of ``Station`` objects with their data attached through
        ``set_data_from_pandas_timeseries``
    :raises Exception: when the first column of a station data file can not
        be interpreted as dates
    """
    res = []

    data_dir = Path(index_file).parent

    with open(index_file) as f:
        # skip the first line
        f.readline()
        for line in f:
            # str.split() without arguments ignores leading/trailing
            # whitespace, so an indented line does not produce an empty
            # station id and the trailing newline does not add an empty token.
            toks = line.split()
            if not toks:
                continue

            st_id = toks[0]

            if (selected_ids is not None) and (st_id not in selected_ids):
                continue

            lon, lat = [float(tok) for tok in toks[1:3]]

            # the drainage area is optional: keep None when it is unusable
            try:
                st_da = float(toks[3])
            except (ValueError, IndexError):
                st_da = None

            st_name = " ".join(toks[5:]).split(",")[0]

            s = Station(st_id=st_id, lon=lon, lat=lat, name=st_name)
            s.source_data_units = toks[4]
            s.drainage_km2 = st_da

            print(s)

            # presumably a pandas DataFrame with the dates in the first column
            ts = read_data_file_for_station(
                s, data_file=data_dir.joinpath("{}.csv".format(s.id)))

            ts.dropna(inplace=True)

            _convert_first_column_to_dates(ts)

            print(ts.head())

            set_data_from_pandas_timeseries(ts, s, date_col=0)
            res.append(s)

    return res


def _convert_first_column_to_dates(ts):
    """Make sure the first column of ``ts`` holds date objects (in place).

    The decision is based on the first value of the column:

    * it has a ``year`` attribute: the column is left untouched;
    * it is a string: the column is parsed with a format from
      ``known_date_formats`` that can parse that first value;
    * it is an integer-valued number: it is treated as a year and mapped
      to June 15th of that year.

    :raises Exception: when none of the cases above applies
    """
    sample = ts.iloc[0, 0]

    # if it is date do nothing
    if hasattr(sample, "year"):
        return

    if isinstance(sample, str):
        date_format = None
        # try different known date formats; there is deliberately no early
        # exit, when several formats fit the last one of them is used
        for the_date_format in known_date_formats:
            try:
                datetime.strptime(sample, the_date_format)
            except (ValueError, TypeError):
                continue
            date_format = the_date_format

        if date_format is None:
            raise Exception(
                "Do not understand this date format: {}".format(sample))

        # NOTE(review): values are read by position but written to the column
        # labelled 0 - assumes the first column is labelled 0, confirm.
        ts[0] = [datetime.strptime(t, date_format) for t in ts.iloc[:, 0]]
        return

    # in case we have only year values
    if float(sample).is_integer():
        ts[0] = [datetime(int(y), 6, 15) for y in ts.iloc[:, 0]]
        return

    print(sample)
    raise Exception("Could not convert {} to a date".format(sample))