def load_stations_from_csv( index_file="mh/obs_data/streamflow_data_organized/station_index.txt", selected_ids=None): res = [] data_dir = Path(index_file).parent with open(index_file) as f: # skip the first line f.readline() for line in f: if line.strip() == "": continue toks = re.split("\s+", line) st_id = toks[0].strip() if (selected_ids is not None) and (st_id not in selected_ids): continue lon, lat, = [float(tok.strip()) for tok in toks[1:3]] st_da = None try: st_da = float(toks[3].strip()) except Exception: pass st_name = " ".join(toks[5:]).split(",")[0] s = Station(st_id=st_id, lon=lon, lat=lat, name=st_name) s.source_data_units = toks[4].strip() s.drainage_km2 = st_da print(s) ts = read_data_file_for_station( s, data_file=Path(data_dir.joinpath("{}.csv".format(s.id)))) ts.dropna(inplace=True) # if it is date do nothing if hasattr(ts.iloc[0, 0], "year"): pass # convert to dates if it is just a year elif isinstance(ts.iloc[0, 0], str): date_format = None # try different known date formats for the_date_format in known_date_formats: try: datetime.strptime(ts.iloc[0, 0], the_date_format) date_format = the_date_format except Exception: pass if date_format is None: raise Exception( "Do not understand this date format: {}".format( ts.iloc[0, 0])) ts[0] = [ datetime.strptime(t, date_format) for t in ts.iloc[:, 0] ] elif float(ts.iloc[ 0, 0]).is_integer(): # in case we have only year values ts[0] = [datetime(int(y), 6, 15) for y in ts.iloc[:, 0]] else: print(ts.iloc[0, 0]) raise Exception("Could not convert {} to a date".format( ts.iloc[0, 0])) print(ts.head()) # start - plot for debug # fig = plt.figure() # ax = plt.gca() # ax.set_title(s.id) # ts.plot(ax=ax, x=0, y=1) # fig.autofmt_xdate() # # img_file = img_folder.joinpath("{}.png".format(s.id)) # fig.savefig(str(img_file)) # end - plot for debug set_data_from_pandas_timeseries(ts, s, date_col=0) res.append(s) return res