Example #1
0
def get_station_objects(start_year=1980, end_year=2010, sel_names=None):
    """Build Station objects holding ice depth observations.

    Station positions come from the placemarks found in STATION_COORDS_FILE;
    the observations come from the table returned by get_obs_data(). A
    placemark is kept only when:

    * its name contains one of ``sel_names`` (case-insensitive), if given;
    * it lies inside the hard-coded lon/lat box (-100..0, 40..90);
    * at least 100 observation rows whose ``station_name`` starts with the
      placemark name (case-insensitive) fall within the requested years.

    :param start_year: first year (inclusive) of observations to keep
    :param end_year: last year (inclusive) of observations to keep
    :param sel_names: optional iterable of name fragments; ``None`` disables
        the name filter
    :return: list of Station objects, each created with ``st_id``, ``lon``,
        ``lat`` and a ``date_to_value`` dict mapping "Date" to "ice_depth"
    """
    # read ice depth values
    df = get_obs_data()

    lon_min, lon_max = -100, 0
    lat_min, lat_max = 40, 90

    nvals_min = 100

    # The parsed tree is used after this point, not the file, so the handle
    # can be released as soon as parsing is done.
    with STATION_COORDS_FILE.open() as coords_file:
        p = parser.parse(coords_file)

    root = p.getroot()

    station_elts = root.Document.Placemark

    # Lower-case once instead of once per placemark.
    sel_names_lower = None if sel_names is None else [name.lower() for name in sel_names]
    obs_names_lower = df.station_name.str.lower()

    stations = []
    for el in station_elts:

        # coordinates are stored as "lon,lat,<third value>"; the third is ignored
        lon, lat, _ = [float(c.strip()) for c in el.Point.coordinates.text.split(",")]

        # Skip the placemark if it matches none of the requested names
        if sel_names_lower is not None:
            el_name_lower = el.name.text.lower()
            if not any(sel_name in el_name_lower for sel_name in sel_names_lower):
                continue

        # select points based on the lat/lon limits
        if not ((lon_min <= lon <= lon_max) and (lat_min <= lat <= lat_max)):
            continue

        print("{}: {}".format(el.name, el.Point.coordinates))

        df_s = df.loc[obs_names_lower.str.startswith(el.name.text.lower())]

        df_s = df_s.loc[(df_s.year >= start_year) & (df_s.year <= end_year)]

        # too few observations in the requested period
        if len(df_s) < nvals_min:
            continue

        print(len(df_s))
        d_to_v = dict(zip(df_s["Date"], df_s["ice_depth"]))

        stations.append(Station(st_id=df_s.station_name.iloc[0], lon=lon, lat=lat, date_to_value=d_to_v))

    return stations
Example #2
0
def load_stations_from_csv(
        index_file="mh/obs_data/streamflow_data_organized/station_index.txt",
        selected_ids=None):
    """Create Station objects from an index file and per-station csv files.

    The index file is whitespace-separated with one header line, which is
    skipped. Each remaining non-blank line is read as::

        <id> <lon> <lat> <drainage area> <data units> <name ...>[, ...]

    The station data is read from ``<id>.csv`` located next to the index
    file. The first column of that data is converted to datetime objects when
    it holds strings (using ``known_date_formats``) or whole-number years
    (mapped to June 15 of that year).

    :param index_file: path to the station index file
    :param selected_ids: optional container of station ids to load; ``None``
        loads every station listed in the index
    :return: list of Station objects with their data attached
    :raises ValueError: if the first column cannot be interpreted as dates
    """
    res = []

    data_dir = Path(index_file).parent

    # raw string: "\s" in a normal string literal is an invalid escape sequence
    field_sep = re.compile(r"\s+")

    with open(index_file) as f:

        # skip the first line
        f.readline()

        for line in f:
            if line.strip() == "":
                continue

            toks = field_sep.split(line)

            st_id = toks[0].strip()

            if (selected_ids is not None) and (st_id not in selected_ids):
                continue

            lon, lat, = [float(tok.strip()) for tok in toks[1:3]]

            # The drainage area is optional: keep None when it is missing or
            # is not a number.
            st_da = None
            try:
                st_da = float(toks[3].strip())
            except (ValueError, IndexError):
                pass

            # The name is everything after the units column, up to the first
            # comma. The split of a newline-terminated line leaves a trailing
            # empty token, hence the strip to avoid a dangling space.
            st_name = " ".join(toks[5:]).split(",")[0].strip()

            s = Station(st_id=st_id, lon=lon, lat=lat, name=st_name)
            s.source_data_units = toks[4].strip()
            s.drainage_km2 = st_da

            print(s)

            ts = read_data_file_for_station(
                s, data_file=data_dir.joinpath("{}.csv".format(s.id)))

            ts.dropna(inplace=True)

            first_value = ts.iloc[0, 0]

            # if it is date do nothing
            if hasattr(first_value, "year"):
                pass
            # convert to dates if the values are strings
            elif isinstance(first_value, str):
                date_format = None
                # Try all known date formats. There is intentionally no break:
                # when several formats match, the last one wins, as before.
                for the_date_format in known_date_formats:
                    try:
                        datetime.strptime(first_value, the_date_format)
                        date_format = the_date_format
                    except (ValueError, TypeError):
                        pass

                if date_format is None:
                    raise ValueError(
                        "Do not understand this date format: {}".format(
                            first_value))

                ts[0] = [
                    datetime.strptime(t, date_format) for t in ts.iloc[:, 0]
                ]

            elif float(first_value).is_integer():
                # in case we have only year values: use the middle of the year
                ts[0] = [datetime(int(y), 6, 15) for y in ts.iloc[:, 0]]

            else:
                print(first_value)
                raise ValueError("Could not convert {} to a date".format(
                    first_value))

            print(ts.head())

            set_data_from_pandas_timeseries(ts, s, date_col=0)

            res.append(s)

    return res