Esempio n. 1
0
 def test_location_codes(self, invdf, input, expected):
     """
     Check how location codes of different input types are converted.

     The location of station RJOB is set to ``input``, the frame is passed
     through ``stations_to_df`` and the resulting location codes are
     compared with ``expected``.
     """
     invdf = invdf.copy()
     is_rjob = invdf["station"] == "RJOB"
     if isinstance(input, int):
         # integer input: keep only the RJOB rows and force an int column
         invdf = invdf.loc[is_rjob]
         invdf["location"] = input
         invdf["location"] = invdf["location"].astype(int)
     else:
         if isinstance(input, float):
             # float input: set every location to NaN before assigning RJOB
             invdf["location"] = np.nan
         invdf.loc[is_rjob, "location"] = input
     out = stations_to_df(invdf)
     rjob_locations = out.loc[out["station"] == "RJOB", "location"]
     other_locations = out.loc[out["station"] != "RJOB", "location"]
     assert (rjob_locations == expected).all()
     assert (other_locations == "").all()
     # the location must also agree with the third field of the seed id
     seed_location = out["seed_id"].str.split(".", expand=True)[2]
     assert (out["location"] == seed_location).all()
Esempio n. 2
0
    def df_with_get_stations_kwargs(self):
        """
        Add response information to the dataframe using get_stations_kwargs.

        The station dataframe for station NOQ of the "bingham_test" dataset
        gets a "get_station_kwargs" column: the first row holds an empty
        string, the last row holds the string form of its kwargs dict and
        any rows in between hold the dict itself.

        An additional station (CWU) is then appended, copied from the first
        row, with "{}" as its kwargs; it will need to get all data from
        other columns.

        Returns
        -------
        pd.DataFrame
            The station dataframe with the extra column and the extra row.
        """
        _inv = obsplus.load_dataset(
            "bingham_test").station_client.get_stations()
        inv = _inv.select(station="NOQ")

        with suppress_warnings():
            df = obsplus.stations_to_df(inv).reset_index()

        # set get_station_kwargs for all rows but the first, which stays empty
        kwargs_list = [""]
        for _, row in df.iloc[1:].iterrows():
            kwargs = {x: row[x] for x in NSLC}
            kwargs["endafter"] = str(to_utc(row["start_date"]))
            kwargs_list.append(kwargs)
        # set last kwargs to str to simulate reading from csv
        kwargs_list[-1] = str(kwargs_list[-1])
        df["get_station_kwargs"] = kwargs_list
        # set the first kwargs to a string to make sure it can be parsed
        # this is important for eg reading data from a csv.
        df.loc[0, "get_station_kwargs"] = str(df.loc[0, "get_station_kwargs"])
        # now add a row with an empty get_station_kwargs column
        new_row = dict(df.iloc[0])
        new_row.update(
            {
                "station": "CWU",
                "network": "UU",
                "channel": "EHZ",
                "location": "01",
                "seed_id": "UU.CWU.01.EHZ",
                "get_station_kwargs": "{}",
            }
        )
        # DataFrame.append was removed in pandas 2.0; concatenate a one-row
        # frame instead, renumbering the index as ignore_index=True did before.
        return pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
Esempio n. 3
0
    def __init__(
        self,
        catalog: Catalog,
        inventory: Inventory,
        phases: Optional[Collection[str]] = None,
        restrict_to_arrivals: bool = True,
    ):
        """
        Collect the event and station metadata used by this instance.

        Parameters
        ----------
        catalog
            The events to use. The input is copied; a single Event is
            wrapped in a Catalog.
        inventory
            Station information, converted with obsplus.stations_to_df.
        phases
            Forwarded to ``_get_meta_df``.
        restrict_to_arrivals
            Forwarded to ``_get_meta_df``.
        """
        catalog = catalog.copy()
        if isinstance(catalog, Event):
            events = Catalog(events=[catalog])
        else:
            events = catalog
        # station information as a dataframe indexed by seed id
        inv_df = obsplus.stations_to_df(inventory)
        inv_df.set_index("seed_id", inplace=True)
        # one row of spatial information per event/station pair
        calculator = SpatialCalculator()
        event_station_df = calculator(events, inv_df)
        # hypocentral distance from the two distance columns
        dist = event_station_df["distance_m"]
        vert_dist = event_station_df["vertical_distance_m"]
        event_station_df["hyp_distance_m"] = np.sqrt(dist**2 + vert_dist**2)
        event_station_df.index.names = ["event_id", "seed_id"]
        # additional station info is needed; join it onto the spatial frame
        self.event_station_df = self._join_station_info(
            inv_df, event_station_df
        )
        self.data = self._get_meta_df(
            events,
            phases=phases,
            restrict_to_arrivals=restrict_to_arrivals,
        )
        # init cache
        self._cache = {}
Esempio n. 4
0
    def _make_inventory(self, df: pd.DataFrame):
        """
        Build an inventory from a dataframe.

        The frame is grouped by network, then station, then channel; the
        first row of each group provides the kwargs for the corresponding
        Network, Station or Channel object.
        """
        # get a conditioned copy of the input that is safe to modify
        station_df = obsplus.stations_to_df(df).copy()
        # add the responses, then drop the columns listed in _drop_cols
        station_df["response"] = self._get_responses(station_df)
        station_df = station_df.drop(columns=self._drop_cols, errors="ignore")
        # warn if any unexpected columns are found
        self._maybe_warn_on_unexpected_columns(station_df)
        networks = []
        for _, net_df in self._groupby_if_exists(station_df, "network"):
            stations = []
            for st_code, sta_df in self._groupby_if_exists(net_df, "station"):
                if not st_code[0]:
                    continue
                # channel groups with an empty code are skipped
                channels = [
                    Channel(**self._get_kwargs(ch_df.iloc[0], self.cha_map))
                    for ch_code, ch_df in self._groupby_if_exists(
                        sta_df, "channel"
                    )
                    if ch_code[0]
                ]
                sta_kwargs = self._get_kwargs(sta_df.iloc[0], self.sta_map)
                self._add_dates(sta_kwargs, channels)
                stations.append(Station(channels=channels, **sta_kwargs))
            net_kwargs = self._get_kwargs(net_df.iloc[0], self.net_map)
            self._add_dates(net_kwargs, stations)
            networks.append(Network(stations=stations, **net_kwargs))

        source = f"ObsPlus_v{obsplus.__version__}"
        return obspy.Inventory(networks=networks, source=source)
Esempio n. 5
0
 def inv_df(self):
     """Return a station dataframe built from obspy.read_inventory()."""
     return stations_to_df(obspy.read_inventory())
Esempio n. 6
0
 def test_filter_station(self, inventory):
     """Stations can be filtered by station code."""
     wet_only = inventory.get_stations(station="WET")
     stations = set(obsplus.stations_to_df(wet_only).station)
     assert stations == {"WET"}
Esempio n. 7
0
 def inv_df(self, request):
     """Convert the attribute named by the fixture param to a dataframe."""
     source = getattr(self, request.param)
     return stations_to_df(source)
Esempio n. 8
0
 def df_from_inv(self):
     """Return a station dataframe made from obspy.read_inventory()."""
     return stations_to_df(obspy.read_inventory())
Esempio n. 9
0
 def df_from_inv(self):
     """Return the default inventory as a station dataframe."""
     default_inv = obspy.read_inventory()
     df = obsplus.stations_to_df(default_inv)
     return df
Esempio n. 10
0
 def test_basic_inventories(self, station_cache_inventory):
     """The station_cache_inventory fixture yields a non-empty dataframe."""
     out = stations_to_df(station_cache_inventory)
     assert isinstance(out, pd.DataFrame)
     assert not out.empty
Esempio n. 11
0
 def test_kem_catalog(self):
     """Events of the kemmerer dataset convert to a non-empty dataframe."""
     events = obsplus.load_dataset("kemmerer").event_client.get_events()
     out = stations_to_df(events)
     assert isinstance(out, pd.DataFrame)
     assert not out.empty
Esempio n. 12
0
 def test_kem_catalog(self, bingham_dataset):
     """Events of the bingham dataset convert to a non-empty dataframe."""
     events = bingham_dataset.event_client.get_events()
     out = stations_to_df(events)
     assert isinstance(out, pd.DataFrame)
     assert not out.empty
Esempio n. 13
0
 def test_stream_to_inv(self):
     """A stream converts to a dataframe with as many rows as its length."""
     stream = obspy.read()
     out = obsplus.stations_to_df(stream)
     assert isinstance(out, pd.DataFrame)
     assert len(out) == len(stream)
Esempio n. 14
0
 def ta_inv_df(self, ta_inventory):
     """Convert the ta_inventory fixture to a station dataframe."""
     df = obsplus.stations_to_df(ta_inventory)
     return df
Esempio n. 15
0
 def inv_df(self, request):
     """Convert the fixture named by the param to a station dataframe."""
     fixture_name = request.param
     return stations_to_df(request.getfixturevalue(fixture_name))
Esempio n. 16
0
 def read_inventory(self, inv_directory):
     """Convert inv_directory to a dataframe with warnings suppressed."""
     with suppress_warnings():
         inv_df = stations_to_df(inv_directory)
     return inv_df
Esempio n. 17
0
 def test_filter_channel_star_wild(self, inventory):
     """Channels can be filtered with the * wildcard."""
     filtered = inventory.get_stations(channel="*z")
     channels = set(obsplus.stations_to_df(filtered).channel)
     assert all(code.endswith("Z") for code in channels)
Esempio n. 18
0
 def test_filter_channel_single_wild(self, inventory):
     """String attributes can be filtered with the ? wildcard."""
     filtered = inventory.get_stations(channel="HH?")
     channels = set(obsplus.stations_to_df(filtered).channel)
     assert all(code.startswith("HH") for code in channels)
Esempio n. 19
0
 def df_bad_location(self, inv_df):
     """
     Return a station dataframe whose input location codes were all NaN.

     A copy of ``inv_df`` gets its location column set to NaN and is then
     run through ``stations_to_df``.
     """
     # work on a copy so the inv_df fixture itself is not modified
     bad = inv_df.copy()
     # np.NaN was removed in NumPy 2.0; np.nan is the supported spelling
     bad.loc[:, "location"] = np.nan
     return stations_to_df(bad)
 def inv_df(self, request):
     """Convert the fixture param itself to a station dataframe."""
     source = request.param
     return stations_to_df(source)
Esempio n. 21
0
 def test_idempotency(self, inv_df):
     """Converting a station dataframe again gives an equal, new object."""
     converted_again = stations_to_df(inv_df)
     assert converted_again.equals(inv_df)
     assert converted_again is not inv_df
Esempio n. 22
0
 def test_all_seed_id_in_df(self, distance_df, inv):
     """The "id2" index values of distance_df equal the inventory seed ids."""
     expected = set(obsplus.stations_to_df(inv)["seed_id"])
     found = set(distance_df.index.to_frame()["id2"])
     assert found == expected
Esempio n. 23
0
 def invdf(self, test_inventory):
     """Convert the test_inventory fixture to a station dataframe."""
     df = stations_to_df(test_inventory)
     return df
Esempio n. 24
0
 def test_all_seed_id_in_df(self, distance_df, inv):
     """The "id2" index values of distance_df equal the inventory seed ids."""
     expected = set(obsplus.stations_to_df(inv)["seed_id"])
     found = set(distance_df.index.to_frame()["id2"])
     assert found == expected
Esempio n. 25
0
 def wavebank_station_df(self, crandall_bank):
     """Convert the crandall_bank fixture to a station dataframe."""
     df = stations_to_df(crandall_bank)
     return df
Esempio n. 26
0
 def read_inventory(self, inv_directory):
     """Convert inv_directory to a station dataframe."""
     inv_df = stations_to_df(inv_directory)
     return inv_df
Esempio n. 27
0
def df_to_inventory(df) -> obspy.Inventory:
    """
    Create a station inventory from a dataframe.

    Parameters
    ----------
    df
        A dataframe which must have the same columns as the ones produced by
        :func:`obsplus.stations_to_df`. The input is copied, not modified.

    Returns
    -------
    obspy.Inventory
        An inventory with one Network per network code, each holding the
        Station and Channel objects built from the grouped rows.

    Notes
    -----
    The dataframe can also contain columns named "sensor_keys" and
    "datalogger_keys" which will indicate the response information should
    be fetched using obspy's ability to interact with the nominal response
    library. Each of these columns should either contain tuples or strings
    where the keys are separated by double underscores (__).
    """

    def _make_key_mappings(cls):
        """Create a mapping from kwargs of cls to columns in df."""
        base_params = set(inspect.signature(cls).parameters)
        new_map = mapping_keys[cls]
        base_map = {x: x for x in base_params - set(new_map)}
        base_map.update(new_map)
        return base_map

    def _groupby_if_exists(df, columns):
        """Groupby columns if they exist on dataframe, else return empty."""
        cols = list(obsplus.utils.iterate(columns))
        if not set(cols).issubset(df.columns):
            return

        # copy df and set missing start/end times to reasonable values
        # this is needed so they get included in a groupby
        df = df.copy()
        isnan = df.isna()
        # the mask never changes, so test it once rather than per group
        has_nan = isnan.any().any()
        default_start = pd.Timestamp(SMALLDT64)
        default_end = pd.Timestamp(LARGEDT64)

        if "start_date" in columns:
            df["start_date"] = df["start_date"].fillna(default_start)
        if "end_date" in columns:
            df["end_date"] = df["end_date"].fillna(default_end)

        for ind, df_sub in df.groupby(cols):
            # put back the NaN values which were filled for the groupby
            if has_nan:
                df_sub[isnan.loc[df_sub.index]] = np.nan
            yield ind, df_sub

    def _get_kwargs(series, key_mapping):
        """Create the kwargs from a series and key mapping."""
        out = {}
        for k, v in key_mapping.items():
            # skip if requested kwarg is not in the series
            if v not in series:
                continue
            value = series[v]
            value = value if not pd.isnull(value) else None
            # if the type needs to be cast to something else
            if k in type_mappings and value is not None:
                value = type_mappings[k](value)
            out[k] = value

        return out

    @lru_cache()
    def get_nrl():
        """Initiate a nominal response library object."""
        from obspy.clients.nrl import NRL

        return NRL()

    @lru_cache()
    def get_response(datalogger_keys, sensor_keys):
        """Fetch the response for the given keys from the NRL."""
        nrl = get_nrl()
        kwargs = dict(datalogger_keys=datalogger_keys, sensor_keys=sensor_keys)
        return nrl.get_response(**kwargs)

    def _get_resp_key(key):
        """
        Get response keys, as a tuple, from various types.

        Missing values (None, NaN) and empty strings give an empty tuple so
        that the caller can detect unpopulated columns.
        """
        if key is None or (pd.api.types.is_scalar(key) and pd.isnull(key)):
            return ()
        if isinstance(key, str):
            # "".split("__") would give [""], which is truthy; return () instead
            return tuple(key.split("__")) if key else ()
        return tuple(key)

    def _maybe_add_response(series, channel_kwargs):
        """Maybe add the response information if required columns exist."""
        # bail out if required columns do not exist
        if not {"sensor_keys", "datalogger_keys"}.issubset(set(series.index)):
            return
        # determine if both required columns are populated, else bail out
        sensor_keys = _get_resp_key(series["sensor_keys"])
        datalogger_keys = _get_resp_key(series["datalogger_keys"])
        if not (sensor_keys and datalogger_keys):
            return
        # at this point all the required info for resp lookup should be there
        channel_kwargs["response"] = get_response(datalogger_keys, sensor_keys)

    # work on a copy so the caller's dataframe is left untouched
    df = df.copy()
    # Deal with pandas dtype weirdness
    # TODO remove this when custom column functions are supported by DataFrame
    #  Extractor (part of the big refactor in #131)
    for col in NSLC:
        # Only strip a trailing ".0" (codes which were read as floats). The
        # pattern is anchored and escaped so codes like "10" or "A0B" survive.
        df[col] = df[col].astype(str).str.replace(r"\.0$", "", regex=True)

    # first get key_mappings
    net_map = _make_key_mappings(Network)
    sta_map = _make_key_mappings(Station)
    cha_map = _make_key_mappings(Channel)
    # next define columns groupbys should be performed on
    net_columns = ["network"]
    sta_columns = ["station", "start_date", "end_date"]
    cha_columns = ["channel", "location", "start_date", "end_date"]
    # Ensure input is a dataframe
    df = obsplus.stations_to_df(df)
    # Iterate networks and create stations
    networks = []
    for net_code, net_df in _groupby_if_exists(df, net_columns):
        stations = []
        for st_code, sta_df in _groupby_if_exists(net_df, sta_columns):
            channels = []
            for ch_code, ch_df in _groupby_if_exists(sta_df, cha_columns):
                # the first row of each group describes the channel
                chan_series = ch_df.iloc[0]
                kwargs = _get_kwargs(chan_series, cha_map)
                # try to add the response
                _maybe_add_response(chan_series, kwargs)
                channels.append(Channel(**kwargs))
            kwargs = _get_kwargs(sta_df.iloc[0], sta_map)
            stations.append(Station(channels=channels, **kwargs))
        kwargs = _get_kwargs(net_df.iloc[0], net_map)
        networks.append(Network(stations=stations, **kwargs))

    return obspy.Inventory(networks=networks,
                           source=f"ObsPlus_v{obsplus.__version__}")
Esempio n. 28
0
 def test_something(self, read_inventory, inventory):
     """The read_inventory frame matches the frame built from inventory."""
     expected = stations_to_df(inventory)
     assert (read_inventory.columns == expected.columns).all()
     assert not read_inventory.empty
     assert len(expected) == len(read_inventory)
     assert set(expected["seed_id"]) == set(read_inventory["seed_id"])
Esempio n. 29
0
 def df_from_inv_from_df(self, inv_from_df):
     """Convert the inv_from_df fixture back into a station dataframe."""
     df = obsplus.stations_to_df(inv_from_df)
     return df
Esempio n. 30
0
def df_to_inventory(df) -> obspy.Inventory:
    """
    Build a simple station inventory out of a dataframe.

    The dataframe needs the same columns as the ones produced by
    :func:`obsplus.stations_to_df`.
    """

    def _make_key_mappings(cls):
        """Map each kwarg of cls to the dataframe column which feeds it."""
        params = set(inspect.signature(cls).parameters)
        overrides = mapping_keys[cls]
        identity = {name: name for name in params - set(overrides)}
        return {**identity, **overrides}

    def _groupby_if_exists(df, columns):
        """Yield (key, group) pairs, or nothing if a column is missing."""
        cols = list(obsplus.utils.iterate(columns))
        if set(cols).difference(df.columns):
            return

        # Missing start/end times are filled on a copy so that the rows are
        # kept by the groupby; the null mask is taken before filling.
        filled = df.copy()
        was_null = filled.isna()
        any_null = was_null.any().any()
        if "start_date" in columns:
            filled["start_date"] = filled["start_date"].fillna(0)
        if "end_date" in columns:
            filled["end_date"] = filled["end_date"].fillna(LARGE_NUMBER)

        for group_key, group in filled.groupby(cols):
            if any_null:
                # restore the values which were null before filling
                group[was_null.loc[group.index]] = np.nan
            yield group_key, group

    def _get_kwargs(series, key_mapping):
        """Collect kwargs from a series; null values become None."""
        present = (
            (kwarg, series[column])
            for kwarg, column in key_mapping.items()
            if column in series
        )
        return {
            kwarg: (None if pd.isnull(value) else value)
            for kwarg, value in present
        }

    # kwarg-to-column mappings for each level of the inventory
    net_map = _make_key_mappings(Network)
    sta_map = _make_key_mappings(Station)
    cha_map = _make_key_mappings(Channel)
    # columns each level is grouped on
    net_columns = ["network"]
    sta_columns = ["station", "start_date", "end_date"]
    cha_columns = ["channel", "location", "start_date", "end_date"]
    # make sure we are working with a station dataframe
    station_df = obsplus.stations_to_df(df)
    # the first row of every group supplies the kwargs for its object
    networks = []
    for _, net_df in _groupby_if_exists(station_df, net_columns):
        stations = []
        for _, sta_df in _groupby_if_exists(net_df, sta_columns):
            channels = [
                Channel(**_get_kwargs(ch_df.iloc[0], cha_map))
                for _, ch_df in _groupby_if_exists(sta_df, cha_columns)
            ]
            sta_kwargs = _get_kwargs(sta_df.iloc[0], sta_map)
            stations.append(Station(channels=channels, **sta_kwargs))
        net_kwargs = _get_kwargs(net_df.iloc[0], net_map)
        networks.append(Network(stations=stations, **net_kwargs))

    source = f"ObsPlus_v{obsplus.__version__}"
    return obspy.Inventory(networks=networks, source=source)