Example #1
    def __call__(
        self,
        time_arg: event_time_type,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        *args,
        **kwargs,
    ) -> obspy.Stream:
        """
        Using a reference time, return a stream that encompasses that time.

        Parameters
        ----------
        time_arg
            The argument that will indicate a reference time. Can be a
            length-one Catalog, an Event, a float, or a UTCDateTime object.
        time_before
            The time before time_arg to include in waveforms
        time_after
            The time after time_arg to include in waveforms

        Returns
        -------
        obspy.Stream
        """
        tbefore = to_timedelta64(time_before, default=self.time_before)
        tafter = to_timedelta64(time_after, default=self.time_after)
        assert (tbefore is not None) and (tafter is not None)
        # get the reference time from the object
        time = to_datetime64(get_reference_time(time_arg))
        t1 = time - tbefore
        t2 = time + tafter
        return self.get_waveforms(starttime=to_utc(t1),
                                  endtime=to_utc(t2),
                                  **kwargs)
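The windowing pattern above reduces to a simple slice around a reference time. A minimal sketch using only obspy (the reference time and offsets here are made up; obsplus' version also accepts events and floats):

import obspy

st = obspy.read()  # obspy's bundled example stream
ref = st[0].stats.starttime + 10  # hypothetical reference time
time_before, time_after = 2.0, 5.0
windowed = st.slice(starttime=ref - time_before, endtime=ref + time_after)
print(windowed)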
Example #2
    def _index2stream(self,
                      index,
                      starttime=None,
                      endtime=None,
                      merge=True) -> Stream:
        """Return the waveforms in the index."""
        # get abs path to each data file
        files: np.ndarray = (str(self.bank_path) + index.path).unique()
        # make sure start and end times are UTCDateTime
        starttime = to_utc(starttime) if starttime else None
        endtime = to_utc(endtime) if endtime else None
        # iterate the files to read and try to load into waveforms
        kwargs = dict(format=self.format, starttime=starttime, endtime=endtime)
        func = partial(_try_read_stream, **kwargs)
        stt = obspy.Stream()
        chunksize = (len(files) // self._max_workers) or 1
        for st in self._map(func, files, chunksize=chunksize):
            if st is not None:
                stt += st
        # sort out nullish nslc codes
        stt = replace_null_nlsc_codes(stt)
        # filter out any traces not in the index (this can happen when files
        # hold multiple traces)
        nslc = set(get_seed_id_series(index))
        stt.traces = [x for x in stt if x.id in nslc]
        # trim, merge, attach response
        stt = self._prep_output_stream(stt, starttime, endtime, merge=merge)
        return stt
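The partial/map pattern above can be reproduced with the standard library. This sketch assumes a hypothetical directory of miniSEED files and uses try_read_stream as a stand-in for obsplus' private _try_read_stream:

import glob
from concurrent.futures import ThreadPoolExecutor
from functools import partial

import obspy

def try_read_stream(path, **kwargs):
    """Return a stream read from path, or None if reading fails."""
    try:
        return obspy.read(path, **kwargs)
    except Exception:
        return None

files = glob.glob("waveforms/*.mseed")  # hypothetical archive layout
func = partial(try_read_stream, format="MSEED")
st = obspy.Stream()
with ThreadPoolExecutor() as executor:
    for sub in executor.map(func, files):
        if sub is not None:
            st += sub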
Example #3
def get_waveforms_bulk(stream: Stream, bulk: bulk_waveform_arg_type,
                       **kwargs) -> Stream:
    """
    Get a large number of waveforms with a bulk request.

    Parameters
    ----------
    stream
        A stream object.
    bulk
        A list of any number of tuples containing the following:
        (network, station, location, channel, starttime, endtime).
    """
    # get a dataframe of stream contents
    index = _get_waveform_df(stream)
    # get a dataframe of the bulk arguments, convert time to datetime64
    request_df = get_waveform_bulk_df(bulk)
    if not len(request_df):  # return an empty stream if no bulk reqs provided
        return obspy.Stream()
    # get unique times and check conditions for string columns
    unique_times = np.unique(request_df[["starttime", "endtime"]].values,
                             axis=0)
    traces = []
    for (t1, t2) in unique_times:
        sub = _filter_index_to_bulk((t1, t2),
                                    index_df=index,
                                    bulk_df=request_df)
        new = obspy.Stream(traces=[x.data for x in sub["trace"]]).slice(
            starttime=to_utc(t1), endtime=to_utc(t2))
        traces.extend(new.traces)
    return merge_traces(obspy.Stream(traces=traces))
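A usage sketch for the function above (it assumes the function and its obsplus helpers are in scope); obspy's example stream carries BW.RJOB traces, so the bulk tuples request two of its channels:

import obspy

st = obspy.read()
t1 = st[0].stats.starttime
# each entry: (network, station, location, channel, starttime, endtime)
bulk = [
    ("BW", "RJOB", "", "EHZ", t1, t1 + 5),
    ("BW", "RJOB", "", "EHN", t1, t1 + 5),
]
out = get_waveforms_bulk(st, bulk)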
Example #4
    def create_stream(
        self,
        starttime: utc_able_type,
        endtime: utc_able_type,
        seed_ids: Optional[List[str]] = None,
        sampling_rate: Optional[Union[float, int]] = None,
    ) -> obspy.Stream:
        """Create a stream of random data."""
        t1 = to_utc(starttime)
        t2 = to_utc(endtime)
        sr = sampling_rate or self.sampling_rate
        ar_len = int((t2.timestamp - t1.timestamp) * sr)
        st = obspy.Stream()
        for seed in seed_ids or self.seed_ids:
            n, s, l, c = seed.split(".")
            meta = {
                "sampling_rate": sr,
                "starttime": t1,
                "network": n,
                "station": s,
                "location": l,
                "channel": c,
            }
            data = np.random.randn(ar_len)
            tr = obspy.Trace(data=data, header=meta)
            st.append(tr)
        return st
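A self-contained version of the same idea, with hard-coded values in place of the fixture's attributes:

import numpy as np
import obspy

t1 = obspy.UTCDateTime("2020-01-01")
sr = 100.0  # hypothetical sampling rate
npts = int(10 * sr)  # ten seconds of data
header = {"sampling_rate": sr, "starttime": t1, "network": "UU",
          "station": "TEST", "location": "01", "channel": "HHZ"}
st = obspy.Stream(traces=[obspy.Trace(data=np.random.randn(npts), header=header)])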
Example #5
def _trim_stream(df, stream, required_len, trim_tolerance):
    """
    Get the starttimes and endtimes for trimming, raise ValueError
    if the stream is disjointed.
    """
    # check trim tolerance
    if trim_tolerance is not None:
        con1 = (df.start.max() - df.start.min()) > trim_tolerance
        con2 = (df.end.max() - df.end.min()) > trim_tolerance
        if con1 or con2:
            msg = ("the following waveforms did not meet the required trim "
                   f"tolerance:\n{stream}")
            raise ValueError(msg)
    # check length requirements, pop out any traces that don't meet them
    if required_len is not None:
        req_len = np.round(required_len * df.duration.max(), 2)
        too_short = df.duration <= req_len
        if too_short.any():
            trace_str = "\n".join([str(x) for x in df[too_short].trace])
            msg = f"These traces are not at least {req_len} seconds long:\n"
            warnings.warn(msg + trace_str + "\n removing them", UserWarning)
            stream.traces = list(df[~too_short].trace)
        df = df[~too_short]
    if not len(df):
        return Stream()
    # get trim time, trim, emit warnings
    t1, t2 = to_utc(df.start.max()), to_utc(df.end.min())
    if t2 < t1:
        msg = f"The following waveforms has traces with no overlaps {stream}"
        raise ValueError(msg)
    return stream.trim(starttime=t1, endtime=t2)
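The df argument above is presumably one row per trace with trace, start, end, and duration columns (an assumption based on the names used in the function); a sketch of building such a frame from a stream:

import obspy
import pandas as pd

st = obspy.read()
df = pd.DataFrame({
    "trace": list(st),
    "start": [tr.stats.starttime.timestamp for tr in st],
    "end": [tr.stats.endtime.timestamp for tr in st],
})
df["duration"] = df["end"] - df["start"]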
Example #6
def _func(time, ind, df, st):
    """Return waveforms from a df of bulk parameters."""
    match_chars = {"*", "?", "[", "]"}
    ar = np.ones(len(ind))  # indices of ind to use to load data
    _t1, _t2 = time[0], time[1]
    df = df[(df.t1 == _t1) & (df.t2 == _t2)]
    # determine which columns use matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_seed_id_series(df_no_match))
        nslc2 = get_seed_id_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    # get a list of used traces, combine, and trim
    st = obspy.Stream([x for x, y in zip(st, ar) if y])
    return st.slice(starttime=to_utc(_t1), endtime=to_utc(_t2))
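The _column_contains helper is not shown; a plausible stand-in flags rows whose strings contain any glob character, so only those rows pay for the more expensive filter_index matching:

import re

import pandas as pd

def column_contains(ser: pd.Series, chars) -> pd.Series:
    """Return a boolean series, True where any char in chars occurs."""
    pattern = "|".join(re.escape(c) for c in chars)
    return ser.astype(str).str.contains(pattern)

bulk_df = pd.DataFrame({"station": ["RJOB", "R?OB", "*"]})
print(column_contains(bulk_df["station"], {"*", "?", "[", "]"}))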
Example #7
def stream2contiguous(stream: Stream) -> Stream:
    """
    Yield trimmed streams for the time spans over which all traces have data.

    Parameters
    ----------
    stream
        The input stream

    Examples
    --------
    >>> import obspy
    >>> st = obspy.read()
    >>> t1, t2 = st[0].stats.starttime, st[0].stats.endtime
    >>> _ = st[0].trim(endtime=t2 - 2)  # remove data at end of one trace
    >>> out = stream2contiguous(st)
    >>> # stream2contiguous should now have trimmed all traces to match
    >>> assert all(len(tr.data) for tr in st)
    """
    # pre-process waveforms by combining overlaps then breaking up masks
    stream.merge(method=1)
    stream = stream.split()
    # get seed_ids, start time, end time, and gaps
    seed_ids = {tr.id for tr in stream}
    starts, ends = _get_start_end(stream)
    # iterate start/end times, skip gaps and yield chunks of the waveforms
    for t1, t2 in zip(starts, ends):
        if t1 > t2 and len(starts) == len(ends) == 1:
            return  # stream is disjointed; shut down the generator
        assert t1 < t2
        stream_out = stream.slice(starttime=to_utc(t1), endtime=to_utc(t2))
        stream_out.merge(method=1)
        if len({tr.id for tr in stream_out}) == len(seed_ids):
            yield stream_out
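A quick usage sketch (assuming the generator above and its obsplus helpers are in scope): trim one trace, then consume the contiguous chunks.

import obspy

st = obspy.read()
st[0].trim(endtime=st[0].stats.endtime - 2)  # create a ragged end
for chunk in stream2contiguous(st):
    print(chunk)  # every trace in chunk covers the same time span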
Example #8
def ta_time_range(ta_wavebank):
    """return a tuple of time from ta_test bank."""
    df = ta_wavebank.read_index()
    t1 = to_utc(df["starttime"].min()) + 3600
    # move to nearest hour
    start = to_utc(t1.timestamp - t1.timestamp % 3600)
    end = start + 3600 * 6
    return to_utc(start), to_utc(end)
Example #9
    def _get_bulk_args(self,
                       starttime=None,
                       endtime=None,
                       **kwargs) -> bulk_waveform_arg_type:
        """
        Get the bulk waveform arguments based on given start/end times.

        This method also takes into account data availability as contained
        in the stations data.

        Parameters
        ----------
        starttime
            Start times for query.
        endtime
            End times for query.

        Returns
        -------
        List of tuples of the form:
            [(network, station, location, channel, starttime, endtime)]
        """
        station_df = self.station_df.copy()
        inv = station_df[filter_index(station_df, **kwargs)]
        # replace None/NaN end dates with a distant future date
        inv.loc[inv["end_date"].isnull(), "end_date"] = LARGEDT64
        inv["end_date"] = inv["end_date"].astype("datetime64[ns]")
        # get start/end of the inventory
        inv_start = inv["start_date"].min()
        inv_end = inv["end_date"].max()
        # remove stations/channels that don't have data for the requested time
        min_time = to_datetime64(starttime, default=inv_start).min()
        max_time = to_datetime64(endtime, default=inv_end).max()
        con1 = inv["start_date"] > max_time
        con2 = inv["end_date"] < min_time
        df = inv[~(con1 | con2)].set_index("seed_id")[list(NSLC)]
        if df.empty:  # return empty list if no data found
            return []
        if isinstance(starttime, pd.Series):
            # Have to get clever here to make sure only active stations get used
            # and indices are not duplicated.
            new_start = starttime.loc[set(starttime.index).intersection(
                df.index)]
            new_end = endtime.loc[set(endtime.index).intersection(df.index)]
            df["starttime"] = new_start.loc[~new_start.index.duplicated()]
            df["endtime"] = new_end.loc[~new_end.index.duplicated()]
        else:
            df["starttime"] = starttime
            df["endtime"] = endtime
        # remove any rows that don't have defined start/end times
        out = df[~(df["starttime"].isnull() | df["endtime"].isnull())]
        # ensure we have UTCDateTime objects
        out["starttime"] = [to_utc(x) for x in out["starttime"]]
        out["endtime"] = [to_utc(x) for x in out["endtime"]]
        # convert to list of tuples and return
        return [tuple(x) for x in out.to_records(index=False)]
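The availability test in the middle of the method is just an interval-overlap check; a toy sketch with made-up seed ids and dates:

import pandas as pd

inv = pd.DataFrame({
    "seed_id": ["UU.CWU.01.EHZ", "UU.NOQ.01.EHZ"],
    "start_date": pd.to_datetime(["2019-01-01", "2021-01-01"]),
    "end_date": pd.to_datetime(["2020-01-01", "2022-01-01"]),
})
t1, t2 = pd.Timestamp("2019-06-01"), pd.Timestamp("2019-07-01")
# keep channels whose operating window overlaps the requested window
active = inv[~((inv["start_date"] > t2) | (inv["end_date"] < t1))]
print(active["seed_id"].tolist())  # only the first channel survives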
Example #10
def _get_stream_start_end(stream, gap_df):
    """
    Return the latest start time of the initial chunk and the earliest
    end time of the last chunk.
    """
    st1 = stream.slice(endtime=to_utc(gap_df.t1.min()))
    st2 = stream.slice(starttime=to_utc(gap_df.t2.max()))
    t1 = max([tr.stats.starttime.timestamp for tr in st1])
    t2 = min([tr.stats.endtime.timestamp for tr in st2])
    assert t1 < t2
    return t1, t2
Example #11
def stream_bulk_split(st: Stream,
                      bulk: List[waveform_request_type],
                      fill_value: Any = None) -> List[Stream]:
    """
    Split a stream into a list of streams that meet requirements in bulk.

    This is similar to the get_waveforms_bulk method of waveform_client, but
    rather than merging overlapping data it returns a list of streams, one
    per request.

    Parameters
    ----------
    st
        A stream object
    bulk
        A bulk request. Wildcards not currently supported on str params.
    fill_value
        If not None fill any missing data in time range with this value.

    Returns
    -------
    List of streams, each meeting the corresponding request in bulk.
    """
    # return nothing if empty bulk or stream args
    bulk = _get_bulk(bulk)
    if not bulk or len(st) == 0:
        return []

    # get dataframe of stream contents
    sdf = _stream_data_to_df(st)
    # iterate stream, return output
    out = []
    for barg in bulk:
        assert len(barg) == 6, f"{barg} is not a valid bulk arg, must have len 6"
        need = filter_index(sdf, *barg)
        traces = [tr for tr, bo in zip(st, need) if bo]
        new_st = obspy.Stream(traces)
        t1, t2 = to_utc(barg[-2]), to_utc(barg[-1])
        new = new_st.slice(starttime=t1, endtime=t2)
        # apply fill if needed
        if fill_value is not None:
            new = new.trim(starttime=t1,
                           endtime=t2,
                           fill_value=fill_value,
                           pad=True)
        if new is None or not len(new):
            out.append(obspy.Stream())
            continue
        new = merge_traces(new)
        out.append(new)
    assert len(out) == len(bulk), "output is not the same length as bulk"
    return out
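A usage sketch (times and codes match obspy's example stream; assumes the function above and its obsplus helpers are in scope):

import obspy

st = obspy.read()
t1 = st[0].stats.starttime
bulk = [
    ("BW", "RJOB", "", "EHZ", t1, t1 + 10),
    ("BW", "RJOB", "", "EHN", t1 + 5, t1 + 15),
]
streams = stream_bulk_split(st, bulk, fill_value=0)  # one stream per request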
Example #12
    def test_duplicate_stations(self, inv_df_duplicate_channels):
        """
        Ensure duplicate stations create Station objects with correct
        time range.
        """
        df = inv_df_duplicate_channels
        fur_df = df[df["station"] == "FUR"]
        inv = df_to_inventory(fur_df).select(station="FUR")
        stations = inv.networks
        assert len(stations) == 1
        fur = stations[0]
        assert fur.start_date == to_utc(fur_df["start_date"].min())
        assert fur.end_date == to_utc(fur_df["end_date"].max())
Example #13
    def incomplete_trace(self, node_stats_group, node_st) -> Stream:
        """Return a stream with part of the data missing for one of its traces."""
        # Select a trace from a pick that is referenced in the stats group
        st = node_st.copy()
        pick = node_stats_group.data.iloc[0]
        seed_id = pick.name[-1]
        tr = st.select(id=seed_id)[0]
        # Trim the trace so it ends in the middle of the desired time window
        pick_start = pick["starttime"]
        pick_end = pick["endtime"]
        new_end = to_utc(pick_end) - (to_utc(pick_end) - to_utc(pick_start)) / 2
        tr.trim(tr.stats.starttime, new_end)  # acts in place
        return st
Example #14
    def _prep_output_stream(self,
                            st,
                            starttime=None,
                            endtime=None) -> obspy.Stream:
        """
        Prepare waveforms object for output by trimming to desired times,
        merging channels, and attaching responses.
        """
        if not len(st):
            return st
        starttime = starttime or min([x.stats.starttime for x in st])
        endtime = endtime or max([x.stats.endtime for x in st])
        # trim
        st.trim(starttime=to_utc(starttime), endtime=to_utc(endtime))
        return merge_traces(st, inplace=True).sort()
Example #15
def archive_to_sds(
    bank: Union[Path, str, "obsplus.WaveBank"],
    sds_path: Union[Path, str],
    starttime: Optional[UTCDateTime] = None,
    endtime: Optional[UTCDateTime] = None,
    overlap: float = 30,
    type_code: str = "D",
    stream_processor: Optional[callable] = None,
):
    """
    Create a seiscomp data structure archive from a waveform source.

    Parameters
    ----------
    bank
        A wavebank or path to such.
    sds_path
        The path for the new sds archive to be created.
    starttime
        If not None, the starttime to convert data from bank.
    endtime
        If not None, the endtime to convert data from bank.
    overlap
        The overlap to use for each file.
    type_code
        The str indicating the datatype.
    stream_processor
        A callable that will take a single stream as input and return a
        single stream. May return an empty stream to skip it.

    Notes
    -----
    see: https://www.seiscomp3.org/doc/applications/slarchive/SDS.html
    """
    sds_path = Path(sds_path)
    # create a fetcher object for yielding continuous waveforms
    bank = obsplus.WaveBank(bank)
    bank.update_index()
    # get starttime/endtimes
    index = bank.read_index()
    ts1 = index.starttime.min() if not starttime else starttime
    t1 = _nearest_day(ts1)
    t2 = to_utc(index.endtime.max() if not endtime else endtime)
    nslcs = get_seed_id_series(index).unique()
    # iterate over nslc and get data for selected channel
    for nslc in nslcs:
        nslc_dict = {n: v for n, v in zip(NSLC, nslc.split("."))}
        # yield waveforms in desired chunks
        ykwargs = dict(starttime=t1,
                       endtime=t2,
                       overlap=overlap,
                       duration=86400)
        ykwargs.update(nslc_dict)
        for st in bank.yield_waveforms(**ykwargs):
            if stream_processor:  # apply stream processor if needed.
                st = stream_processor(st)
            if st:
                path = _get_sds_filename(st, sds_path, type_code, **nslc_dict)
                st.write(str(path), "mseed")
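A hedged usage sketch with hypothetical paths, including a stream_processor hook:

def drop_short_traces(st):
    """Hypothetical processor: discard traces shorter than one second."""
    st.traces = [tr for tr in st if tr.stats.npts / tr.stats.sampling_rate >= 1]
    return st

archive_to_sds("path/to/wavebank", "path/to/sds",
               stream_processor=drop_short_traces)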
Example #16
    def yield_waveforms(
        self,
        network: Optional[str] = None,
        station: Optional[str] = None,
        location: Optional[str] = None,
        channel: Optional[str] = None,
        starttime: Optional[obspy.UTCDateTime] = None,
        endtime: Optional[obspy.UTCDateTime] = None,
        duration: float = 3600.0,
        overlap: Optional[float] = None,
    ) -> Stream:
        """
        Yield time-series segments from the waveform client.

        Parameters
        ----------
        {get_waveforms_params}
        duration : float
            The duration of the streams to yield. All selected channels
            will be included in the waveforms.
        overlap : float
            If duration is used, the amount of overlap in yielded streams,
            added to the end of the waveforms.

        Notes
        -----
        All string parameters can use posix style matching with * and ? chars.

        Total duration of yielded streams = duration + overlap.

        If no starttime or endtime is provided the min/max indicated by the
        stations will be used.
        """
        # Note: although WaveBank has a yield waveforms method, we want
        # fetcher to work with any client so we don't use its implementation.
        starttime = to_utc(starttime or self.station_df["start_date"].min())
        endtime = to_utc(endtime or self.station_df["end_date"].max())
        time_chunks = make_time_chunks(starttime, endtime, duration, overlap)
        for t1, t2 in time_chunks:
            kwargs = dict(network=network,
                          station=station,
                          location=location,
                          channel=channel)
            yield self.get_waveforms(starttime=t1, endtime=t2, **kwargs)
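The chunking logic lives in make_time_chunks (not shown); a minimal sketch of what such a generator might look like, clipping the final chunk at endtime (an assumption, not obsplus' exact behavior):

import obspy

def make_chunks(t1, t2, duration, overlap=0.0):
    """Yield (start, end) pairs of duration + overlap, stepping by duration."""
    current = t1
    while current < t2:
        yield current, min(current + duration + overlap, t2)
        current += duration

start = obspy.UTCDateTime("2020-01-01")
for c1, c2 in make_chunks(start, start + 7200, duration=3600, overlap=60):
    print(c1, c2)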
Example #17
def get_waveforms(
    stream: Stream,
    network: str = "*",
    station: str = "*",
    location: str = "*",
    channel: str = "*",
    starttime: Optional[UTC] = None,
    endtime: Optional[UTC] = None,
) -> obspy.Stream:
    """
    A subset of the Client.get_waveforms method.

    Simply makes successive calls to Stream.select and Stream.trim under the
    hood. Matching is available on all str parameters.

    Parameters
    ----------
    stream
        A stream object.
    network
        The network code
    station
        The station code
    location
        Location code
    channel
        Channel code
    starttime
        Starttime for query
    endtime
        Endtime for query
    """
    t1, t2 = to_utc(starttime or SMALLDT64), to_utc(endtime or LARGEDT64)
    kwargs = {c: v for c, v in zip(NSLC, [network, station, location, channel])}
    st = stream.select(**kwargs).slice(starttime=t1, endtime=t2).copy()
    return st
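Usage sketch against obspy's example stream (assumes the function above and its obsplus constants are importable):

import obspy

st = obspy.read()
t1 = st[0].stats.starttime
sub = get_waveforms(st, station="RJOB", channel="EHZ",
                    starttime=t1, endtime=t1 + 5)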
Example #18
def floatify_dict(some_dict):
    """
    Iterate a dict and convert all TimeStamps/datetime64 to floats.
    Then round all floats to nearest 4 decimals.
    """
    out = {}
    for i, v in some_dict.items():
        if isinstance(v, (pd.Timestamp, np.datetime64)):
            v = to_utc(v).timestamp
        if isinstance(v, float):
            v = np.round(v, 4)
        out[i] = v
    return out
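Usage sketch (assumes floatify_dict above and obsplus' to_utc are in scope); 2020-01-01 corresponds to epoch 1577836800:

import pandas as pd

d = {"time": pd.Timestamp("2020-01-01"), "value": 1.23456789}
print(floatify_dict(d))  # {'time': 1577836800.0, 'value': 1.2346}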
Example #19
def _create_first_pick_origin(first_pick, channel_ser, depth):
    """ Create an origin based on first pick and a channel series. """
    msg = ("origin fixed to location and time of earliest pick by "
           f"obsplus version {obsplus.__last_version__}")
    comment = ev.Comment(text=msg)
    odict = dict(
        time=to_utc(first_pick["time"]),
        latitude=channel_ser["latitude"],
        longitude=channel_ser["longitude"],
        depth=depth,
        time_fixed=True,
        comments=[comment],
    )
    return ev.Origin(**odict)
Example #20
    def test_only_p_phases(self, event_dict_p, subbing_fetcher_with_processor):
        """Make sure only stations that have P picks are returned."""
        stream = subbing_fetcher_with_processor.waveform_client.get_waveforms()
        df = subbing_fetcher_with_processor.picks_df
        for eve_id, st in event_dict_p.items():
            con1 = df["event_id"] == eve_id
            con2 = df["phase_hint"].str.upper() == "P"
            pick_df = df[con1 & con2]
            # iterate each pick, determine if it has data in the bank
            for ind, row in pick_df.iterrows():
                time = to_utc(row["time"])
                kwargs = dict(
                    starttime=time - self.time_before,
                    endtime=time + self.time_after,
                    station=row["station"],
                )
                st1 = stream.get_waveforms(**kwargs)
                st2 = st.get_waveforms(**kwargs)
                assert_streams_almost_equal(st1, st2, allow_off_by_one=True)
Example #21
    def df_with_get_stations_kwargs(self):
        """
        Add response information to the dataframe using get_stations_kwargs.

        Add an additional station which will need to get all data from other
        columns.
        """
        _inv = obsplus.load_dataset(
            "bingham_test").station_client.get_stations()
        inv = _inv.select(station="NOQ")

        with suppress_warnings():
            df = obsplus.stations_to_df(inv).reset_index()

        # set get_station_kwargs for last two channels, leave first empty
        kwargs_list = [""]
        for ind, row in df.iloc[1:].iterrows():
            kwargs = {x: row[x] for x in NSLC}
            kwargs["endafter"] = str(to_utc(row["start_date"]))
            kwargs_list.append(kwargs)
        # set last kwargs to str to simulate reading from csv
        kwargs_list[-1] = str(kwargs_list[-1])
        df["get_station_kwargs"] = kwargs_list
        # set the first kwargs to a string to make sure it can be parsed
        # this is important, e.g., for reading data from a csv.
        df.loc[0, "get_station_kwargs"] = str(df.loc[0, "get_station_kwargs"])
        # now add a row with an empty get_station_kwargs column
        old = dict(df.iloc[0])
        new = {
            "station": "CWU",
            "network": "UU",
            "channel": "EHZ",
            "location": "01",
            "seed_id": "UU.CWU.01.EHZ",
            "get_station_kwargs": "{}",
        }
        old.update(new)
        ser = pd.Series(old)
        return df.append(ser, ignore_index=True)
Example #22
def gauss_trace_group(gauss_stat_group) -> TraceGroup:
    """ Create a TraceGroup with a Gaussian pulse as the data """
    # Generate the data
    data = gauss(_t, _a, _b, _c)
    gauss_stat_group.data["sampling_rate"] = 1 / _dt
    # Build a stream from the data
    tr = Trace(
        data=data,
        header={
            "starttime": to_utc(gauss_stat_group.data.iloc[0].starttime),
            "delta": _dt,
            "network": "UK",
            "station": "STA1",
            "channel": "HHZ",
        },
    )
    st = Stream()
    st.append(tr)
    # Add a second trace with a substantial discontinuity caused by zero-padding
    st.append(tr.copy())  # Same data, but the time window in the StatsGroup halves it
    st[1].stats.station = "STA2"
    # Make a TraceGroup
    return mopy.TraceGroup(gauss_stat_group, st, "displacement").fillna()
Example #23
    def test_single_value(self, value):
        """Test either a sequence or UTCDateTime."""
        out = to_utc(value)
        assert isinstance(out, (Sequence, UTCDateTime, np.ndarray))
Example #24
    def new_time(self, bing_first_time):
        """Get a new time from the first bingham_test event time plus one second."""
        return to_utc(bing_first_time + 1)
Example #25
    def test_time(self, new_event_stream, new_time):
        """Ensure the new time falls within the returned stream."""
        assert len(new_event_stream)
        t1 = to_utc(new_event_stream[0].stats.starttime.timestamp)
        t2 = to_utc(new_event_stream[0].stats.endtime.timestamp)
        assert t1 < new_time < t2
Example #26
def _nearest_day(time):
    """ Round a time down to the nearest day. """
    ts = to_utc(time).timestamp
    ts_day = 3600 * 24
    return to_utc(ts - (ts % ts_day))
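The same floor-to-day arithmetic using only obspy:

import obspy

t = obspy.UTCDateTime("2020-06-15T13:45:00")
day = obspy.UTCDateTime(t.timestamp - t.timestamp % 86400)
print(day)  # 2020-06-15T00:00:00.000000Z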
Example #27
def bing_first_time(bingham_dataset):
    """Get a new time based on the first event in bingham_test event + 1"""
    df = obsplus.events_to_df(bingham_dataset.event_client.get_events())
    return to_utc(df.iloc[0]["time"])
Example #28
def _times_to_utc(df):
    """Convert time columns to UTCDateTime."""
    df["starttime"] = to_utc(df["starttime"])
    df["endtime"] = to_utc(df["endtime"])
    return df