Example #1
def test_append_simple():
    """Test appending without regard to dimensions."""
    points = 100
    data1 = np.random.random(points)
    data2 = np.random.random(points)
    coords1 = {'time': np.linspace(0, points, points)}
    coords2 = {'time': np.linspace(points, points * 2, points)}
    dims = ["time"]
    samplerate = 10.

    # Base case: everything should Just Work
    ts1 = TimeSeries.create(data1, samplerate, coords=coords1, dims=dims)
    ts2 = TimeSeries.create(data2, samplerate, coords=coords2, dims=dims)
    combined = ts1.append(ts2)
    assert combined.samplerate == samplerate
    assert (combined.data == np.concatenate([data1, data2])).all()
    assert combined.dims == ts1.dims
    assert combined.dims == ts2.dims
    assert (combined.coords['time'] == np.concatenate(
        [coords1['time'], coords2['time']])).all()

    # Append along a new dimension
    combined = ts1.append(ts2, dim='notyet')
    assert combined.shape == (2, 100)
    assert hasattr(combined, 'notyet')

    # Incompatible sample rates
    ts1 = TimeSeries.create(data1, samplerate, coords=coords1, dims=dims)
    ts2 = TimeSeries.create(data2, samplerate + 1, coords=coords2, dims=dims)
    with pytest.raises(ConcatenationError):
        ts1.append(ts2)
Example #2
def test_coords_ops():
    data = np.arange(1000).reshape(10, 10, 10)

    ts_1 = TimeSeries.create(data,
                             None,
                             dims=['x', 'y', 'z'],
                             coords={
                                 'x': np.arange(10),
                                 'y': np.arange(10),
                                 'z': np.arange(10) * 2,
                                 'samplerate': 1
                             })
    ts_2 = TimeSeries.create(data,
                             None,
                             dims=['x', 'y', 'z'],
                             coords={
                                 'x': np.arange(10),
                                 'y': np.arange(10),
                                 'z': np.arange(10),
                                 'samplerate': 1
                             })
    ts_out = ts_1 + ts_2
    assert ts_out.z.shape[0] == 5

    ts_out_1 = ts_1 + ts_2[..., ::2]

    assert (ts_out_1 == ts_out).all()

    ts_out_2 = ts_2[..., 1::2] + ts_2[..., ::2]

    assert ts_out_2.shape[-1] == 0

    ts_out_3 = ts_2[..., [0, 2, 3, 4, 8]] + ts_2[..., [3, 4, 8, 9]]

    assert (ts_out_3.z.data == np.array([3, 4, 8])).all()
Example #3
def test_samplerate_prop():
    data = np.arange(1000).reshape(10, 10, 10)
    rate = 1000

    ts_1 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_2 = TimeSeries.create(data, None, coords={'samplerate': 2})

    with pytest.raises(AssertionError):
        ts_out = ts_1 + ts_2
Example #4
def test_addition(i, j, k, expected):
    data = np.arange(1000).reshape(10, 10, 10)
    rate = 1000

    ts_1 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_2 = TimeSeries.create(data, None, coords={'samplerate': 1})

    ts_out = ts_1 + ts_2
    assert ts_out[i, j, k] == expected
Example #5
def test_hdf(tempdir):
    """Test saving/loading with HDF5."""
    data = np.random.random((10, 10, 10, 10))
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    rate = 1

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")

    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    ts_with_attrs = TimeSeries.create(data,
                                      rate,
                                      coords=coords,
                                      dims=dims,
                                      name="test",
                                      attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test compression:
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"
Example #6
def test_arithmetic_operations():
    data = np.arange(1000).reshape(10, 10, 10)
    rate = 1000

    ts_1 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_2 = TimeSeries.create(data, None, coords={'samplerate': 1})

    ts_out = ts_1 + ts_2

    print('ts_out=', ts_out)
Example #7
    def LoadPTSA(self, df_row, ev_start=0, ev_len=None, buf=None, strict=None):
        '''df_row: A selected DataFrame row.
        ev_start: The relative offset at which each event starts, in milliseconds.
        ev_len: The length of each event in milliseconds, used to divide the
          eeg into time windows around event boundaries.
        buf: Extra time in milliseconds to add to both ends of each event.
        strict: A bool; if True, raise ArithmeticError on NaNs.
        Returns a PTSA TimeSeries object.'''
        from ptsa.data.timeseries import TimeSeries
        data, sr, channels = self.LoadEEG(df_row, ev_start, ev_len, buf,
                                          strict)
        if ev_len is None:
            st = 0
        else:
            st = ev_start
            if buf is not None:
                st -= buf
        en = st + (data.shape[-1] - 1) * 1000. / sr
        time = np.linspace(st, en, data.shape[-1])

        coords = {'channel': [str(c) for c in channels.label], 'time': time}
        if ev_len is not None:
            coords['event'] = \
                [{k:v for k,v in r._asdict().items()}
                  for r in self.events.itertuples()]
        return TimeSeries.create(data,
                                 sr,
                                 coords=coords,
                                 dims=('event', 'channel', 'time'))
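A hypothetical usage sketch for LoadPTSA above; `loader` (an instance of the class defining it) and `session_df` are assumptions, not part of the original code:

# Hypothetical usage; `loader` is an instance of the class defining LoadPTSA
# and `session_df` a DataFrame of session rows (both assumed).
row = session_df.iloc[0]

# 1.5 s events starting 500 ms before each event onset, with a 300 ms buffer.
ts = loader.LoadPTSA(row, ev_start=-500, ev_len=1500, buf=300)
print(ts.dims)                  # ('event', 'channel', 'time')
print(float(ts['samplerate']))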
Example #8
    def filter(self):
        """
        Chops session into chunks corresponding to events
        :return: TimeSeries object with chopped session
        """
        chop_on_start_offsets_flag = bool(len(self.start_offsets))

        if chop_on_start_offsets_flag:

            start_offsets = self.start_offsets
            chopping_axis_name = 'start_offsets'
            chopping_axis_data = start_offsets
        else:

            evs = self.events[self.events.eegfile ==
                              self.timeseries.attrs['dataroot']]
            start_offsets = evs.eegoffset
            chopping_axis_name = 'events'
            chopping_axis_data = evs

        samplerate = float(self.timeseries['samplerate'])
        offset_time_array = self.timeseries['offsets']

        event_chunk_size, start_point_shift = self.get_event_chunk_size_and_start_point_shift(
            eegoffset=start_offsets[0],
            samplerate=samplerate,
            offset_time_array=offset_time_array)

        event_time_axis = np.arange(event_chunk_size) * (1.0 / samplerate) + (
            self.start_time - self.buffer_time)

        data_list = []

        for i, eegoffset in enumerate(start_offsets):

            start_chop_pos = np.where(offset_time_array >= eegoffset)[0][0]
            start_chop_pos += start_point_shift
            selector_array = np.arange(start=start_chop_pos,
                                       stop=start_chop_pos + event_chunk_size)

            chopped_data_array = self.timeseries.isel(time=selector_array)

            chopped_data_array['time'] = event_time_axis
            chopped_data_array['start_offsets'] = [i]

            data_list.append(chopped_data_array)

        ev_concat_data = xr.concat(data_list, dim='start_offsets')

        ev_concat_data = ev_concat_data.rename(
            {'start_offsets': chopping_axis_name})
        ev_concat_data[chopping_axis_name] = chopping_axis_data

        attrs = {
            "start_time": self.start_time,
            "end_time": self.end_time,
            "buffer_time": self.buffer_time
        }
        ev_concat_data['samplerate'] = samplerate
        return TimeSeries.create(ev_concat_data, samplerate, attrs=attrs)
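The chop-and-concatenate pattern in filter() above can be illustrated with plain xarray: slice fixed-size windows out of one continuous array, give each window a common per-event time axis, and concatenate along a new dimension. A minimal sketch with made-up offsets and sizes:

import numpy as np
import xarray as xr

# Continuous "session" signal with one long time axis.
session = xr.DataArray(np.arange(20.0), dims=['time'],
                       coords={'time': np.arange(20)})

chunk_size = 4
start_offsets = [2, 9, 14]  # made-up event start samples

pieces = []
for i, start in enumerate(start_offsets):
    piece = session.isel(time=np.arange(start, start + chunk_size))
    piece['time'] = np.arange(chunk_size) / 10.0  # common per-event time axis
    pieces.append(piece.expand_dims(start_offsets=[i]))

chopped = xr.concat(pieces, dim='start_offsets')
print(chopped.shape)  # (3, 4)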
Example #9
def test_init():
    """Test that everything is initialized properly."""
    data = np.random.random((10, 10, 10))
    rate = 1000

    with pytest.raises(AssertionError):
        TimeSeries(data, {})

    with pytest.raises(AssertionError):
        TimeSeries.create(data, None, coords={})

    assert TimeSeries.create(data, None, coords={
        'samplerate': 1
    }).samplerate == 1

    ts = TimeSeries(data, dict(samplerate=rate))
    assert isinstance(ts, xr.DataArray)
    assert ts.shape == (10, 10, 10)
    assert ts['samplerate'] == rate
Example #10
def test_baseline_corrected():
    t = np.linspace(0, 10, 100)
    values = np.array([1] * 50 + [2] * 50)
    coords = {"time": t}
    ts = TimeSeries.create(values, 10., coords, dims=("time", ))
    corrected = ts.baseline_corrected((0, 5))
    assert all(ts['time'] == corrected['time'])
    assert ts['samplerate'] == corrected['samplerate']
    assert all(corrected.data[:50] == 0)
    assert all(corrected.data[50:] == 1)
Example #11
def test_resampled():
    ts = TimeSeries.create(np.linspace(0, 100, 100), 10., dims=['time'])

    resampled = ts.resampled(20.)
    assert resampled.data.shape == (200, )
    assert resampled['samplerate'] == 20

    resampled = ts.resampled(5)
    assert resampled.data.shape == (50, )
    assert resampled['samplerate'] == 5
Example #12
def test_mean():
    """tests various ways to compute mean - collapsing different
combination of axes"""
    data = np.arange(100).reshape(10, 10)
    ts_1 = TimeSeries.create(data,
                             None,
                             dims=['x', 'y'],
                             coords={
                                 'x': np.arange(10) * 2,
                                 'y': np.arange(10),
                                 'samplerate': 1
                             })
    grand_mean = ts_1.mean()

    assert grand_mean == 49.5

    x_mean = ts_1.mean(dim='x')
    assert (x_mean == np.arange(45, 55, 1, dtype=float)).all()
    # checking axes
    assert (ts_1.y == x_mean.y).all()

    y_mean = ts_1.mean(dim='y')
    assert (y_mean == np.arange(4.5, 95, 10, dtype=float)).all()
    # checking axes
    assert (y_mean.x == ts_1.x).all()

    # test mean NaN
    data_2 = np.arange(100, dtype=float).reshape(10, 10)
    np.fill_diagonal(data_2, np.nan)
    # data_2[9,9] = 99

    ts_2 = TimeSeries.create(data_2,
                             None,
                             dims=['x', 'y'],
                             coords={
                                 'x': np.arange(10) * 2,
                                 'y': np.arange(10),
                                 'samplerate': 1
                             })

    grand_mean = ts_2.mean(skipna=True)
    assert grand_mean == 49.5
Example #13
def test_concatenate():
    """make sure we can concatenate easily time series x - test it with rec
    array as one of the coords.

    This fails for xarray > 0.7. See
    https://github.com/pydata/xarray/issues/1434 for details.

    """
    p1 = np.array([('John', 180), ('Stacy', 150), ('Dick', 200)],
                  dtype=[('name', '|S256'), ('height', int)])
    p2 = np.array([('Bernie', 170), ('Donald', 250), ('Hillary', 150)],
                  dtype=[('name', '|S256'), ('height', int)])

    data = np.arange(50, 80, 1, dtype=float)
    dims = ['measurement', 'participant']

    ts1 = TimeSeries.create(data.reshape(10, 3),
                            None,
                            dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p1,
                                'samplerate': 1
                            })

    ts2 = TimeSeries.create(data.reshape(10, 3) * 2,
                            None,
                            dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p2,
                                'samplerate': 1
                            })

    combined = xr.concat((ts1, ts2), dim='participant')

    assert isinstance(combined, TimeSeries)
    assert (combined.participant.data['height'] == np.array(
        [180, 150, 200, 170, 250, 150])).all()
    assert (combined.participant.data['name'] == np.array(
        ['John', 'Stacy', 'Dick', 'Bernie', 'Donald', 'Hillary'])).all()
Example #14
    def _create_spike_timeseries(self, spike_data, time, sr, events):
        # create a TimeSeries object
        dims = ('event', 'time')
        coords = {
            'event':
            events[events.columns[events.columns != 'index']].to_records(),
            'time': time
        }
        return TimeSeries.create(spike_data,
                                 samplerate=sr,
                                 dims=dims,
                                 coords=coords)
Example #15
    def _create_eeg_timeseries(self, grp, events):
        data = np.array(grp['ev_eeg'])
        time = grp.attrs['time']
        channel = grp.attrs['channel']
        sr = grp.attrs['samplerate']

        # create a TimeSeries object (in order to make use of PTSA's wavelet calculation)
        dims = ('event', 'time', 'channel')
        coords = {'event': events[events.columns[events.columns != 'index']].to_records(),
                  'time': time,
                  'channel': [channel]}

        return TimeSeries.create(data, samplerate=sr, dims=dims, coords=coords)
Example #17
    def to_ptsa(self, recarray=False) -> "TimeSeries":  # noqa: F821
        """Convert to a PTSA :class:`TimeSeriesX` object.

        Parameters
        ----------
        recarray
            If True, events are stored as a recarray to preserve backwards
            compatibility. If False, events are stored as xarray coordinates.

        Notes
        -----
        Events are first converted from a :class:`pd.DataFrame` to a Numpy
        recarray and are available as the ``event`` coordinate.

        """
        from ptsa.data.timeseries import TimeSeries
        dims = ("event", "channel", "time")

        if self.events is not None:
            if recarray:
                events = self.events.to_records()
            else:
                for col in self.events.columns:
                    if isinstance(self.events[col].iloc[0], list):
                        self.events[col] = self.events[col].apply(tuple)
                events = pd.MultiIndex.from_frame(self.events)
        else:
            columns = ["eegoffset", "epoch_end"]
            if len(self.epochs[0]) > 2:
                columns = [
                    columns[i] if i < 2 else "column_{}".format(i)
                    for i in range(len(self.epochs[0]))
                ]
            events = pd.MultiIndex.from_frame(
                pd.DataFrame(self.epochs, columns=columns))
            if recarray:
                events = events.to_records(index=False)

        coords = {
            "event": events,
            "channel": self.channels,
            "time": self.time,
        }

        return TimeSeries.create(
            self.data,
            samplerate=self.samplerate,
            dims=dims,
            coords=coords,
        )
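A hypothetical usage of the conversion above; `reader_output` stands in for whatever (unshown) EEG container class defines to_ptsa:

# Hypothetical usage; `reader_output` is an instance of the container class
# that defines to_ptsa (not shown here).
ts = reader_output.to_ptsa()                      # events become xarray coordinates
print(ts.dims)                                    # ('event', 'channel', 'time')

ts_legacy = reader_output.to_ptsa(recarray=True)  # events kept as a recarray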
Example #18
def test_filtered():
    data = np.random.random(1000)
    dims = ['time']

    ts = TimeSeries.create(data, 10, dims=dims)

    # TODO: real test (i.e., actually care about the filtering)
    with warnings.catch_warnings(record=True) as w:
        new_ts = ts.filtered([1, 2])
        assert len(w) == 1
        assert ts['samplerate'] == new_ts['samplerate']
        assert all(ts.data != new_ts.data)
        for key, attr in ts.attrs.items():
            assert attr == new_ts[key]
        assert ts.name == new_ts.name
        assert ts.dims == new_ts.dims
Example #19
def test_add_mirror_buffer():
    points = 100

    data = np.array([-1] * points + [1] * points)
    samplerate = 10.
    coords = {'time': np.linspace(-1, 1, points * 2)}
    dims = ['time']
    ts = TimeSeries.create(data, samplerate, coords=coords, dims=dims)

    duration = 10
    buffered = ts.add_mirror_buffer(duration)
    assert len(buffered.data) == len(data) + 2 * duration * samplerate

    with pytest.raises(ValueError):
        # 100 s is longer than the length of data
        ts.add_mirror_buffer(100)
Example #20
def test_remove_buffer():
    length = 100
    data = np.array([0] * length)
    samplerate = 10.
    coords = {'time': np.linspace(-1, 1, length)}
    dims = ['time']
    ts = TimeSeries.create(data, samplerate, coords=coords, dims=dims)

    with pytest.raises(ValueError):
        # We can't remove this much
        ts.remove_buffer(int(samplerate * length + 1))

    buffer_dur = 0.1
    buffered = ts.add_mirror_buffer(buffer_dur)
    unbuffered = buffered.remove_buffer(buffer_dur)

    assert len(unbuffered.data) == len(ts.data)
    assert (unbuffered.data == ts.data).all()
Example #21
def test_filter_with(cls, kwargs):
    ts = TimeSeries.create(np.random.random((2, 100)),
                           samplerate=10,
                           dims=("x", "time"),
                           coords={
                               "x": range(2),
                               "time": range(100),
                           })

    if cls is None:

        class MyClass(object):
            pass

        with pytest.raises(TypeError):
            ts.filter_with(MyClass)
    else:
        tsf = ts.filter_with(cls, **kwargs)
        assert isinstance(tsf, TimeSeries)
        assert tsf.data.shape != ts.data.shape
Example #22
    def to_ptsa(self) -> "TimeSeries":
        """Convert to a PTSA :class:`TimeSeriesX` object.

        Notes
        -----
        Events are first converted from a :class:`pd.DataFrame` to a Numpy
        recarray and are available as the ``event`` coordinate.

        """
        from ptsa.data.timeseries import TimeSeries

        dims = ("event", "channel", "time")

        if self.events is not None:
            events = self.events.to_records()
        else:
            columns = ["eegoffset", "epoch_end"]
            if len(self.epochs[0]) > 2:
                columns = [
                    columns[i] if i < 2 else "column_{}".format(i)
                    for i in range(len(self.epochs[0]))
                ]
            events = pd.DataFrame(self.epochs,
                                  columns=columns).to_records(index=False)

        coords = {
            "event": events,
            "channel": self.channels,
            "time": self.time,
        }

        return TimeSeries.create(
            self.data,
            samplerate=self.samplerate,
            dims=dims,
            coords=coords,
        )
Example #23
def load_eeg(events,
             rel_start_ms,
             rel_stop_ms,
             buf_ms=0,
             elec_scheme=None,
             noise_freq=[58., 62.],
             resample_freq=None,
             pass_band=None,
             use_mirror_buf=False,
             demean=False,
             do_average_ref=False):
    """
    Returns an EEG TimeSeries object.

    Parameters
    ----------
    events: pandas.DataFrame
        An events dataframe that contains eegoffset and eegfile fields
    rel_start_ms: int
        Initial time (in ms), relative to the onset of each event
    rel_stop_ms: int
        End time (in ms), relative to the onset of each event
    buf_ms: int
        Amount of time (in ms) of buffer to add to both the beginning and end of the time interval
    elec_scheme: pandas.DataFrame
        A dataframe of electrode information, returned by load_elec_info(). If the column 'contact' is in the dataframe,
        monopolar electrodes will be loaded. If the columns 'contact_1' and 'contact_2' are in the df, bipolar will be
        loaded. You may pass in a subset of rows to only load data for electrodes in those rows.

        If you do not enter an elec_scheme, all monopolar channels will be loaded (but they will not be labeled with
        correct channel tags). Entering a scheme is recommended.
    noise_freq: list
        Stop filter will be applied to the given range. Default=[58., 62.]
    resample_freq: float
        Sampling rate to resample to after loading eeg.
    pass_band: list
        If given, the eeg will be band pass filtered in the given range.
    use_mirror_buf: bool
        If True, the buffer will be data taken from within the rel_start_ms to rel_stop_ms interval,
        mirrored and prepended and appended to the timeseries. If False, data outside the rel_start_ms and rel_stop_ms
        interval will be read.
    demean: bool
        If True, will subtract the mean voltage between rel_start_ms and rel_stop_ms from each channel
    do_average_ref: bool
        If True, will compute the average reference based on the mean voltage across channels

    Returns
    -------
    TimeSeries
        EEG timeseries object with dimensions channels x events x time (or bipolar_pairs x events x time)

        NOTE: The EEG data is returned with time buffer included. If you included a buffer and want to remove it,
              you may use the .remove_buffer() method. EXTRA NOTE: INPUT SECONDS FOR REMOVING BUFFER, NOT MS!!

    """

    # check if monopolar is possible for this subject
    if elec_scheme is not None and 'contact' in elec_scheme:
        eegfile = np.unique(events.eegfile)[0]
        if os.path.splitext(eegfile)[1] == '.h5':
            eegfile = f'/protocols/r1/subjects/{events.iloc[0].subject}/experiments/{events.iloc[0].experiment}/sessions/{events.iloc[0].session}/ephys/current_processed/noreref/{eegfile}'
            with h5py.File(eegfile, 'r') as f:
                if not np.array(f['monopolar_possible'])[0] == 1:
                    print('Monopolar referencing not possible for {}'.format(
                        events.iloc[0].subject))
                    return

    # add buffer if using
    if (buf_ms is not None) and not use_mirror_buf:
        actual_start = rel_start_ms - buf_ms
        actual_stop = rel_stop_ms + buf_ms
    else:
        actual_start = rel_start_ms
        actual_stop = rel_stop_ms

    # load eeg
    eeg = CMLReader(subject=events.iloc[0].subject).load_eeg(
        events,
        rel_start=actual_start,
        rel_stop=actual_stop,
        scheme=elec_scheme).to_ptsa()

    # now auto cast to float32 to help with memory issues with high sample rate data
    eeg.data = eeg.data.astype('float32')

    # baseline correct by subtracting the mean within the baseline time range
    if demean:
        eeg = eeg.baseline_corrected([rel_start_ms, rel_stop_ms])

    # compute average reference by subtracting the mean across channels
    if do_average_ref:
        eeg = eeg - eeg.mean(dim='channel')

    # add mirror buffer if using. PTSA is expecting this to be in seconds.
    if use_mirror_buf:
        eeg = eeg.add_mirror_buffer(buf_ms / 1000.)

    # filter line noise
    if noise_freq is not None:
        if isinstance(noise_freq[0], float):
            noise_freq = [noise_freq]
        for this_noise_freq in noise_freq:
            for this_chan in range(eeg.shape[1]):
                b_filter = ButterworthFilter(eeg[:, this_chan:this_chan + 1],
                                             this_noise_freq,
                                             filt_type='stop',
                                             order=4)
                eeg[:, this_chan:this_chan + 1] = b_filter.filter()

    # resample if desired. Note: can be a bit slow, especially if you have a lot of eeg data

    if resample_freq is not None:
        eeg_resamp = []
        for this_chan in range(eeg.shape[1]):
            r_filter = ResampleFilter(eeg[:, this_chan:this_chan + 1],
                                      resample_freq)
            eeg_resamp.append(r_filter.filter())
        coords = {x: eeg[x] for x in eeg.coords.keys()}
        coords['time'] = eeg_resamp[0]['time']
        coords['samplerate'] = resample_freq
        dims = eeg.dims
        eeg = TimeSeries.create(np.concatenate(eeg_resamp, axis=1),
                                resample_freq,
                                coords=coords,
                                dims=dims)

    # do band pass if desired.
    if pass_band is not None:
        eeg = band_pass_eeg(eeg, pass_band)

    # reorder dims to make events first
    eeg = make_events_first_dim(eeg)
    return eeg
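A hedged usage sketch for load_eeg above; `events` and `elec_scheme` are assumed to come from the project's own loaders (e.g. load_elec_info()), so this is illustrative rather than runnable on its own:

# Hypothetical inputs: an events DataFrame with eegoffset/eegfile fields and
# an electrode scheme DataFrame, both assumed to exist.
eeg = load_eeg(events,
               rel_start_ms=-500,
               rel_stop_ms=1500,
               buf_ms=300,
               elec_scheme=elec_scheme,
               noise_freq=[58., 62.],
               resample_freq=500.)

# The returned data still includes the buffer; per the docstring,
# remove_buffer() expects SECONDS, not milliseconds.
eeg = eeg.remove_buffer(300 / 1000.)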
Example #24
def _load_eeg_timeseries(events,
                         rel_start_ms,
                         rel_stop_ms,
                         channel_list,
                         buf_ms=0,
                         downsample_freq=1000,
                         resample_freq=None):
    """

    Parameters
    ----------
    events: pandas.DataFrame
        DataFrame with the column 'stTime', specifying the timestamp when the event occurred
    rel_start_ms: int
        Relative time (ms) to add to the stTime to define the start of the time interval
    rel_stop_ms: int
        Relative time (ms) to add to the stTime to define the end of the time interval
    channel_list: list
        List of channel Ncs files
    buf_ms: int
        Buffer (ms) to add to the start and end of the time period
    downsample_freq: int
        Sample rate to downsample the initial data to, immediately after loading the full file
    resample_freq: int
        Resample eeg to this value. Done after epoching.

    Returns
    -------
        ptsa.TimeSeries with dims event x time x channel
    """

    # will build a list of eeg data that we will concatenate across channels
    eeg_list = []

    # epochs will be a list of tuples of start and stop sample offsets
    epochs = None
    for channel in channel_list:

        # load channel data
        signals, timestamps, sr = load_ncs(channel)

        if downsample_freq is not None:
            signals, timestamps, sr = _my_downsample(signals, timestamps, sr,
                                                     downsample_freq)

        # get start and stop samples (only once)
        # assumes all channels have the same timestamps..
        if epochs is None:
            epochs = _compute_epochs(events, rel_start_ms - buf_ms,
                                     rel_stop_ms + buf_ms, timestamps, sr)

            # remove any epochs < 0
            bad_epochs = (np.any(epochs < 0, 1)) | (np.any(
                epochs > len(signals), 1))
            epochs = epochs[~bad_epochs]
            events = events[~bad_epochs].reset_index(drop=True)

        # segment the continuous eeg into epochs. Also resample.
        eeg, new_time = _segment_eeg_single_channel(signals, epochs, sr,
                                                    timestamps, resample_freq)
        eeg_list.append(eeg)

    # create timeseries
    dims = ('event', 'time', 'channel')
    coords = {
        'event': events.to_records(),
        'time': (new_time[0] - events.stTime.values[0]) / 1e6,
        'channel': channel_list
    }
    sr_for_ptsa = resample_freq if resample_freq is not None else sr
    eeg_all_chans = TimeSeries.create(np.stack(eeg_list, -1),
                                      samplerate=sr_for_ptsa,
                                      dims=dims,
                                      coords=coords)
    return eeg_all_chans
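A hypothetical call to the helper above; the events DataFrame and the Ncs file names are placeholders:

# Hypothetical call; `events` (with an 'stTime' column) and the Ncs paths
# are placeholders.
eeg = _load_eeg_timeseries(events,
                           rel_start_ms=-200,
                           rel_stop_ms=800,
                           channel_list=['CSC1.ncs', 'CSC2.ncs'],
                           buf_ms=100,
                           downsample_freq=1000,
                           resample_freq=500)
print(eeg.dims)  # ('event', 'time', 'channel')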
Example #25
def power_spectra_from_spike_times(s_times,
                                   clust_nums,
                                   channel_file,
                                   rel_start_ms,
                                   rel_stop_ms,
                                   freqs,
                                   noise_freq=[58., 62.],
                                   downsample_freq=250,
                                   mean_over_spikes=True):
    """
    Function to compute power relative to spike times. This computes power at given frequencies for the ENTIRE session
    and then bins it relative to spike times. You WILL run out of memory if you don't let it downsample first. Default
    downsample is to 250 Hz.

    Parameters
    ----------
    s_times: np.ndarray
        Array (or list) of timestamps of when spikes occurred. EEG will be loaded relative to these times.
    clust_nums: np.ndarray
        Array (or list) of cluster IDs, same size as s_times
    channel_file: str
        Path to Ncs file from which to load eeg.
    rel_start_ms: int
        Initial time (in ms), relative to the onset of each spike
    rel_stop_ms: int
        End time (in ms), relative to the onset of each spike
    freqs: np.ndarray
        array of frequencies at which to compute power
    noise_freq: list
        Stop filter will be applied to the given range. Default=[58., 62.]
    downsample_freq: int or float
        Frequency to downsample the data to. Uses decimate, so we will likely not reach the exact frequency.
    mean_over_spikes: bool
        If True, mean over spikes after computing the spike x frequency array and return only the mean power spectra

    Returns
    -------
    dict
        dict of either spike x frequency arrays of power values or, if mean_over_spikes, just the mean power
        spectra. Keys are cluster numbers
    """

    # make a df with 'stTime' column for epoching
    events = pd.DataFrame(data=np.stack([s_times, clust_nums], -1),
                          columns=['stTime', 'cluster_num'])

    # load channel data
    signals, timestamps, sr = load_ncs(channel_file)

    # downsample the session
    if downsample_freq is not None:
        signals, timestamps, sr = _my_downsample(signals, timestamps, sr,
                                                 downsample_freq)
    else:
        print(
            'I HIGHLY recommend you downsample the data before computing power across the whole session...'
        )
        print('You will probably run out of memory.')

    # make into timeseries
    eeg = TimeSeries.create(signals,
                            samplerate=sr,
                            dims=['time'],
                            coords={'time': timestamps / 1e6})

    # filter line noise
    if noise_freq is not None:
        if isinstance(noise_freq[0], float):
            noise_freq = [noise_freq]
        for this_noise_freq in noise_freq:
            b_filter = ButterworthFilter(eeg,
                                         this_noise_freq,
                                         filt_type='stop',
                                         order=4)
            eeg = b_filter.filter()

    # compute power
    wave_pow = MorletWaveletFilter(eeg,
                                   freqs,
                                   output='power',
                                   width=5,
                                   cpus=12,
                                   verbose=False).filter()

    # log the power
    data = wave_pow.data
    wave_pow.data = numexpr.evaluate('log10(data)')

    # get start and stop relative to the spikes
    epochs = _compute_epochs(events, rel_start_ms, rel_stop_ms, timestamps, sr)
    bad_epochs = (np.any(epochs < 0, 1)) | (np.any(epochs > len(signals), 1))
    epochs = epochs[~bad_epochs]
    events = events[~bad_epochs].reset_index(drop=True)

    # mean over time within epochs
    spikes_x_freqs = np.stack(
        [np.mean(wave_pow.data[:, x[0]:x[1]], axis=1) for x in epochs])

    # make dict with keys being cluster numbers. Mean over spikes if desired.
    pow_spect_dict = {}
    for this_cluster in events.cluster_num.unique():
        if mean_over_spikes:
            pow_spect_dict[this_cluster] = spikes_x_freqs[
                events.cluster_num == this_cluster].mean(axis=0)
        else:
            pow_spect_dict[this_cluster] = spikes_x_freqs[events.cluster_num ==
                                                          this_cluster]

    return pow_spect_dict
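A hedged sketch of calling power_spectra_from_spike_times; the spike times and cluster IDs would come from spike sorting, and the channel file path is a placeholder:

import numpy as np

# Hypothetical inputs: `s_times` (spike timestamps) and `clust_nums`
# (cluster IDs) from spike sorting; the Ncs path is a placeholder.
freqs = np.logspace(np.log10(1.), np.log10(100.), 30)
pow_by_cluster = power_spectra_from_spike_times(
    s_times, clust_nums, 'CSC1.ncs',
    rel_start_ms=-500, rel_stop_ms=500,
    freqs=freqs, downsample_freq=250, mean_over_spikes=True)

# pow_by_cluster[cluster_id] is the mean log-power at each frequency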
Example #26
def test_hdf(tempdir):
    """Test saving/loading with HDF5."""
    data = np.random.random((10, 10, 10, 10))
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    rate = 1

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")

    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    ts_with_attrs = TimeSeries.create(data,
                                      rate,
                                      coords=coords,
                                      dims=dims,
                                      name="test",
                                      attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test compression:
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test different containers as dims:
    data = np.random.random((3, 7, 10, 4))
    dims = ('time', 'recarray', 'list', 'recordarray')
    coords = {
        'time':
        np.linspace(0, 1, 3),
        'recarray':
        np.array([(i, j, k) for i, j, k in zip(np.linspace(
            0, 1, 7), np.linspace(1000, 2000, 7), np.linspace(0, 1, 7))],
                 dtype=[('field1', float), ('field2', int),
                        ('field3', 'U20')]),
        'list':
        list(np.linspace(100, 200, 10)),
        'recordarray':
        np.array([(i, j, k) for i, j, k in zip(np.linspace(
            0, 1, 4), np.linspace(1000, 2000, 4), np.linspace(0, 1, 4))],
                 dtype=[('field1', float), ('field2', int),
                        ('field3', 'U20')]).view(np.recarray)
    }
    rate = 1
    ts = TimeSeries.create(data,
                           rate,
                           coords=coords,
                           dims=dims,
                           name="container test")
    ts.to_hdf(filename, compression='gzip', compression_opts=9)
    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs
    loaded = TimeSeries.from_hdf(filename)
    for key in ts.attrs:
        assert ts.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        # dtypes can be slightly different for recarrays:
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for coord in ts.coords:
        # dtypes can be slightly different for recarrays:
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "container test"
Example #30
def test_append_recarray():
    """Test appending along a dimension with a recarray."""
    p1 = np.array([('John', 180), ('Stacy', 150), ('Dick', 200)],
                  dtype=[('name', '|S256'), ('height', int)])
    p2 = np.array([('Bernie', 170), ('Donald', 250), ('Hillary', 150)],
                  dtype=[('name', '|S256'), ('height', int)])

    data = np.arange(50, 80, 1, dtype=float)
    dims = ['measurement', 'participant']

    ts1 = TimeSeries.create(data.reshape(10, 3),
                            None,
                            dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p1,
                                'samplerate': 1
                            })

    ts2 = TimeSeries.create(data.reshape(10, 3) * 2,
                            None,
                            dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p2,
                                'samplerate': 1
                            })

    ts3 = TimeSeries.create(data.reshape(10, 3) * 2,
                            None,
                            dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p2,
                                'samplerate': 2
                            })

    ts4 = TimeSeries.create(data.reshape(10, 3) * 2,
                            None,
                            dims=dims,
                            coords={
                                'measurement': np.linspace(0, 1, 10),
                                'participant': p2,
                                'samplerate': 2
                            })

    combined = ts1.append(ts2, dim='participant')

    assert isinstance(combined, TimeSeries)
    assert (combined.participant.data['height'] == np.array(
        [180, 150, 200, 170, 250, 150])).all()
    names = np.array(
        [b'John', b'Stacy', b'Dick', b'Bernie', b'Donald', b'Hillary'])
    assert (combined.participant.data['name'] == names).all()

    # incompatible sample rates
    with pytest.raises(ConcatenationError):
        ts1.append(ts3)

    # incompatible other dimensions (measurement)
    with pytest.raises(ConcatenationError):
        ts1.append(ts4)