def test_append_simple():
    """Test appending without regard to dimensions."""
    points = 100
    data1 = np.random.random(points)
    data2 = np.random.random(points)
    coords1 = {'time': np.linspace(0, points, points)}
    coords2 = {'time': np.linspace(points, points * 2, points)}
    dims = ["time"]
    samplerate = 10.

    # Base case: everything should Just Work
    ts1 = TimeSeries.create(data1, samplerate, coords=coords1, dims=dims)
    ts2 = TimeSeries.create(data2, samplerate, coords=coords2, dims=dims)
    combined = ts1.append(ts2)
    assert combined.samplerate == samplerate
    assert (combined.data == np.concatenate([data1, data2])).all()
    assert combined.dims == ts1.dims
    assert combined.dims == ts2.dims
    assert (combined.coords['time'] == np.concatenate(
        [coords1['time'], coords2['time']])).all()

    # Append along a new dimension
    combined = ts1.append(ts2, dim='notyet')
    assert combined.shape == (2, 100)
    assert hasattr(combined, 'notyet')

    # Incompatible sample rates
    ts1 = TimeSeries.create(data1, samplerate, coords=coords1, dims=dims)
    ts2 = TimeSeries.create(data2, samplerate + 1, coords=coords2, dims=dims)
    with pytest.raises(ConcatenationError):
        ts1.append(ts2)
def test_coords_ops():
    data = np.arange(1000).reshape(10, 10, 10)
    ts_1 = TimeSeries.create(data, None, dims=['x', 'y', 'z'],
                             coords={
                                 'x': np.arange(10),
                                 'y': np.arange(10),
                                 'z': np.arange(10) * 2,
                                 'samplerate': 1
                             })
    ts_2 = TimeSeries.create(data, None, dims=['x', 'y', 'z'],
                             coords={
                                 'x': np.arange(10),
                                 'y': np.arange(10),
                                 'z': np.arange(10),
                                 'samplerate': 1
                             })

    # only the overlapping 'z' coordinates (0, 2, 4, 6, 8) are aligned
    ts_out = ts_1 + ts_2
    assert ts_out.z.shape[0] == 5

    ts_out_1 = ts_1 + ts_2[..., ::2]
    assert (ts_out_1 == ts_out).all()

    # disjoint coordinates align to an empty dimension
    ts_out_2 = ts_2[..., 1::2] + ts_2[..., ::2]
    assert ts_out_2.shape[-1] == 0

    ts_out_3 = ts_2[..., [0, 2, 3, 4, 8]] + ts_2[..., [3, 4, 8, 9]]
    assert (ts_out_3.z.data == np.array([3, 4, 8])).all()
def test_samplerate_prop():
    data = np.arange(1000).reshape(10, 10, 10)
    ts_1 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_2 = TimeSeries.create(data, None, coords={'samplerate': 2})

    # mismatched sample rates should fail to combine
    with pytest.raises(AssertionError):
        ts_1 + ts_2
def test_addition(i, j, k, expected):
    data = np.arange(1000).reshape(10, 10, 10)
    ts_1 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_2 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_out = ts_1 + ts_2
    assert ts_out[i, j, k] == expected
def test_arithmetic_operations():
    data = np.arange(1000).reshape(10, 10, 10)
    ts_1 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_2 = TimeSeries.create(data, None, coords={'samplerate': 1})
    ts_out = ts_1 + ts_2
    assert (ts_out.data == data * 2).all()
def LoadPTSA(self, df_row, ev_start=0, ev_len=None, buf=None, strict=None):
    '''Load EEG as a PTSA TimeSeries, dividing the eeg into time around
    event boundaries.

    df_row: A selected DataFrame row.
    ev_start: The relative offset for starting each event in milliseconds.
    ev_len: The length of each event in milliseconds.
    buf: Extra time in milliseconds to add to both ends of each event.
    strict: A bool enabling ArithmeticError for nans.

    Returns a PTSA TimeSeries object.'''
    from ptsa.data.timeseries import TimeSeries

    data, sr, channels = self.LoadEEG(df_row, ev_start, ev_len, buf, strict)

    if ev_len is None:
        st = 0
    else:
        st = ev_start
        if buf is not None:
            st -= buf
    en = st + (data.shape[-1] - 1) * 1000. / sr
    time = np.linspace(st, en, data.shape[-1])

    coords = {'channel': [str(c) for c in channels.label], 'time': time}
    if ev_len is not None:
        coords['event'] = \
            [{k: v for k, v in r._asdict().items()}
             for r in self.events.itertuples()]

    return TimeSeries.create(data, sr, coords=coords,
                             dims=('event', 'channel', 'time'))
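# Usage sketch for LoadPTSA (illustrative, not from the source): assumes a
# loader instance exposing LoadEEG() and an `events` DataFrame; the loader
# class name, `df` attribute, and window values below are hypothetical.
ldr = EEGLoader(subject='sub01')            # hypothetical constructor
row = ldr.df.iloc[0]                        # a selected DataFrame row

# Epoch each event starting -100 ms, 600 ms long, with a 100 ms buffer on each end.
ts = ldr.LoadPTSA(row, ev_start=-100, ev_len=600, buf=100)
print(ts.dims)   # ('event', 'channel', 'time')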
def filter(self):
    """
    Chops session into chunks corresponding to events

    :return: TimeSeries object with chopped session
    """
    chop_on_start_offsets_flag = bool(len(self.start_offsets))

    if chop_on_start_offsets_flag:
        start_offsets = self.start_offsets
        chopping_axis_name = 'start_offsets'
        chopping_axis_data = start_offsets
    else:
        evs = self.events[self.events.eegfile == self.timeseries.attrs['dataroot']]
        start_offsets = evs.eegoffset
        chopping_axis_name = 'events'
        chopping_axis_data = evs

    samplerate = float(self.timeseries['samplerate'])
    offset_time_array = self.timeseries['offsets']

    event_chunk_size, start_point_shift = self.get_event_chunk_size_and_start_point_shift(
        eegoffset=start_offsets[0],
        samplerate=samplerate,
        offset_time_array=offset_time_array)

    event_time_axis = np.arange(event_chunk_size) * (1.0 / samplerate) + (
        self.start_time - self.buffer_time)

    data_list = []
    for i, eegoffset in enumerate(start_offsets):
        start_chop_pos = np.where(offset_time_array >= eegoffset)[0][0]
        start_chop_pos += start_point_shift
        selector_array = np.arange(start=start_chop_pos,
                                   stop=start_chop_pos + event_chunk_size)
        chopped_data_array = self.timeseries.isel(time=selector_array)
        chopped_data_array['time'] = event_time_axis
        chopped_data_array['start_offsets'] = [i]
        data_list.append(chopped_data_array)

    ev_concat_data = xr.concat(data_list, dim='start_offsets')
    ev_concat_data = ev_concat_data.rename(
        {'start_offsets': chopping_axis_name})
    ev_concat_data[chopping_axis_name] = chopping_axis_data

    attrs = {
        "start_time": self.start_time,
        "end_time": self.end_time,
        "buffer_time": self.buffer_time
    }
    ev_concat_data['samplerate'] = samplerate
    return TimeSeries.create(ev_concat_data, samplerate, attrs=attrs)
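# Usage sketch for filter() (illustrative, not from the source): assumes the
# method lives on a chopper class (here hypothetically named SessionChopper)
# exposing the attributes used above: timeseries, start_offsets, events,
# start_time, end_time, and buffer_time.
chopper = SessionChopper(timeseries=session_eeg,   # continuous session TimeSeries
                         start_offsets=np.array([1000, 5000, 9000]),
                         start_time=0.0, end_time=1.6, buffer_time=0.0)
chopped = chopper.filter()
print(chopped.dims)   # first dim is 'start_offsets' (or 'events')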
def test_init():
    """Test that everything is initialized properly."""
    data = np.random.random((10, 10, 10))
    rate = 1000

    with pytest.raises(AssertionError):
        TimeSeries(data, {})

    with pytest.raises(AssertionError):
        TimeSeries.create(data, None, coords={})

    assert TimeSeries.create(data, None,
                             coords={'samplerate': 1}).samplerate == 1

    ts = TimeSeries(data, dict(samplerate=rate))
    assert isinstance(ts, xr.DataArray)
    assert ts.shape == (10, 10, 10)
    assert ts['samplerate'] == rate
def test_baseline_corrected():
    t = np.linspace(0, 10, 100)
    values = np.array([1] * 50 + [2] * 50)
    coords = {"time": t}
    ts = TimeSeries.create(values, 10., coords, dims=("time",))

    corrected = ts.baseline_corrected((0, 5))
    assert all(ts['time'] == corrected['time'])
    assert ts['samplerate'] == corrected['samplerate']
    assert all(corrected.data[:50] == 0)
    assert all(corrected.data[50:] == 1)
def test_resampled():
    ts = TimeSeries.create(np.linspace(0, 100, 100), 10., dims=['time'])

    resampled = ts.resampled(20.)
    assert resampled.data.shape == (200,)
    assert resampled['samplerate'] == 20

    resampled = ts.resampled(5)
    assert resampled.data.shape == (50,)
    assert resampled['samplerate'] == 5
def test_mean():
    """Tests various ways to compute the mean, collapsing different
    combinations of axes."""
    data = np.arange(100).reshape(10, 10)
    ts_1 = TimeSeries.create(data, None, dims=['x', 'y'],
                             coords={
                                 'x': np.arange(10) * 2,
                                 'y': np.arange(10),
                                 'samplerate': 1
                             })

    grand_mean = ts_1.mean()
    assert grand_mean == 49.5

    x_mean = ts_1.mean(dim='x')
    assert (x_mean == np.arange(45, 55, 1, dtype=float)).all()
    # checking axes
    assert (ts_1.y == x_mean.y).all()

    y_mean = ts_1.mean(dim='y')
    assert (y_mean == np.arange(4.5, 95, 10, dtype=float)).all()
    # checking axes
    assert (y_mean.x == ts_1.x).all()

    # test mean with NaNs on the diagonal
    data_2 = np.arange(100, dtype=float).reshape(10, 10)
    np.fill_diagonal(data_2, np.nan)
    ts_2 = TimeSeries.create(data_2, None, dims=['x', 'y'],
                             coords={
                                 'x': np.arange(10) * 2,
                                 'y': np.arange(10),
                                 'samplerate': 1
                             })

    grand_mean = ts_2.mean(skipna=True)
    assert grand_mean == 49.5
def test_concatenate():
    """Make sure we can concatenate time series easily - test it with a rec
    array as one of the coords. This fails for xarray > 0.7. See
    https://github.com/pydata/xarray/issues/1434 for details.
    """
    p1 = np.array([('John', 180), ('Stacy', 150), ('Dick', 200)],
                  dtype=[('name', '|S256'), ('height', int)])
    p2 = np.array([('Bernie', 170), ('Donald', 250), ('Hillary', 150)],
                  dtype=[('name', '|S256'), ('height', int)])

    data = np.arange(50, 80, 1, dtype=float)
    dims = ['measurement', 'participant']

    ts1 = TimeSeries.create(data.reshape(10, 3), None, dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p1,
                                'samplerate': 1
                            })
    ts2 = TimeSeries.create(data.reshape(10, 3) * 2, None, dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p2,
                                'samplerate': 1
                            })

    combined = xr.concat((ts1, ts2), dim='participant')

    assert isinstance(combined, TimeSeries)
    assert (combined.participant.data['height'] == np.array(
        [180, 150, 200, 170, 250, 150])).all()
    assert (combined.participant.data['name'] == np.array(
        ['John', 'Stacy', 'Dick', 'Bernie', 'Donald', 'Hillary'])).all()
def _create_spike_timeseries(self, spike_data, time, sr, events):
    # create a TimeSeries object
    dims = ('event', 'time')
    coords = {
        'event': events[events.columns[events.columns != 'index']].to_records(),
        'time': time
    }
    return TimeSeries.create(spike_data, samplerate=sr, dims=dims,
                             coords=coords)
def _create_eeg_timeseries(self, grp, events):
    data = np.array(grp['ev_eeg'])
    time = grp.attrs['time']
    channel = grp.attrs['channel']
    sr = grp.attrs['samplerate']

    # create a TimeSeries object (in order to make use of its wavelet
    # calculation)
    dims = ('event', 'time', 'channel')
    coords = {
        'event': events[events.columns[events.columns != 'index']].to_records(),
        'time': time,
        'channel': [channel]
    }

    return TimeSeries.create(data, samplerate=sr, dims=dims, coords=coords)
def to_ptsa(self, recarray=False) -> "TimeSeries":  # noqa: F821
    """Convert to a PTSA :class:`TimeSeries` object.

    Parameters
    ----------
    recarray
        If True, events are stored as a recarray, to preserve backwards
        compatibility; if False, events are stored as xarray coordinates.

    Notes
    -----
    Events are first converted from a :class:`pd.DataFrame` to either a
    NumPy recarray or a :class:`pd.MultiIndex` and are available as the
    ``event`` coordinate.

    """
    from ptsa.data.timeseries import TimeSeries

    dims = ("event", "channel", "time")

    if self.events is not None:
        if recarray:
            events = self.events.to_records()
        else:
            # lists are unhashable, so convert them to tuples before
            # building the MultiIndex
            for col in self.events.columns:
                if isinstance(self.events[col].iloc[0], list):
                    self.events[col] = self.events[col].apply(tuple)
            events = pd.MultiIndex.from_frame(self.events)
    else:
        columns = ["eegoffset", "epoch_end"]
        if len(self.epochs[0]) > 2:
            columns = [
                columns[i] if i < 2 else "column_{}".format(i)
                for i in range(len(self.epochs[0]))
            ]
        epochs_df = pd.DataFrame(self.epochs, columns=columns)
        if recarray:
            events = epochs_df.to_records(index=False)
        else:
            events = pd.MultiIndex.from_frame(epochs_df)

    coords = {
        "event": events,
        "channel": self.channels,
        "time": self.time,
    }

    return TimeSeries.create(
        self.data,
        samplerate=self.samplerate,
        dims=dims,
        coords=coords,
    )
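# Usage sketch for to_ptsa() (illustrative, not from the source): assumes an
# EEG container obtained from a reader's load_eeg(); `reader`, `events`, and
# `pairs` are placeholders for the project's own objects.
container = reader.load_eeg(events, rel_start=-100, rel_stop=500, scheme=pairs)
ts = container.to_ptsa(recarray=False)   # events become a pd.MultiIndex coordinate
print(ts.dims)                           # ('event', 'channel', 'time')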
def test_filtered():
    data = np.random.random(1000)
    dims = ['time']
    ts = TimeSeries.create(data, 10, dims=dims)

    # TODO: real test (i.e., actually care about the filtering)
    with warnings.catch_warnings(record=True) as w:
        new_ts = ts.filtered([1, 2])
        assert len(w) == 1

    assert ts['samplerate'] == new_ts['samplerate']
    assert all(ts.data != new_ts.data)

    for key, attr in ts.attrs.items():
        assert attr == new_ts[key]

    assert ts.name == new_ts.name
    assert ts.dims == new_ts.dims
def test_add_mirror_buffer():
    points = 100
    data = np.array([-1] * points + [1] * points)
    samplerate = 10.
    coords = {'time': np.linspace(-1, 1, points * 2)}
    dims = ['time']
    ts = TimeSeries.create(data, samplerate, coords=coords, dims=dims)

    duration = 10
    buffered = ts.add_mirror_buffer(duration)
    assert len(buffered.data) == len(data) + 2 * duration * samplerate

    with pytest.raises(ValueError):
        # 100 s is longer than the length of the data
        ts.add_mirror_buffer(100)
def test_remove_buffer():
    length = 100
    data = np.array([0] * length)
    samplerate = 10.
    coords = {'time': np.linspace(-1, 1, length)}
    dims = ['time']
    ts = TimeSeries.create(data, samplerate, coords=coords, dims=dims)

    with pytest.raises(ValueError):
        # We can't remove this much
        ts.remove_buffer(int(samplerate * length + 1))

    buffer_dur = 0.1
    buffered = ts.add_mirror_buffer(buffer_dur)
    unbuffered = buffered.remove_buffer(buffer_dur)
    assert len(unbuffered.data) == len(ts.data)
    assert (unbuffered.data == ts.data).all()
def test_filter_with(cls, kwargs):
    ts = TimeSeries.create(np.random.random((2, 100)), samplerate=10,
                           dims=("x", "time"),
                           coords={
                               "x": range(2),
                               "time": range(100),
                           })

    if cls is None:
        class MyClass(object):
            pass

        with pytest.raises(TypeError):
            ts.filter_with(MyClass)
    else:
        tsf = ts.filter_with(cls, **kwargs)
        assert isinstance(tsf, TimeSeries)
        assert tsf.data.shape != ts.data.shape
def to_ptsa(self) -> "TimeSeries":
    """Convert to a PTSA :class:`TimeSeries` object.

    Notes
    -----
    Events are first converted from a :class:`pd.DataFrame` to a NumPy
    recarray and are available as the ``event`` coordinate.

    """
    from ptsa.data.timeseries import TimeSeries

    dims = ("event", "channel", "time")

    if self.events is not None:
        events = self.events.to_records()
    else:
        columns = ["eegoffset", "epoch_end"]
        if len(self.epochs[0]) > 2:
            columns = [
                columns[i] if i < 2 else "column_{}".format(i)
                for i in range(len(self.epochs[0]))
            ]
        events = pd.DataFrame(self.epochs,
                              columns=columns).to_records(index=False)

    coords = {
        "event": events,
        "channel": self.channels,
        "time": self.time,
    }

    return TimeSeries.create(
        self.data,
        samplerate=self.samplerate,
        dims=dims,
        coords=coords,
    )
def load_eeg(events, rel_start_ms, rel_stop_ms, buf_ms=0, elec_scheme=None,
             noise_freq=[58., 62.], resample_freq=None, pass_band=None,
             use_mirror_buf=False, demean=False, do_average_ref=False):
    """
    Returns an EEG TimeSeries object.

    Parameters
    ----------
    events: pandas.DataFrame
        An events dataframe that contains eegoffset and eegfile fields
    rel_start_ms: int
        Initial time (in ms), relative to the onset of each event
    rel_stop_ms: int
        End time (in ms), relative to the onset of each event
    buf_ms:
        Amount of time (in ms) of buffer to add to both the beginning and end
        of the time interval
    elec_scheme: pandas.DataFrame
        A dataframe of electrode information, returned by load_elec_info().
        If the column 'contact' is in the dataframe, monopolar electrodes
        will be loaded. If the columns 'contact_1' and 'contact_2' are in the
        df, bipolar will be loaded. You may pass in a subset of rows to only
        load data for electrodes in those rows.

        If you do not enter an elec_scheme, all monopolar channels will be
        loaded (but they will not be labeled with correct channel tags).
        Entering a scheme is recommended.
    noise_freq: list
        Stop filter will be applied to the given range. Default=[58., 62.]
    resample_freq: float
        Sampling rate to resample to after loading eeg.
    pass_band: list
        If given, the eeg will be band pass filtered in the given range.
    use_mirror_buf: bool
        If True, the buffer will be data taken from within the rel_start_ms
        to rel_stop_ms interval, mirrored and prepended and appended to the
        timeseries. If False, data outside the rel_start_ms and rel_stop_ms
        interval will be read.
    demean: bool
        If True, will subtract the mean voltage between rel_start_ms and
        rel_stop_ms from each channel
    do_average_ref: bool
        If True, will compute the average reference based on the mean voltage
        across channels

    Returns
    -------
    TimeSeries
        EEG timeseries object with dimensions channels x events x time (or
        bipolar_pairs x events x time)

        NOTE: The EEG data is returned with the time buffer included. If you
        included a buffer and want to remove it, you may use the
        .remove_buffer() method, which takes SECONDS, not ms!
    """

    # check if monopolar is possible for this subject
    if elec_scheme is not None and 'contact' in elec_scheme:
        eegfile = np.unique(events.eegfile)[0]
        if os.path.splitext(eegfile)[1] == '.h5':
            eegfile = f'/protocols/r1/subjects/{events.iloc[0].subject}/experiments/{events.iloc[0].experiment}/sessions/{events.iloc[0].session}/ephys/current_processed/noreref/{eegfile}'
            with h5py.File(eegfile, 'r') as f:
                if not np.array(f['monopolar_possible'])[0] == 1:
                    print('Monopolar referencing not possible for {}'.format(
                        events.iloc[0].subject))
                    return

    # add buffer if using
    if (buf_ms is not None) and not use_mirror_buf:
        actual_start = rel_start_ms - buf_ms
        actual_stop = rel_stop_ms + buf_ms
    else:
        actual_start = rel_start_ms
        actual_stop = rel_stop_ms

    # load eeg
    eeg = CMLReader(subject=events.iloc[0].subject).load_eeg(
        events, rel_start=actual_start, rel_stop=actual_stop,
        scheme=elec_scheme).to_ptsa()

    # cast to float32 to help with memory issues with high sample rate data
    eeg.data = eeg.data.astype('float32')

    # baseline correct by subtracting the mean within the baseline time range
    if demean:
        eeg = eeg.baseline_corrected([rel_start_ms, rel_stop_ms])

    # compute average reference by subtracting the mean across channels
    if do_average_ref:
        eeg = eeg - eeg.mean(dim='channel')

    # add mirror buffer if using. PTSA expects this to be in seconds.
    if use_mirror_buf:
        eeg = eeg.add_mirror_buffer(buf_ms / 1000.)

    # filter line noise
    if noise_freq is not None:
        if isinstance(noise_freq[0], float):
            noise_freq = [noise_freq]
        for this_noise_freq in noise_freq:
            for this_chan in range(eeg.shape[1]):
                b_filter = ButterworthFilter(eeg[:, this_chan:this_chan + 1],
                                             this_noise_freq,
                                             filt_type='stop', order=4)
                eeg[:, this_chan:this_chan + 1] = b_filter.filter()

    # resample if desired. Note: can be a bit slow, especially with a lot of
    # eeg data
    if resample_freq is not None:
        eeg_resamp = []
        for this_chan in range(eeg.shape[1]):
            r_filter = ResampleFilter(eeg[:, this_chan:this_chan + 1],
                                      resample_freq)
            eeg_resamp.append(r_filter.filter())
        coords = {x: eeg[x] for x in eeg.coords.keys()}
        coords['time'] = eeg_resamp[0]['time']
        coords['samplerate'] = resample_freq
        dims = eeg.dims
        eeg = TimeSeries.create(np.concatenate(eeg_resamp, axis=1),
                                resample_freq, coords=coords, dims=dims)

    # band pass if desired
    if pass_band is not None:
        eeg = band_pass_eeg(eeg, pass_band)

    # reorder dims to make events first
    eeg = make_events_first_dim(eeg)

    return eeg
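# Usage sketch for load_eeg (illustrative, not from the source): `events` and
# `scheme` are assumed to come from the project's own loaders (e.g.
# load_elec_info()); the window and filter values are example numbers.
eeg = load_eeg(events, rel_start_ms=-500, rel_stop_ms=1500, buf_ms=1000,
               elec_scheme=scheme, noise_freq=[58., 62.],
               resample_freq=500., pass_band=None)
eeg = eeg.remove_buffer(1.0)   # NOTE: remove_buffer() takes seconds, not ms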
def _load_eeg_timeseries(events, rel_start_ms, rel_stop_ms, channel_list,
                         buf_ms=0, downsample_freq=1000, resample_freq=None):
    """
    Parameters
    ----------
    events: pandas.DataFrame
        DataFrame with the column 'stTime', specifying the timestamp when the
        event occurred
    rel_start_ms: int
        Relative time (ms) to add to the stTime to define the start of the
        time interval
    rel_stop_ms: int
        Relative time (ms) to add to the stTime to define the end of the time
        interval
    channel_list: list
        List of channel Ncs files
    buf_ms:
        Buffer (ms) to add to the start and end of the time period
    downsample_freq: int
        Sample rate to which to downsample the initial data immediately after
        loading the full file
    resample_freq: int
        Resample eeg to this value. Done after epoching.

    Returns
    -------
    ptsa.TimeSeries with dims event x time x channel
    """

    # will build a list of eeg data that we will concatenate across channels
    eeg_list = []

    # epochs will be a list of tuples of start and stop sample offsets
    epochs = None

    for channel in channel_list:
        # load channel data
        signals, timestamps, sr = load_ncs(channel)
        if downsample_freq is not None:
            signals, timestamps, sr = _my_downsample(signals, timestamps, sr,
                                                     downsample_freq)

        # get start and stop samples (only once);
        # assumes all channels have the same timestamps
        if epochs is None:
            epochs = _compute_epochs(events, rel_start_ms - buf_ms,
                                     rel_stop_ms + buf_ms, timestamps, sr)

            # remove any epochs that fall outside the recording
            bad_epochs = (np.any(epochs < 0, 1)) | (np.any(
                epochs > len(signals), 1))
            epochs = epochs[~bad_epochs]
            events = events[~bad_epochs].reset_index(drop=True)

        # segment the continuous eeg into epochs. Also resample.
        eeg, new_time = _segment_eeg_single_channel(signals, epochs, sr,
                                                    timestamps, resample_freq)
        eeg_list.append(eeg)

    # create timeseries
    dims = ('event', 'time', 'channel')
    coords = {
        'event': events.to_records(),
        'time': (new_time[0] - events.stTime.values[0]) / 1e6,
        'channel': channel_list
    }
    sr_for_ptsa = resample_freq if resample_freq is not None else sr
    eeg_all_chans = TimeSeries.create(np.stack(eeg_list, -1),
                                      samplerate=sr_for_ptsa,
                                      dims=dims, coords=coords)
    return eeg_all_chans
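# Usage sketch for _load_eeg_timeseries (illustrative, not from the source):
# the Ncs file paths and the `events` DataFrame (with an 'stTime' column) are
# placeholders.
ncs_files = ['CSC1.ncs', 'CSC2.ncs']    # hypothetical Neuralynx channel files
eeg = _load_eeg_timeseries(events, rel_start_ms=-500, rel_stop_ms=1500,
                           channel_list=ncs_files, buf_ms=500,
                           downsample_freq=1000, resample_freq=500)
print(eeg.dims)   # ('event', 'time', 'channel')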
def power_spectra_from_spike_times(s_times, clust_nums, channel_file,
                                   rel_start_ms, rel_stop_ms, freqs,
                                   noise_freq=[58., 62.],
                                   downsample_freq=250,
                                   mean_over_spikes=True):
    """
    Function to compute power relative to spike times. This computes power at
    the given frequencies for the ENTIRE session and then bins it relative to
    spike times. You WILL run out of memory if you don't let it downsample
    first. Default downsample is to 250 Hz.

    Parameters
    ----------
    s_times: np.ndarray
        Array (or list) of timestamps of when spikes occurred. EEG will be
        loaded relative to these times.
    clust_nums: np.ndarray
        Array (or list) of cluster IDs, same size as s_times
    channel_file: str
        Path to Ncs file from which to load eeg.
    rel_start_ms: int
        Initial time (in ms), relative to the onset of each spike
    rel_stop_ms: int
        End time (in ms), relative to the onset of each spike
    freqs: np.ndarray
        Array of frequencies at which to compute power
    noise_freq: list
        Stop filter will be applied to the given range. Default=[58., 62.]
    downsample_freq: int or float
        Frequency to downsample the data to. Uses decimate, so we will likely
        not reach the exact frequency.
    mean_over_spikes: bool
        After computing the spike x frequency array, whether to mean over
        spikes and return only the mean power spectra

    Returns
    -------
    dict
        Dict of either spike x frequency arrays of power values, or mean
        power spectra if mean_over_spikes. Keys are cluster numbers.
    """

    # make a df with 'stTime' column for epoching
    events = pd.DataFrame(data=np.stack([s_times, clust_nums], -1),
                          columns=['stTime', 'cluster_num'])

    # load channel data
    signals, timestamps, sr = load_ncs(channel_file)

    # downsample the session
    if downsample_freq is not None:
        signals, timestamps, sr = _my_downsample(signals, timestamps, sr,
                                                 downsample_freq)
    else:
        print('I HIGHLY recommend you downsample the data before computing '
              'power across the whole session...')
        print('You will probably run out of memory.')

    # make into timeseries
    eeg = TimeSeries.create(signals, samplerate=sr, dims=['time'],
                            coords={'time': timestamps / 1e6})

    # filter line noise
    if noise_freq is not None:
        if isinstance(noise_freq[0], float):
            noise_freq = [noise_freq]
        for this_noise_freq in noise_freq:
            b_filter = ButterworthFilter(eeg, this_noise_freq,
                                         filt_type='stop', order=4)
            eeg = b_filter.filter()

    # compute power
    wave_pow = MorletWaveletFilter(eeg, freqs, output='power', width=5,
                                   cpus=12, verbose=False).filter()

    # log the power
    data = wave_pow.data
    wave_pow.data = numexpr.evaluate('log10(data)')

    # get start and stop relative to the spikes
    epochs = _compute_epochs(events, rel_start_ms, rel_stop_ms, timestamps, sr)
    bad_epochs = (np.any(epochs < 0, 1)) | (np.any(epochs > len(signals), 1))
    epochs = epochs[~bad_epochs]
    events = events[~bad_epochs].reset_index(drop=True)

    # mean over time within epochs
    spikes_x_freqs = np.stack(
        [np.mean(wave_pow.data[:, x[0]:x[1]], axis=1) for x in epochs])

    # make dict with keys being cluster numbers. Mean over spikes if desired.
    pow_spect_dict = {}
    for this_cluster in events.cluster_num.unique():
        if mean_over_spikes:
            pow_spect_dict[this_cluster] = spikes_x_freqs[
                events.cluster_num == this_cluster].mean(axis=0)
        else:
            pow_spect_dict[this_cluster] = spikes_x_freqs[
                events.cluster_num == this_cluster]

    return pow_spect_dict
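# Usage sketch for power_spectra_from_spike_times (illustrative, not from the
# source): `s_times` and `clust_nums` come from the spike sorter; the Ncs
# path and frequency grid are example values.
freqs = np.logspace(np.log10(2), np.log10(100), 20)   # 2-100 Hz, log-spaced
pow_by_cluster = power_spectra_from_spike_times(
    s_times, clust_nums, 'CSC1.ncs',
    rel_start_ms=-500, rel_stop_ms=500, freqs=freqs,
    downsample_freq=250, mean_over_spikes=True)
# pow_by_cluster[cluster_id] -> mean log-power spectrum for that cluster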
def test_hdf(tempdir):
    """Test saving/loading with HDF5."""
    data = np.random.random((10, 10, 10, 10))
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    rate = 1

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")

    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    ts_with_attrs = TimeSeries.create(data, rate, coords=coords, dims=dims,
                                      name="test", attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test compression
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test different containers as dims
    data = np.random.random((3, 7, 10, 4))
    dims = ('time', 'recarray', 'list', 'recordarray')
    coords = {
        'time': np.linspace(0, 1, 3),
        'recarray': np.array(
            [(i, j, k) for i, j, k in zip(np.linspace(0, 1, 7),
                                          np.linspace(1000, 2000, 7),
                                          np.linspace(0, 1, 7))],
            dtype=[('field1', float), ('field2', int), ('field3', 'U20')]),
        'list': list(np.linspace(100, 200, 10)),
        'recordarray': np.array(
            [(i, j, k) for i, j, k in zip(np.linspace(0, 1, 4),
                                          np.linspace(1000, 2000, 4),
                                          np.linspace(0, 1, 4))],
            dtype=[('field1', float), ('field2', int),
                   ('field3', 'U20')]).view(np.recarray)
    }
    rate = 1

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims,
                           name="container test")
    ts.to_hdf(filename, compression='gzip', compression_opts=9)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    for key in ts.attrs:
        assert ts.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        # dtypes can be slightly different for recarrays
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for coord in ts.coords:
        # dtypes can be slightly different for recarrays
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "container test"
def test_append_recarray():
    """Test appending along a dimension with a recarray."""
    p1 = np.array([('John', 180), ('Stacy', 150), ('Dick', 200)],
                  dtype=[('name', '|S256'), ('height', int)])
    p2 = np.array([('Bernie', 170), ('Donald', 250), ('Hillary', 150)],
                  dtype=[('name', '|S256'), ('height', int)])

    data = np.arange(50, 80, 1, dtype=float)
    dims = ['measurement', 'participant']

    ts1 = TimeSeries.create(data.reshape(10, 3), None, dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p1,
                                'samplerate': 1
                            })
    ts2 = TimeSeries.create(data.reshape(10, 3) * 2, None, dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p2,
                                'samplerate': 1
                            })
    ts3 = TimeSeries.create(data.reshape(10, 3) * 2, None, dims=dims,
                            coords={
                                'measurement': np.arange(10),
                                'participant': p2,
                                'samplerate': 2
                            })
    ts4 = TimeSeries.create(data.reshape(10, 3) * 2, None, dims=dims,
                            coords={
                                'measurement': np.linspace(0, 1, 10),
                                'participant': p2,
                                'samplerate': 2
                            })

    combined = ts1.append(ts2, dim='participant')

    assert isinstance(combined, TimeSeries)
    assert (combined.participant.data['height'] == np.array(
        [180, 150, 200, 170, 250, 150])).all()
    names = np.array(
        [b'John', b'Stacy', b'Dick', b'Bernie', b'Donald', b'Hillary'])
    assert (combined.participant.data['name'] == names).all()

    # incompatible sample rates
    with pytest.raises(ConcatenationError):
        ts1.append(ts3)

    # incompatible other dimensions (measurement)
    with pytest.raises(ConcatenationError):
        ts1.append(ts4)