Ejemplo n.º 1
0
def test_hdf(tempdir):
    """Test saving/loading with HDF5.

    Covers the basic round trip, the expected HDF5 file layout
    (``data``/``dims``/``coords`` entries plus root attributes), saving
    with user-supplied ``attrs``, and gzip compression.
    """
    data = np.random.random((10, 10, 10, 10))
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    rate = 1

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")

    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    ts_with_attrs = TimeSeries.create(data,
                                      rate,
                                      coords=coords,
                                      dims=dims,
                                      name="test",
                                      attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        # Compare against the series that was actually saved
        # (ts_with_attrs), not ts.  The values coincide here, but this
        # keeps the check meaningful if the fixtures ever diverge.
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test compression:
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"
def test_load_hdf_base64():
    """Test that we can still load the base64-encoded HDF5 format."""
    filename = osp.join(osp.dirname(__file__), "data", "R1111M_base64.h5")
    ts = TimeSeries.from_hdf(filename)

    assert "event" in ts.coords
    # Fixed misplaced parenthesis: the original asserted
    # len(ts.coords["event"] == 10), i.e. the length of a boolean
    # comparison array, which is truthy regardless of its contents.
    assert len(ts.coords["event"]) == 10
    def test_hdf(self, tmpdir):
        """Round-trip loaded EEG through PTSA's HDF5 save/load path."""
        from cmlreaders.readers.readers import EventReader
        from unittest.mock import patch

        events_path = osp.join(osp.dirname(__file__), "data",
                               "R1111M_FR1_0_events.json")
        out_path = str(tmpdir.join("test.h5"))

        all_events = EventReader.fromfile(events_path,
                                          subject="R1111M",
                                          experiment="FR1")
        sampled = all_events[all_events.eegoffset > 0].sample(n=5)

        rel_start, rel_stop = 0, 10
        get_eeg = partial(self.make_eeg, sampled, rel_start, rel_stop)

        reader = self.reader

        # Stub out load_eeg so no real EEG files are needed on disk.
        with patch.object(reader, "load_eeg", return_value=get_eeg()):
            eeg = reader.load_eeg(events=sampled, rel_start=0, rel_stop=10)

        ts = eeg.to_ptsa()
        ts.to_hdf(out_path)

        reloaded = TimeSeries.from_hdf(out_path)
        assert_timeseries_equal(ts, reloaded)
    def test_hdf_rhino(self, tmpdir):
        """Round-trip EEG loaded from rhino through HDF5."""
        from cmlreaders.warnings import MultiplePathsFoundWarning

        out_path = str(tmpdir.join("test.h5"))

        # Multiple candidate data paths are expected on rhino;
        # silence that warning for the duration of the loads.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", MultiplePathsFoundWarning)

            all_events = self.reader.load("events")
            sampled = all_events[all_events.eegoffset > 0].sample(n=5)
            eeg = self.reader.load_eeg(events=sampled,
                                       rel_start=0,
                                       rel_stop=10)

        ts = eeg.to_ptsa()
        ts.to_hdf(out_path)

        reloaded = TimeSeries.from_hdf(out_path)
        assert_timeseries_equal(ts, reloaded)
Ejemplo n.º 5
0
def test_hdf(tempdir):
    """Test saving/loading with HDF5.

    Covers the basic round trip, the expected HDF5 file layout, saving
    with user-supplied ``attrs``, gzip compression, and coords backed by
    different container types (recarray, list, recordarray).
    """
    data = np.random.random((10, 10, 10, 10))
    dims = ('time', 'x', 'y', 'z')
    coords = {label: np.linspace(0, 1, 10) for label in dims}
    rate = 1

    ts = TimeSeries.create(data, rate, coords=coords, dims=dims, name="test")

    filename = osp.join(tempdir, "timeseries.h5")
    ts.to_hdf(filename)

    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs

    loaded = TimeSeries.from_hdf(filename)
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    ts_with_attrs = TimeSeries.create(data,
                                      rate,
                                      coords=coords,
                                      dims=dims,
                                      name="test",
                                      attrs=dict(a=1, b=[1, 2]))
    ts_with_attrs.to_hdf(filename)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test compression:
    ts_with_attrs.to_hdf(filename, compression='gzip', compression_opts=9)
    loaded = TimeSeries.from_hdf(filename)
    for key in ts_with_attrs.attrs:
        assert ts_with_attrs.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        assert (loaded.coords[coord] == ts_with_attrs.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "test"

    # test different containers as dims:
    data = np.random.random((3, 7, 10, 4))
    dims = ('time', 'recarray', 'list', 'recordarray')
    # NOTE: builtin float/int are used in the dtypes below; the np.float
    # and np.int aliases were removed in NumPy 1.24 and now raise
    # AttributeError.  The resulting dtypes are identical (float64/int_).
    coords = {
        'time':
        np.linspace(0, 1, 3),
        'recarray':
        np.array([(i, j, k) for i, j, k in zip(np.linspace(
            0, 1, 7), np.linspace(1000, 2000, 7), np.linspace(0, 1, 7))],
                 dtype=[('field1', float), ('field2', int),
                        ('field3', 'U20')]),
        'list':
        list(np.linspace(100, 200, 10)),
        'recordarray':
        np.array([(i, j, k) for i, j, k in zip(np.linspace(
            0, 1, 4), np.linspace(1000, 2000, 4), np.linspace(0, 1, 4))],
                 dtype=[('field1', float), ('field2', int),
                        ('field3', 'U20')]).view(np.recarray)
    }
    rate = 1
    ts = TimeSeries.create(data,
                           rate,
                           coords=coords,
                           dims=dims,
                           name="container test")
    ts.to_hdf(filename, compression='gzip', compression_opts=9)
    with h5py.File(filename, 'r') as hfile:
        assert "data" in hfile
        assert "dims" in hfile
        assert "coords" in hfile
        assert "name" in list(hfile['/'].attrs.keys())
        assert "ptsa_version" in hfile.attrs
        assert "created" in hfile.attrs
    loaded = TimeSeries.from_hdf(filename)
    for key in ts.attrs:
        assert ts.attrs[key] == loaded.attrs[key]
    assert np.all(loaded.data == data)
    for coord in loaded.coords:
        # dtypes can be slightly different for recarrays:
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for coord in ts.coords:
        # dtypes can be slightly different for recarrays:
        assert (np.array(loaded.coords[coord],
                         ts[coord].values.dtype) == ts.coords[coord]).all()
    for n, dim in enumerate(dims):
        assert loaded.dims[n] == dim
    assert loaded.name == "container test"
Ejemplo n.º 6
0
def calc_subj_pep(subj,
                  elecs=None,
                  method='bip',
                  relstart=300,
                  relstop=1301,
                  freq_specs=(2, 120, 30),
                  percentthresh=.95,
                  numcyclesthresh=3,
                  load_eeg=False,
                  save_eeg=False,
                  save_result=False,
                  plot=False,
                  kind='r1',
                  experiment='FR1',
                  eeg_path='~/',
                  result_path='~/'):
    """Compute per-frequency Pepisode measures for one subject.

    Inputs:
    subj - subject string
    elecs - list of electrode pairs (strings)
    method - bip or avg depending on referencing scheme
    freq_specs - tuple of (low_freq, high_freq, num_freqs)
        for background fitting in BOSC.
    load_eeg / save_eeg - load previously-saved EEG HDF5 files, or save
        the freshly-referenced EEG (mutually exclusive).
    save_result - save the per-frequency results as .npy under result_path.
    plot - show the BOSC background fit per session.

    Returns:
    pep_all - average Pepisode for all words at each frequency
    pep_rec - average Pepisode for recalled words at each frequency
    pep_nrec - average Pepisode for non-recalled words at each frequency
    subj_tscores - t-score at each frequency, comparing rec and nrec
        across events
    ** Note that tscore is not itself meaningful because events are not
        independent. Comparing these tscores across subjects, however,
        is valid.
    """

    if save_eeg and load_eeg:
        # Fixed: the original did `raise ('...')`, which raises a plain
        # string and itself fails with TypeError.  Raise a real exception.
        raise ValueError('Cannot save and load eeg simultaneously.')

    print('Subject: ', subj)
    if elecs is None:
        # Default electrode list comes from a precomputed pickle of
        # hippocampal pairs (despite the .csv extension).
        good_subj = pd.read_pickle(
            '/home1/jrudoler/Theta_Project/hippo_subject_pairs.csv')
        elecs = good_subj[good_subj['Subject'] == subj]['hippo_pairs'].iloc[0]
    subj_pepisode = None
    subj_recalled = None
    subj_tscores = None
    if plot:
        plt.figure(figsize=(12, 6))
    lowfreq, highfreq, numfreqs = freq_specs
    print(elecs)
    for pair_str in elecs:
        chans = pair_str.split('-')
        data = cml.get_data_index(kind=kind)
        data = data[data['experiment'] == experiment]
        sessions = data[data['subject'] == subj]['session'].unique()
        pepisodes = None  # events, freqs
        recalled = None  # events, freqs
        tscore = None
        for sess in sessions:
            try:
                print('Loading session {} EEG'.format(sess))
                reader = cml.CMLReader(subject=subj,
                                       experiment=experiment,
                                       session=sess)
                all_events = reader.load('task_events')
                if not os.path.exists(eeg_path):
                    os.makedirs(eeg_path)
                if load_eeg:
                    eeg = TimeSeries.from_hdf(eeg_path + 'session_' +
                                              str(sess) + '_' + pair_str)
                    bosc = P_episode(all_events,
                                     eeg,
                                     sr=eeg.samplerate.values,
                                     lowfreq=lowfreq,
                                     highfreq=highfreq,
                                     numfreqs=numfreqs)
                elif method == 'bip':
                    pairs = reader.load("pairs")
                    # bipolar eeg
                    bip = reader.load_eeg(
                        scheme=pairs[pairs.label == pair_str])\
                        .to_ptsa().mean(['event', 'channel'])
                    # Notch out 60 Hz line noise before BOSC.
                    bip = ButterworthFilter(bip,
                                            freq_range=[58., 62.],
                                            filt_type='stop',
                                            order=4).filter()
                    print("Applying BOSC method!")
                    if save_eeg:
                        bip.to_hdf(eeg_path + 'session_' + str(sess) + '_' +
                                   pair_str)
                    bosc = P_episode(all_events,
                                     bip,
                                     sr=bip.samplerate.values,
                                     lowfreq=lowfreq,
                                     highfreq=highfreq,
                                     numfreqs=numfreqs)

                elif method == 'avg':
                    contacts = reader.load("contacts")
                    # average eeg
                    eeg = reader.load_eeg(
                        scheme=contacts).to_ptsa().mean('event')
                    # all zeros from a broken lead leads to -inf power,
                    # which results in a LinAlg error for log-log fit
                    # TODO: verify this channel exclusion doesn't cause any
                    # problems. Maybe print a message or raise an error?
                    bad_chan_mask = ~np.all(eeg.values == 0, axis=1)
                    contacts = contacts[bad_chan_mask]
                    eeg = eeg[bad_chan_mask, :]
                    # Common-average reference: subtract the channel mean,
                    # then average the two contacts of this pair.
                    avg = (eeg[contacts.label.str.contains(chans[0]) | \
                               contacts.label.str.contains(chans[1]), :] - eeg.mean('channel')
                           ).mean('channel')
                    avg = ButterworthFilter(avg,
                                            freq_range=[58., 62.],
                                            filt_type='stop',
                                            order=4).filter()
                    if save_eeg:
                        avg.to_hdf(eeg_path + '/session_' + str(sess) + '_' +
                                   pair_str)
                    bosc = P_episode(all_events,
                                     avg,
                                     sr=avg.samplerate.values,
                                     lowfreq=lowfreq,
                                     highfreq=highfreq,
                                     numfreqs=numfreqs)

                if plot:
                    bosc.background_fit(plot_type='session')

                if pepisodes is None:
                    pepisodes = bosc.Pepisode
                    # be careful to only use events from lists that have eeg data.
                    # [np.isin(bosc.interest_events.list, self.lists)]
                    recalled = bosc.interest_events.recalled.values
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled],
                                              axis=0)
                elif np.isnan(tscore).all():
                    # NOTE(review): this branch recomputes the t-score from
                    # the previously accumulated data but does NOT stack this
                    # session's Pepisode/recalled values — confirm whether
                    # dropping the current session here is intentional.
                    tscore, _ = scp.ttest_ind(pepisodes[recalled],
                                              pepisodes[~recalled],
                                              axis=0)
                else:
                    pepisodes = np.vstack([pepisodes, bosc.Pepisode])
                    recalled = np.hstack(
                        [recalled, bosc.interest_events.recalled.values])
                    t, _ = scp.ttest_ind(pepisodes[recalled],
                                         pepisodes[~recalled],
                                         axis=0)
                    tscore = np.vstack([tscore, t])
                print("Proportion recalled:", recalled.mean())
            except IndexError:
                print('IndexError for subject {} session {}'.format(
                    subj, sess))
            except FileNotFoundError:
                print('FileNotFoundError for {} session {}'.format(subj, sess))
                continue
        if pepisodes is None:
            raise Exception('No working sessions')
        subj_pepisode = pepisodes if subj_pepisode is None else np.dstack(
            [subj_pepisode, pepisodes])
        subj_recalled = recalled if subj_recalled is None else np.vstack(
            [subj_recalled, recalled])
        subj_tscores = tscore if subj_tscores is None else np.vstack(
            [subj_tscores, tscore])
        if np.isnan(subj_tscores).all():
            raise Exception('Too many nan in T-scores. This problem can arise \
                when there are no recalled events.')
    if subj_pepisode.ndim > 2:  # if multiple electrode pairs, average over pairs
        print("Averaging over {} electrodes for subject {}".format(
            subj_pepisode.shape[2], subj))
        subj_pepisode = subj_pepisode.mean(2)
        subj_recalled = subj_recalled.mean(0)
    subj_recalled = subj_recalled.astype(bool)
    if subj_tscores.ndim > 1:
        print(len(sessions), 'sessions')
        subj_tscores = np.nanmean(subj_tscores, axis=0)

    print('{} total events: {} recalled \
    	and {} non-recalled'.format(len(subj_recalled), sum(subj_recalled),
                                 sum(~subj_recalled)))

    pep_rec = subj_pepisode[subj_recalled, :].mean(0)
    pep_nrec = subj_pepisode[~subj_recalled, :].mean(0)
    pep_all = subj_pepisode.mean(0)

    if save_result:
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        np.save(result_path + '{}_all_{}'.format(subj, method), pep_all)
        np.save(result_path + '{}_rec_{}'.format(subj, method), pep_rec)
        np.save(result_path + '{}_nrec_{}'.format(subj, method), pep_nrec)
        np.save(result_path + '{}_tscore_{}'.format(subj, method),
                subj_tscores)

    return pep_all, pep_rec, pep_nrec, subj_tscores