Python Dataset Beispiele, rerpy.data.Dataset Python Beispiele

Beispiel #1

0

Datei anzeigen

def test_transforms():
    """Check that successive transforms stack and leave copies untouched."""
    dataset = mock_dataset()
    # Snapshot every recspan as a plain array before anything is transformed.
    originals = [np.array(recspan) for recspan in dataset]

    def assert_matches(ds, matrix):
        # Every recspan in ds must equal its snapshot times matrix transposed.
        for original, current in zip(originals, ds):
            assert np.allclose(np.dot(original, matrix.T), current)

    first = np.eye(dataset.data_format.num_channels)
    first[0, 0] = 2
    first[0, 1] = -0.5
    dataset.transform(first)
    assert_matches(dataset, first)

    # A copy made after the first transform carries that transform with it.
    dataset_copy = Dataset(dataset.data_format)
    dataset_copy.add_dataset(dataset)
    assert len(saved_count_source(originals)) == len(dataset_copy) \
        if False else len(originals) == len(dataset_copy)
    assert_matches(dataset_copy, first)

    second = np.eye(dataset.data_format.num_channels)
    second[-1, -1] = -3
    second[1, 0] = 2.5
    dataset.transform(second)

    # Transforming the source again must not leak into the copy ...
    assert_matches(dataset_copy, first)

    # ... while the source itself now reflects both transforms combined.
    assert_matches(dataset, np.dot(second, first))

Beispiel #2

0

Datei anzeigen

Datei: test_data.py Projekt: rerpy/rerpy

def test_transforms():
    """Check that successive transforms stack and leave copies untouched."""
    dataset = mock_dataset()
    # Snapshot every recspan as a plain array before anything is transformed.
    originals = [np.array(recspan) for recspan in dataset]

    def assert_matches(ds, matrix):
        # Every recspan in ds must equal its snapshot times matrix transposed.
        for original, current in zip(originals, ds):
            assert np.allclose(np.dot(original, matrix.T), current)

    first = np.eye(dataset.data_format.num_channels)
    first[0, 0] = 2
    first[0, 1] = -0.5
    dataset.transform(first)
    assert_matches(dataset, first)

    # A copy made after the first transform carries that transform with it.
    dataset_copy = Dataset(dataset.data_format)
    dataset_copy.add_dataset(dataset)
    assert len(originals) == len(dataset_copy)
    assert_matches(dataset_copy, first)

    second = np.eye(dataset.data_format.num_channels)
    second[-1, -1] = -3
    second[1, 0] = 2.5
    dataset.transform(second)

    # Transforming the source again must not leak into the copy ...
    assert_matches(dataset_copy, first)

    # ... while the source itself now reflects both transforms combined.
    assert_matches(dataset, np.dot(second, first))

Beispiel #3

0

Datei anzeigen

Datei: test_data.py Projekt: rerpy/rerpy

def mock_dataset(num_channels=4, num_recspans=4, ticks_per_recspan=100,
                 hz=250, lazy="mixed"):
    """Build a Dataset filled with seeded random recspans.

    ``lazy`` must be "all", "mixed" or "none". With "all" every recspan is
    added through ``add_lazy_recspan``; with "none" every recspan is added
    through ``add_recspan``; with "mixed" only even-numbered recspans take
    the lazy path.
    """
    assert lazy in ["all", "mixed", "none"]
    channel_names = ["MOCK%s" % (chan,) for chan in xrange(num_channels)]
    dataset = Dataset(DataFormat(hz, "uV", channel_names))
    # Fixed seed, so every call produces the same values.
    rng = np.random.RandomState(0)
    shape = (ticks_per_recspan, num_channels)
    for recspan_idx in xrange(num_recspans):
        values = rng.normal(size=shape)
        use_lazy = (lazy == "all"
                    or (lazy == "mixed" and recspan_idx % 2 == 0))
        if not use_lazy:
            dataset.add_recspan(values, {})
            continue
        wrapped = FakeLazyRecspan(values)
        dataset.add_lazy_recspan(wrapped, ticks_per_recspan, {})
    return dataset

Beispiel #4

0

Datei anzeigen

def mock_dataset(num_channels=4,
                 num_recspans=4,
                 ticks_per_recspan=100,
                 hz=250,
                 lazy="mixed"):
    """Build a Dataset filled with seeded random recspans.

    ``lazy`` must be "all", "mixed" or "none". With "all" every recspan is
    added through ``add_lazy_recspan``; with "none" every recspan is added
    through ``add_recspan``; with "mixed" only even-numbered recspans take
    the lazy path.
    """
    assert lazy in ["all", "mixed", "none"]
    channel_names = ["MOCK%s" % (chan,) for chan in xrange(num_channels)]
    dataset = Dataset(DataFormat(hz, "uV", channel_names))
    # Fixed seed, so every call produces the same values.
    rng = np.random.RandomState(0)
    shape = (ticks_per_recspan, num_channels)
    for recspan_idx in xrange(num_recspans):
        values = rng.normal(size=shape)
        use_lazy = (lazy == "all"
                    or (lazy == "mixed" and recspan_idx % 2 == 0))
        if not use_lazy:
            dataset.add_recspan(values, {})
            continue
        wrapped = FakeLazyRecspan(values)
        dataset.add_lazy_recspan(wrapped, ticks_per_recspan, {})
    return dataset

Beispiel #5

0

Datei anzeigen

def test_Dataset():
    """Exercise Dataset end to end.

    Covers the empty dataset, adding eager and lazy recspans, indexing,
    raw_slice() bounds checking, add_dataset() copying, matrix and symbolic
    transforms, metadata independence between copies, and iteration.
    """
    # pandas.testing is the public location of assert_frame_equal. The
    # pandas.util.testing module is deprecated and missing from newer pandas
    # releases, so it is kept only as a fallback for old installations.
    try:
        from pandas.testing import assert_frame_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal

    data_format = DataFormat(250, "uV", ["MOCK1", "MOCK2"])
    dataset = Dataset(data_format)

    # A freshly created dataset is empty and rejects any indexing.
    assert len(dataset) == 0
    assert_raises(IndexError, dataset.__getitem__, 0)
    assert_raises(TypeError, dataset.__getitem__, slice(0, 0))
    assert list(dataset) == []

    # Recspan k has 10 * (k + 1) ticks, is filled with k % 2, and carries
    # {"a": k} as metadata; t() below relies on all three facts.
    dataset.add_recspan(np.ones((10, 2)) * 0, {"a": 0})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((20, 2)) * 1), 20,
                             {"a": 1})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((30, 2)) * 0), 30,
                             {"a": 2})
    dataset.add_recspan(np.ones((40, 2)) * 1, {"a": 3})

    assert len(dataset) == 4
    assert_raises(IndexError, dataset.__getitem__, 4)

    dataset.add_event(1, 10, 11, {"foo": "bar"})

    def t(ds, recspan_id, expected_values=None):
        """Check recspan `recspan_id` of `ds` against the layout built above.

        When `expected_values` is None the untransformed fill value
        (recspan_id % 2) is expected.
        """
        recspan = ds[recspan_id]
        assert isinstance(recspan, pandas.DataFrame)
        expected_ticks = 10 * (recspan_id + 1)
        assert recspan.shape == (expected_ticks, 2)
        # 1/250 Hz = 4.0 ms
        assert np.all(recspan.index == np.arange(expected_ticks) * 4.0)
        # index is supposed to be floats. Not sure if that means float or
        # np.float64, but this check should work for either:
        assert isinstance(recspan.index[0], float)
        assert np.all(recspan.columns == ["MOCK1", "MOCK2"])
        # Values are supposed to be floating point as well.
        assert type(recspan.iloc[0, 0]) is np.float64
        if expected_values is None:
            local_recspan_id = recspan_id % 2
            expected_values = local_recspan_id
        assert np.allclose(recspan, expected_values)
        # raw_slice over the full range must reproduce the whole recspan.
        assert np.allclose(ds.raw_slice(recspan_id, 0, recspan.shape[0]),
                           recspan)
        # Negative bounds and bounds past the end are rejected.
        assert_raises(IndexError, ds.raw_slice, recspan_id, -1, 10)
        assert_raises(IndexError, ds.raw_slice, recspan_id, 10, -1)
        assert ds.raw_slice(recspan_id, 2, 2).shape == (0, 2)
        assert np.all(ds.raw_slice(recspan_id, 2, 5) == recspan.iloc[2:5, :])
        assert_raises(IndexError, ds.raw_slice, recspan_id, 0, 200)

        assert ds.recspan_infos[recspan_id]["a"] == recspan_id
        assert ds.recspan_infos[recspan_id].ticks == expected_ticks

    for i in xrange(4):
        t(dataset, i)

    # DataFormat mismatch
    diff_dataset = Dataset(DataFormat(500, "uV", ["MOCK1", "MOCK2"]))
    assert_raises(ValueError, diff_dataset.add_dataset, dataset)

    dataset_copy = Dataset(data_format)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == 4
    for i in xrange(4):
        t(dataset_copy, i)
    # The single event must have been copied along with the recspans.
    assert len(dataset_copy.events()) == 1
    assert dict(dataset_copy.events()[0]) == {"foo": "bar"}
    assert dataset_copy.events()[0].recspan_id == 1

    assert_raises(ValueError, dataset.transform, np.eye(2), exclude=["MOCK1"])
    dataset.transform([[2, 0], [0, 3]])
    # Transforming the first data set doesn't affect the second
    for i in xrange(4):
        t(dataset_copy, i)
    # But it does affect the first!
    for i in xrange(4):
        t(dataset, i, expected_values=[[2 * (i % 2), 3 * (i % 2)]])
    # Try a symbolic transform too -- it should stack with the previous
    # transform.
    dataset.transform("-MOCK1/3", exclude=["MOCK1"])
    for i in xrange(4):
        t(dataset,
          i,
          expected_values=[[2 * (i % 2), 3 * (i % 2) - (2. / 3) * (i % 2)]])

    # Also check that changing one Dataset's metadata doesn't affect the copy.
    dataset.recspan_infos[0]["a"] = 100
    assert dataset.recspan_infos[0]["a"] == 100
    assert dataset_copy.recspan_infos[0]["a"] == 0
    # Set it back to avoid any confusion later in the test
    dataset.recspan_infos[0]["a"] = 0

    # Check __iter__
    recspans = list(dataset)
    assert len(recspans) == 4
    for i in xrange(4):
        assert_frame_equal(recspans[i], dataset[i])

    # Smoke test
    repr(dataset)

Beispiel #6

0

Datei anzeigen

def load_erpss(raw,
               log,
               calibration_events="condition == 0",
               lazy=True,
               calibrate=False,
               calibrate_half_width_ticks=5,
               calibrate_low_cursor_time=None,
               calibrate_high_cursor_time=None,
               calibrate_pulse_size=None,
               calibrate_polarity=1):
    """Load a raw/log file pair into a Dataset.

    The raw file is read with read_raw() and the log file with read_log().
    Their event codes are cross-checked (ignoring the 0x8000 bit), the
    recording is split into recspans after every pause/delete code, and the
    log events are attached to the resulting Dataset.

    Parameters:
      raw, log: passed to maybe_open(). When given as strings, their
        absolute paths are recorded in the metadata under "raw_file" and
        "log_file".
      calibration_events: query passed to dataset.events_query(). Every
        matching event has all of its existing keys deleted and is tagged
        with calibration_pulse=True. str() of this value is recorded in the
        metadata under "calibration_events".
      lazy: forwarded to read_raw(). If true, each recspan is added as a
        LazyRecspan built around the fetcher returned by read_raw();
        otherwise the matching slice of the returned data array is added.
      calibrate: if true, the data format's units are "uV" and the dataset
        is rescaled from the calibration pulses; otherwise the units are
        "RAW" and no rescaling happens.
      calibrate_half_width_ticks: half-width, in ticks, of the window
        placed around each cursor time.
      calibrate_low_cursor_time, calibrate_high_cursor_time: cursor times,
        rounded to a tick via data_format.ms_to_ticks() (so presumably
        given in ms -- confirm). Required when calibrate is true.
      calibrate_pulse_size: numerator of the per-channel scale factor; the
        inline comments treat it as uV. Required when calibrate is true.
      calibrate_polarity: multiplier applied to calibrate_pulse_size.

    Returns:
      The populated Dataset.

    Raises:
      ValueError: if the log file has an event at a position outside the
        raw data, if raw and log codes disagree, or if calibrate is true
        and one of the required calibration arguments is None.
    """
    dtype = np.float64

    metadata = {}
    if isinstance(raw, basestring):
        metadata["raw_file"] = os.path.abspath(raw)
    if isinstance(log, basestring):
        metadata["log_file"] = os.path.abspath(log)
    metadata["calibration_events"] = str(calibration_events)

    raw = maybe_open(raw)
    log = maybe_open(log)

    (fetcher, hz, channel_names, raw_codes, data,
     header_metadata) = read_raw(raw, dtype, lazy)
    metadata.update(header_metadata)
    if calibrate:
        units = "uV"
    else:
        units = "RAW"
    data_format = DataFormat(hz, units, channel_names)

    total_ticks = raw_codes.shape[0]

    raw_log_events = read_log(log)
    # Scatter the log codes into an array shaped like raw_codes so the two
    # can be compared position by position.
    expanded_log_codes = np.zeros(raw_codes.shape, dtype=int)
    try:
        expanded_log_codes[raw_log_events.index] = raw_log_events["code"]
    except IndexError as e:
        raise ValueError("log file claims event at position where there is "
                         "no data: %s" % (e, ))
    # Sometimes people "delete" events by setting the high (sign) bit of the
    # code in the log file (e.g. with 'logpoke'). So we ignore this bit when
    # comparing log codes to raw codes -- mismatches here do not indicate an
    # error -- and then are careful to use the log codes, rather than the
    # raw codes, below.
    if np.any((expanded_log_codes & ~0x8000) != (raw_codes & ~0x8000)):
        raise ValueError("raw and log files have mismatched codes")
    del raw_codes
    del expanded_log_codes

    pause_events = (raw_log_events["code"] == PAUSE_CODE)
    delete_events = (raw_log_events["code"] == DELETE_CODE)
    break_events = pause_events | delete_events
    break_ticks = raw_log_events.index[break_events]
    # The pause/delete code appears at the last sample of the old era, so if
    # used directly, adjacent pause ticks give contiguous spans of recording
    # as (pause1, pause2]. (Confirmed by checking by hand in a real recording
    # that the data associated with the sample that has the pause code is
    # contiguous with the sample before, but not the sample after.)  Adding +1
    # to each of them then converts this to Python style [pause1, pause2)
    # intervals. There is a pause code at the last record of the file, but not
    # one at the first, so we add that in explicitly.
    break_ticks += 1
    span_edges = np.concatenate(([0], break_ticks))
    assert span_edges[0] == 0
    assert span_edges[-1] == total_ticks

    # Consecutive edges delimit one recspan each.
    span_slices = [
        slice(span_edges[i], span_edges[i + 1])
        for i in xrange(len(span_edges) - 1)
    ]

    # NOTE(review): the same metadata dict object is handed to every recspan;
    # whether Dataset copies it is not visible here -- confirm, since
    # "deleted" is set on individual recspan infos further down.
    dataset = Dataset(data_format)
    for span_slice in span_slices:
        if lazy:
            lr = LazyRecspan(fetcher, dtype, len(channel_names),
                             span_slice.start, span_slice.stop)
            dataset.add_lazy_recspan(lr, span_slice.stop - span_slice.start,
                                     metadata)
        else:
            dataset.add_recspan(data[span_slice, :], metadata)

    # Convert each file-wide event tick into a (recspan id, tick offset
    # within that recspan) pair.
    span_starts = [s.start for s in span_slices]
    recspan_ids = []
    start_ticks = []
    for tick in raw_log_events.index:
        # Index of the last span whose start is <= tick.
        recspan_id = bisect.bisect(span_starts, tick) - 1
        span_slice = span_slices[recspan_id]
        span_start = span_slice.start
        span_stop = span_slice.stop
        assert span_start <= tick < span_stop
        recspan_ids.append(recspan_id)
        start_ticks.append(tick - span_start)
    # Every log event spans exactly one tick.
    stop_ticks = [tick + 1 for tick in start_ticks]
    dataset.add_events(recspan_ids, start_ticks, stop_ticks, raw_log_events)

    # Flag the recspan_info attached to each delete-code event.
    for delete_event in dataset.events_query({"code": DELETE_CODE}):
        delete_event.recspan_info["deleted"] = True

    # Strip calibration events down to a single marker attribute.
    for cal_event in dataset.events_query(calibration_events):
        for key in list(cal_event):
            del cal_event[key]
        cal_event["calibration_pulse"] = True

    if calibrate:
        for kwarg in [
                "calibrate_low_cursor_time", "calibrate_high_cursor_time",
                "calibrate_pulse_size"
        ]:
            # Look the argument's current value up by name.
            if locals()[kwarg] is None:
                raise ValueError("when calibrating, %s= argument must be "
                                 "specified" % (kwarg, ))
        half_width = dataset.data_format.ticks_to_ms(
            calibrate_half_width_ticks)
        cal_vals = {}
        for which, cursor_time in [("low", calibrate_low_cursor_time),
                                   ("high", calibrate_high_cursor_time)]:
            # Round cursor to nearest tick
            cursor_tick = dataset.data_format.ms_to_ticks(cursor_time)
            cursor_time = dataset.data_format.ticks_to_ms(cursor_tick)
            erp = dataset.rerp("calibration_pulse",
                               cursor_time - half_width,
                               cursor_time + half_width,
                               "1",
                               all_or_nothing=True,
                               overlap_correction=False,
                               verbose=False)
            cal_vals[which] = erp.betas["Intercept"].mean()
        cal_diffs = cal_vals["high"] - cal_vals["low"]
        calibrate_pulse_size *= calibrate_polarity
        # For each channel, we want to multiply by a factor with units uV/raw
        # We have calibrate_pulse_size uV = cal_diffs raw
        cal_scaler = calibrate_pulse_size / cal_diffs
        # Apply the scale factors as a diagonal transform.
        dataset.transform(np.diagflat(np.asarray(cal_scaler)))

    return dataset

Beispiel #7

0

Datei anzeigen

Datei: test_data.py Projekt: rerpy/rerpy

def test_Dataset():
    """Exercise Dataset end to end.

    Covers the empty dataset, adding eager and lazy recspans, indexing,
    raw_slice() bounds checking, add_dataset() copying, matrix and symbolic
    transforms, metadata independence between copies, and iteration.
    """
    # pandas.testing is the public location of assert_frame_equal. The
    # pandas.util.testing module is deprecated and missing from newer pandas
    # releases, so it is kept only as a fallback for old installations.
    try:
        from pandas.testing import assert_frame_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal

    data_format = DataFormat(250, "uV", ["MOCK1", "MOCK2"])
    dataset = Dataset(data_format)

    # A freshly created dataset is empty and rejects any indexing.
    assert len(dataset) == 0
    assert_raises(IndexError, dataset.__getitem__, 0)
    assert_raises(TypeError, dataset.__getitem__, slice(0, 0))
    assert list(dataset) == []

    # Recspan k has 10 * (k + 1) ticks, is filled with k % 2, and carries
    # {"a": k} as metadata; t() below relies on all three facts.
    dataset.add_recspan(np.ones((10, 2)) * 0, {"a": 0})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((20, 2)) * 1),
                             20, {"a": 1})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((30, 2)) * 0),
                             30, {"a": 2})
    dataset.add_recspan(np.ones((40, 2)) * 1, {"a": 3})

    assert len(dataset) == 4
    assert_raises(IndexError, dataset.__getitem__, 4)

    dataset.add_event(1, 10, 11, {"foo": "bar"})

    def t(ds, recspan_id, expected_values=None):
        """Check recspan `recspan_id` of `ds` against the layout built above.

        When `expected_values` is None the untransformed fill value
        (recspan_id % 2) is expected.
        """
        recspan = ds[recspan_id]
        assert isinstance(recspan, pandas.DataFrame)
        expected_ticks = 10 * (recspan_id + 1)
        assert recspan.shape == (expected_ticks, 2)
        # 1/250 Hz = 4.0 ms
        assert np.all(recspan.index == np.arange(expected_ticks) * 4.0)
        # index is supposed to be floats. Not sure if that means float or
        # np.float64, but this check should work for either:
        assert isinstance(recspan.index[0], float)
        assert np.all(recspan.columns == ["MOCK1", "MOCK2"])
        # Values are supposed to be floating point as well.
        assert type(recspan.iloc[0, 0]) is np.float64
        if expected_values is None:
            local_recspan_id = recspan_id % 2
            expected_values = local_recspan_id
        assert np.allclose(recspan, expected_values)
        # raw_slice over the full range must reproduce the whole recspan.
        assert np.allclose(ds.raw_slice(recspan_id, 0, recspan.shape[0]),
                           recspan)
        # Negative bounds and bounds past the end are rejected.
        assert_raises(IndexError,
                      ds.raw_slice, recspan_id, -1, 10)
        assert_raises(IndexError,
                      ds.raw_slice, recspan_id, 10, -1)
        assert ds.raw_slice(recspan_id, 2, 2).shape == (0, 2)
        assert np.all(ds.raw_slice(recspan_id, 2, 5)
                      == recspan.iloc[2:5, :])
        assert_raises(IndexError,
                      ds.raw_slice, recspan_id, 0, 200)

        assert ds.recspan_infos[recspan_id]["a"] == recspan_id
        assert ds.recspan_infos[recspan_id].ticks == expected_ticks

    for i in xrange(4):
        t(dataset, i)

    # DataFormat mismatch
    diff_dataset = Dataset(DataFormat(500, "uV", ["MOCK1", "MOCK2"]))
    assert_raises(ValueError, diff_dataset.add_dataset, dataset)

    dataset_copy = Dataset(data_format)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == 4
    for i in xrange(4):
        t(dataset_copy, i)
    # The single event must have been copied along with the recspans.
    assert len(dataset_copy.events()) == 1
    assert dict(dataset_copy.events()[0]) == {"foo": "bar"}
    assert dataset_copy.events()[0].recspan_id == 1

    assert_raises(ValueError, dataset.transform, np.eye(2), exclude=["MOCK1"])
    dataset.transform([[2, 0], [0, 3]])
    # Transforming the first data set doesn't affect the second
    for i in xrange(4):
        t(dataset_copy, i)
    # But it does affect the first!
    for i in xrange(4):
        t(dataset, i, expected_values=[[2 * (i % 2), 3 * (i % 2)]])
    # Try a symbolic transform too -- it should stack with the previous
    # transform.
    dataset.transform("-MOCK1/3", exclude=["MOCK1"])
    for i in xrange(4):
        t(dataset, i, expected_values=[[2 * (i % 2),
                                         3 * (i % 2) - (2./3) * (i % 2)]])

    # Also check that changing one Dataset's metadata doesn't affect the copy.
    dataset.recspan_infos[0]["a"] = 100
    assert dataset.recspan_infos[0]["a"] == 100
    assert dataset_copy.recspan_infos[0]["a"] == 0
    # Set it back to avoid any confusion later in the test
    dataset.recspan_infos[0]["a"] = 0

    # Check __iter__
    recspans = list(dataset)
    assert len(recspans) == 4
    for i in xrange(4):
        assert_frame_equal(recspans[i], dataset[i])

    # Smoke test
    repr(dataset)

Beispiel #8

0

Datei anzeigen

Datei: erpss.py Projekt: rerpy/rerpy

def load_erpss(raw, log, calibration_events="condition == 0",
               lazy=True,
               calibrate=False,
               calibrate_half_width_ticks=5,
               calibrate_low_cursor_time=None,
               calibrate_high_cursor_time=None,
               calibrate_pulse_size=None,
               calibrate_polarity=1):
    """Load a raw/log file pair into a Dataset.

    The raw file is read with read_raw() and the log file with read_log().
    Their event codes are cross-checked (ignoring the 0x8000 bit), the
    recording is split into recspans after every pause/delete code, and the
    log events are attached to the resulting Dataset.

    Parameters:
      raw, log: passed to maybe_open(). When given as strings, their
        absolute paths are recorded in the metadata under "raw_file" and
        "log_file".
      calibration_events: query passed to dataset.events_query(). Every
        matching event has all of its existing keys deleted and is tagged
        with calibration_pulse=True. str() of this value is recorded in the
        metadata under "calibration_events".
      lazy: forwarded to read_raw(). If true, each recspan is added as a
        LazyRecspan built around the fetcher returned by read_raw();
        otherwise the matching slice of the returned data array is added.
      calibrate: if true, the data format's units are "uV" and the dataset
        is rescaled from the calibration pulses; otherwise the units are
        "RAW" and no rescaling happens.
      calibrate_half_width_ticks: half-width, in ticks, of the window
        placed around each cursor time.
      calibrate_low_cursor_time, calibrate_high_cursor_time: cursor times,
        rounded to a tick via data_format.ms_to_ticks() (so presumably
        given in ms -- confirm). Required when calibrate is true.
      calibrate_pulse_size: numerator of the per-channel scale factor; the
        inline comments treat it as uV. Required when calibrate is true.
      calibrate_polarity: multiplier applied to calibrate_pulse_size.

    Returns:
      The populated Dataset.

    Raises:
      ValueError: if the log file has an event at a position outside the
        raw data, if raw and log codes disagree, or if calibrate is true
        and one of the required calibration arguments is None.
    """
    dtype = np.float64

    metadata = {}
    if isinstance(raw, basestring):
        metadata["raw_file"] = os.path.abspath(raw)
    if isinstance(log, basestring):
        metadata["log_file"] = os.path.abspath(log)
    metadata["calibration_events"] = str(calibration_events)

    raw = maybe_open(raw)
    log = maybe_open(log)

    (fetcher, hz, channel_names, raw_codes, data, header_metadata
     ) = read_raw(raw, dtype, lazy)
    metadata.update(header_metadata)
    if calibrate:
        units = "uV"
    else:
        units = "RAW"
    data_format = DataFormat(hz, units, channel_names)

    total_ticks = raw_codes.shape[0]

    raw_log_events = read_log(log)
    # Scatter the log codes into an array shaped like raw_codes so the two
    # can be compared position by position.
    expanded_log_codes = np.zeros(raw_codes.shape, dtype=int)
    try:
        expanded_log_codes[raw_log_events.index] = raw_log_events["code"]
    except IndexError as e:
        raise ValueError("log file claims event at position where there is "
                         "no data: %s" % (e,))
    # Sometimes people "delete" events by setting the high (sign) bit of the
    # code in the log file (e.g. with 'logpoke'). So we ignore this bit when
    # comparing log codes to raw codes -- mismatches here do not indicate an
    # error -- and then are careful to use the log codes, rather than the
    # raw codes, below.
    if np.any((expanded_log_codes & ~0x8000) != (raw_codes & ~0x8000)):
        raise ValueError("raw and log files have mismatched codes")
    del raw_codes
    del expanded_log_codes

    pause_events = (raw_log_events["code"] == PAUSE_CODE)
    delete_events = (raw_log_events["code"] == DELETE_CODE)
    break_events = pause_events | delete_events
    break_ticks = raw_log_events.index[break_events]
    # The pause/delete code appears at the last sample of the old era, so if
    # used directly, adjacent pause ticks give contiguous spans of recording
    # as (pause1, pause2]. (Confirmed by checking by hand in a real recording
    # that the data associated with the sample that has the pause code is
    # contiguous with the sample before, but not the sample after.)  Adding +1
    # to each of them then converts this to Python style [pause1, pause2)
    # intervals. There is a pause code at the last record of the file, but not
    # one at the first, so we add that in explicitly.
    break_ticks += 1
    span_edges = np.concatenate(([0], break_ticks))
    assert span_edges[0] == 0
    assert span_edges[-1] == total_ticks

    # Consecutive edges delimit one recspan each.
    span_slices = [slice(span_edges[i], span_edges[i + 1])
                   for i in xrange(len(span_edges) - 1)]

    # NOTE(review): the same metadata dict object is handed to every recspan;
    # whether Dataset copies it is not visible here -- confirm, since
    # "deleted" is set on individual recspan infos further down.
    dataset = Dataset(data_format)
    for span_slice in span_slices:
        if lazy:
            lr = LazyRecspan(fetcher, dtype, len(channel_names),
                             span_slice.start, span_slice.stop)
            dataset.add_lazy_recspan(lr, span_slice.stop - span_slice.start,
                                     metadata)
        else:
            dataset.add_recspan(data[span_slice, :], metadata)

    # Convert each file-wide event tick into a (recspan id, tick offset
    # within that recspan) pair.
    span_starts = [s.start for s in span_slices]
    recspan_ids = []
    start_ticks = []
    for tick in raw_log_events.index:
        # Index of the last span whose start is <= tick.
        recspan_id = bisect.bisect(span_starts, tick) - 1
        span_slice = span_slices[recspan_id]
        span_start = span_slice.start
        span_stop = span_slice.stop
        assert span_start <= tick < span_stop
        recspan_ids.append(recspan_id)
        start_ticks.append(tick - span_start)
    # Every log event spans exactly one tick.
    stop_ticks = [tick + 1 for tick in start_ticks]
    dataset.add_events(recspan_ids, start_ticks, stop_ticks,
                       raw_log_events)

    # Flag the recspan_info attached to each delete-code event.
    for delete_event in dataset.events_query({"code": DELETE_CODE}):
        delete_event.recspan_info["deleted"] = True

    # Strip calibration events down to a single marker attribute.
    for cal_event in dataset.events_query(calibration_events):
        for key in list(cal_event):
            del cal_event[key]
        cal_event["calibration_pulse"] = True

    if calibrate:
        for kwarg in ["calibrate_low_cursor_time",
                      "calibrate_high_cursor_time",
                      "calibrate_pulse_size"]:
            # Look the argument's current value up by name.
            if locals()[kwarg] is None:
                raise ValueError("when calibrating, %s= argument must be "
                                 "specified" % (kwarg,))
        half_width = dataset.data_format.ticks_to_ms(calibrate_half_width_ticks)
        cal_vals = {}
        for which, cursor_time in [("low", calibrate_low_cursor_time),
                                   ("high", calibrate_high_cursor_time)]:
            # Round cursor to nearest tick
            cursor_tick = dataset.data_format.ms_to_ticks(cursor_time)
            cursor_time = dataset.data_format.ticks_to_ms(cursor_tick)
            erp = dataset.rerp("calibration_pulse",
                               cursor_time - half_width,
                               cursor_time + half_width,
                               "1",
                               all_or_nothing=True,
                               overlap_correction=False,
                               verbose=False)
            cal_vals[which] = erp.betas["Intercept"].mean()
        cal_diffs = cal_vals["high"] - cal_vals["low"]
        calibrate_pulse_size *= calibrate_polarity
        # For each channel, we want to multiply by a factor with units uV/raw
        # We have calibrate_pulse_size uV = cal_diffs raw
        cal_scaler = calibrate_pulse_size / cal_diffs
        # Apply the scale factors as a diagonal transform.
        dataset.transform(np.diagflat(np.asarray(cal_scaler)))

    return dataset