def test_transforms():
    """Check that Dataset.transform applies and stacks matrix transforms.

    The original recspan data is saved up front; after each transform the
    dataset contents are compared against the saved data multiplied by the
    expected matrix. A copy made via add_dataset must keep the state it had
    at copy time even when the source dataset is transformed again.
    """
    dataset = mock_dataset()
    originals = [np.array(recspan) for recspan in dataset]

    def assert_matches(ds, matrix):
        # Each recspan in `ds` must equal the saved data with `matrix`
        # applied across the channel axis.
        for original, current in zip(originals, ds):
            assert np.allclose(np.dot(original, matrix.T), current)

    first = np.eye(dataset.data_format.num_channels)
    first[0, 0] = 2
    first[0, 1] = -0.5
    dataset.transform(first)
    assert_matches(dataset, first)

    # A copy taken now should carry the first transform along with it.
    dataset_copy = Dataset(dataset.data_format)
    dataset_copy.add_dataset(dataset)
    assert len(originals) == len(dataset_copy)
    assert_matches(dataset_copy, first)

    second = np.eye(dataset.data_format.num_channels)
    second[-1, -1] = -3
    second[1, 0] = 2.5
    dataset.transform(second)

    # The copy only ever saw the first transform...
    assert_matches(dataset_copy, first)
    # ...while the source dataset now reflects both, applied in order.
    assert_matches(dataset, np.dot(second, first))
def mock_dataset(num_channels=4, num_recspans=4, ticks_per_recspan=100,
                 hz=250, lazy="mixed"):
    """Build a small Dataset filled with seeded random data for tests.

    Channels are named MOCK0, MOCK1, ... and the units are "uV". The random
    generator is seeded with 0, so repeated calls with the same arguments
    draw the same values.

    ``lazy`` controls how recspans are stored: "all" wraps every recspan in
    a FakeLazyRecspan, "none" adds every recspan directly, and "mixed" makes
    even-numbered recspans lazy and odd-numbered ones direct.
    """
    assert lazy in ["all", "mixed", "none"]
    channel_names = ["MOCK%s" % (chan,) for chan in xrange(num_channels)]
    dataset = Dataset(DataFormat(hz, "uV", channel_names))
    rng = np.random.RandomState(0)
    shape = (ticks_per_recspan, num_channels)
    for recspan_idx in xrange(num_recspans):
        values = rng.normal(size=shape)
        is_even = recspan_idx % 2 == 0
        if lazy == "all" or (lazy == "mixed" and is_even):
            dataset.add_lazy_recspan(FakeLazyRecspan(values),
                                     ticks_per_recspan, {})
        else:
            dataset.add_recspan(values, {})
    return dataset
def mock_dataset(num_channels=4, num_recspans=4, ticks_per_recspan=100,
                 hz=250, lazy="mixed"):
    """Build a small Dataset filled with seeded random data for tests.

    Channels are named MOCK0, MOCK1, ... and the units are "uV". The random
    generator is seeded with 0, so repeated calls with the same arguments
    draw the same values.

    ``lazy`` controls how recspans are stored: "all" wraps every recspan in
    a FakeLazyRecspan, "none" adds every recspan directly, and "mixed" makes
    even-numbered recspans lazy and odd-numbered ones direct.
    """
    assert lazy in ["all", "mixed", "none"]
    channel_names = ["MOCK%s" % (chan,) for chan in xrange(num_channels)]
    dataset = Dataset(DataFormat(hz, "uV", channel_names))
    rng = np.random.RandomState(0)
    shape = (ticks_per_recspan, num_channels)
    for recspan_idx in xrange(num_recspans):
        values = rng.normal(size=shape)
        is_even = recspan_idx % 2 == 0
        if lazy == "all" or (lazy == "mixed" and is_even):
            dataset.add_lazy_recspan(FakeLazyRecspan(values),
                                     ticks_per_recspan, {})
        else:
            dataset.add_recspan(values, {})
    return dataset
def test_Dataset():
    """Exercise the basic Dataset API from empty to transformed.

    Covers: behaviour of an empty dataset, adding direct and lazy recspans,
    indexing and raw_slice bounds checks, copying via add_dataset (data,
    events and per-recspan metadata), matrix and symbolic transforms,
    iteration, and a repr smoke test.
    """
    data_format = DataFormat(250, "uV", ["MOCK1", "MOCK2"])
    dataset = Dataset(data_format)
    assert len(dataset) == 0
    assert_raises(IndexError, dataset.__getitem__, 0)
    assert_raises(TypeError, dataset.__getitem__, slice(0, 0))
    assert list(dataset) == []

    # Recspan i has 10 * (i + 1) ticks, is filled with the constant i % 2,
    # and carries metadata {"a": i}; the helper below relies on all three.
    dataset.add_recspan(np.ones((10, 2)) * 0, {"a": 0})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((20, 2)) * 1), 20,
                             {"a": 1})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((30, 2)) * 0), 30,
                             {"a": 2})
    dataset.add_recspan(np.ones((40, 2)) * 1, {"a": 3})
    assert len(dataset) == 4
    assert_raises(IndexError, dataset.__getitem__, 4)

    dataset.add_event(1, 10, 11, {"foo": "bar"})

    def t(ds, recspan_id, expected_values=None):
        # Check one recspan of `ds` against the layout set up above.
        recspan = ds[recspan_id]
        assert isinstance(recspan, pandas.DataFrame)
        expected_ticks = 10 * (recspan_id + 1)
        assert recspan.shape == (expected_ticks, 2)
        # 1/250 Hz = 4.0 ms
        assert np.all(recspan.index == np.arange(expected_ticks) * 4.0)
        # index is supposed to be floats. Not sure if that means float or
        # np.float64, but this check should work for either:
        assert isinstance(recspan.index[0], float)
        assert np.all(recspan.columns == ["MOCK1", "MOCK2"])
        # Values are supposed to be floating point as well.
        assert type(recspan.iloc[0, 0]) is np.float64
        if expected_values is None:
            local_recspan_id = recspan_id % 2
            expected_values = local_recspan_id
        assert np.allclose(recspan, expected_values)
        assert np.allclose(ds.raw_slice(recspan_id, 0, recspan.shape[0]),
                           recspan)
        assert_raises(IndexError, ds.raw_slice, recspan_id, -1, 10)
        assert_raises(IndexError, ds.raw_slice, recspan_id, 10, -1)
        assert ds.raw_slice(recspan_id, 2, 2).shape == (0, 2)
        assert np.all(ds.raw_slice(recspan_id, 2, 5) == recspan.iloc[2:5, :])
        assert_raises(IndexError, ds.raw_slice, recspan_id, 0, 200)
        assert ds.recspan_infos[recspan_id]["a"] == recspan_id
        assert ds.recspan_infos[recspan_id].ticks == expected_ticks

    for i in xrange(4):
        t(dataset, i)

    # DataFormat mismatch
    diff_dataset = Dataset(DataFormat(500, "uV", ["MOCK1", "MOCK2"]))
    assert_raises(ValueError, diff_dataset.add_dataset, dataset)

    dataset_copy = Dataset(data_format)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == 4
    for i in xrange(4):
        t(dataset_copy, i)
    assert len(dataset_copy.events()) == 1
    assert dict(dataset_copy.events()[0]) == {"foo": "bar"}
    assert dataset_copy.events()[0].recspan_id == 1

    assert_raises(ValueError,
                  dataset.transform, np.eye(2), exclude=["MOCK1"])
    dataset.transform([[2, 0], [0, 3]])
    # Transforming the first data set doesn't affect the second
    for i in xrange(4):
        t(dataset_copy, i)
    # But it does affect the first!
    for i in xrange(4):
        t(dataset, i, expected_values=[[2 * (i % 2), 3 * (i % 2)]])
    # Try a symbolic transform too -- it should stack with the previous
    # transform.
    dataset.transform("-MOCK1/3", exclude=["MOCK1"])
    for i in xrange(4):
        t(dataset, i,
          expected_values=[[2 * (i % 2),
                            3 * (i % 2) - (2. / 3) * (i % 2)]])

    # Also check that changing one Dataset's metadata doesn't affect the copy.
    dataset.recspan_infos[0]["a"] = 100
    assert dataset.recspan_infos[0]["a"] == 100
    assert dataset_copy.recspan_infos[0]["a"] == 0
    # Set it back to avoid any confusion later in the test
    dataset.recspan_infos[0]["a"] = 0

    # Check __iter__
    recspans = list(dataset)
    assert len(recspans) == 4
    # pandas.testing is the public home of assert_frame_equal; the old
    # pandas.util.testing path is deprecated and missing from current
    # pandas, so it is only used as a fallback for old installations.
    try:
        from pandas.testing import assert_frame_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal
    for i in xrange(4):
        assert_frame_equal(recspans[i], dataset[i])

    # Smoke test
    repr(dataset)
def load_erpss(raw, log, calibration_events="condition == 0",
               lazy=True,
               calibrate=False,
               calibrate_half_width_ticks=5,
               calibrate_low_cursor_time=None,
               calibrate_high_cursor_time=None,
               calibrate_pulse_size=None,
               calibrate_polarity=1):
    """Load a raw/log file pair into a Dataset, optionally calibrating it.

    The recording is cut into recspans at every pause or delete code in the
    log, every log entry becomes a one-tick event, recspans ending in a
    delete code are flagged ``deleted``, and events matching
    ``calibration_events`` are stripped of their attributes and marked
    ``calibration_pulse=True``.

    Arguments:
      raw, log: the raw and log inputs, each passed through ``maybe_open``.
        When given as strings their absolute paths are recorded in the
        recspan metadata.
      calibration_events: events query selecting the calibration pulses.
      lazy: if true, recspans are added as LazyRecspan objects built on the
        fetcher returned by ``read_raw``; otherwise slices of the loaded
        data array are added directly.
      calibrate: if true, the data is rescaled per channel and the data
        format's units are "uV"; otherwise the units are "RAW".
      calibrate_half_width_ticks: half-width, in ticks, of the window
        averaged around each cursor.
      calibrate_low_cursor_time, calibrate_high_cursor_time: cursor
        positions in ms (rounded to the nearest tick); required when
        calibrating.
      calibrate_pulse_size: pulse size that the high-minus-low difference
        should be scaled to; required when calibrating.
      calibrate_polarity: multiplier applied to calibrate_pulse_size.

    Returns the populated Dataset.

    Raises ValueError if the log refers to a position with no data, if the
    raw and log codes disagree, or if calibration is requested without the
    required calibration arguments.
    """
    dtype = np.float64

    metadata = {}
    if isinstance(raw, basestring):
        metadata["raw_file"] = os.path.abspath(raw)
    if isinstance(log, basestring):
        metadata["log_file"] = os.path.abspath(log)
    metadata["calibration_events"] = str(calibration_events)

    raw = maybe_open(raw)
    log = maybe_open(log)

    (fetcher, hz, channel_names, raw_codes, data, header_metadata) = \
        read_raw(raw, dtype, lazy)
    metadata.update(header_metadata)
    units = "uV" if calibrate else "RAW"
    data_format = DataFormat(hz, units, channel_names)

    total_ticks = raw_codes.shape[0]

    raw_log_events = read_log(log)
    expanded_log_codes = np.zeros(raw_codes.shape, dtype=int)
    try:
        expanded_log_codes[raw_log_events.index] = raw_log_events["code"]
    except IndexError as e:
        raise ValueError("log file claims event at position where there is "
                         "no data: %s" % (e,))
    # Sometimes people "delete" events by setting the high (sign) bit of the
    # code in the log file (e.g. with 'logpoke'). So we ignore this bit when
    # comparing log codes to raw codes -- mismatches here do not indicate an
    # error -- and then are careful to use the log codes, rather than the
    # raw codes, below.
    if np.any((expanded_log_codes & ~0x8000) != (raw_codes & ~0x8000)):
        raise ValueError("raw and log files have mismatched codes")
    del raw_codes
    del expanded_log_codes

    pause_events = (raw_log_events["code"] == PAUSE_CODE)
    delete_events = (raw_log_events["code"] == DELETE_CODE)
    break_events = pause_events | delete_events
    break_ticks = raw_log_events.index[break_events]
    # The pause/delete code appears at the last sample of the old era, so if
    # used directly, adjacent pause ticks give contiguous spans of recording
    # as (pause1, pause2]. (Confirmed by checking by hand in a real recording
    # that the data associated with the sample that has the pause code is
    # contiguous with the sample before, but not the sample after.) Adding +1
    # to each of them then converts this to Python style [pause1, pause2)
    # intervals. There is a pause code at the last record of the file, but not
    # one at the first, so we add that in explicitly.
    break_ticks += 1
    span_edges = np.concatenate(([0], break_ticks))
    assert span_edges[0] == 0
    assert span_edges[-1] == total_ticks

    span_slices = [slice(span_edges[i], span_edges[i + 1])
                   for i in xrange(len(span_edges) - 1)]

    dataset = Dataset(data_format)
    for span_slice in span_slices:
        if lazy:
            lr = LazyRecspan(fetcher, dtype, len(channel_names),
                             span_slice.start, span_slice.stop)
            dataset.add_lazy_recspan(lr,
                                     span_slice.stop - span_slice.start,
                                     metadata)
        else:
            dataset.add_recspan(data[span_slice, :], metadata)

    # Map each log entry's absolute tick onto (recspan id, tick offset
    # within that recspan).
    span_starts = [s.start for s in span_slices]
    recspan_ids = []
    start_ticks = []
    for tick in raw_log_events.index:
        recspan_id = bisect.bisect(span_starts, tick) - 1
        span_slice = span_slices[recspan_id]
        span_start = span_slice.start
        span_stop = span_slice.stop
        assert span_start <= tick < span_stop
        recspan_ids.append(recspan_id)
        start_ticks.append(tick - span_start)
    stop_ticks = [tick + 1 for tick in start_ticks]
    dataset.add_events(recspan_ids, start_ticks, stop_ticks, raw_log_events)

    for delete_event in dataset.events_query({"code": DELETE_CODE}):
        delete_event.recspan_info["deleted"] = True

    for cal_event in dataset.events_query(calibration_events):
        # Iterate over a snapshot of the keys, since entries are deleted
        # during the loop.
        for key in list(cal_event):
            del cal_event[key]
        cal_event["calibration_pulse"] = True

    if calibrate:
        # Explicit name/value pairs instead of a locals() lookup, so the
        # check cannot silently break if a name changes.
        required = [
            ("calibrate_low_cursor_time", calibrate_low_cursor_time),
            ("calibrate_high_cursor_time", calibrate_high_cursor_time),
            ("calibrate_pulse_size", calibrate_pulse_size),
        ]
        for kwarg, value in required:
            if value is None:
                raise ValueError("when calibrating, %s= argument must be "
                                 "specified" % (kwarg,))
        half_width = dataset.data_format.ticks_to_ms(
            calibrate_half_width_ticks)
        cal_vals = {}
        for which, cursor_time in [("low", calibrate_low_cursor_time),
                                   ("high", calibrate_high_cursor_time)]:
            # Round cursor to nearest tick
            cursor_tick = dataset.data_format.ms_to_ticks(cursor_time)
            cursor_time = dataset.data_format.ticks_to_ms(cursor_tick)
            erp = dataset.rerp("calibration_pulse",
                               cursor_time - half_width,
                               cursor_time + half_width,
                               "1",
                               all_or_nothing=True,
                               overlap_correction=False,
                               verbose=False)
            cal_vals[which] = erp.betas["Intercept"].mean()
        cal_diffs = cal_vals["high"] - cal_vals["low"]
        # Use a fresh local rather than `*=`, which would modify a
        # caller-supplied array in place.
        signed_pulse_size = calibrate_pulse_size * calibrate_polarity
        # For each channel, we want to multiply by a factor with units uV/raw
        # We have calibrate_pulse_size uV = cal_diffs raw
        cal_scaler = signed_pulse_size / cal_diffs
        dataset.transform(np.diagflat(np.asarray(cal_scaler)))

    return dataset
def test_Dataset():
    """Exercise the basic Dataset API from empty to transformed.

    Covers: behaviour of an empty dataset, adding direct and lazy recspans,
    indexing and raw_slice bounds checks, copying via add_dataset (data,
    events and per-recspan metadata), matrix and symbolic transforms,
    iteration, and a repr smoke test.
    """
    data_format = DataFormat(250, "uV", ["MOCK1", "MOCK2"])
    dataset = Dataset(data_format)
    assert len(dataset) == 0
    assert_raises(IndexError, dataset.__getitem__, 0)
    assert_raises(TypeError, dataset.__getitem__, slice(0, 0))
    assert list(dataset) == []

    # Recspan i has 10 * (i + 1) ticks, is filled with the constant i % 2,
    # and carries metadata {"a": i}; the helper below relies on all three.
    dataset.add_recspan(np.ones((10, 2)) * 0, {"a": 0})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((20, 2)) * 1), 20,
                             {"a": 1})
    dataset.add_lazy_recspan(FakeLazyRecspan(np.ones((30, 2)) * 0), 30,
                             {"a": 2})
    dataset.add_recspan(np.ones((40, 2)) * 1, {"a": 3})
    assert len(dataset) == 4
    assert_raises(IndexError, dataset.__getitem__, 4)

    dataset.add_event(1, 10, 11, {"foo": "bar"})

    def t(ds, recspan_id, expected_values=None):
        # Check one recspan of `ds` against the layout set up above.
        recspan = ds[recspan_id]
        assert isinstance(recspan, pandas.DataFrame)
        expected_ticks = 10 * (recspan_id + 1)
        assert recspan.shape == (expected_ticks, 2)
        # 1/250 Hz = 4.0 ms
        assert np.all(recspan.index == np.arange(expected_ticks) * 4.0)
        # index is supposed to be floats. Not sure if that means float or
        # np.float64, but this check should work for either:
        assert isinstance(recspan.index[0], float)
        assert np.all(recspan.columns == ["MOCK1", "MOCK2"])
        # Values are supposed to be floating point as well.
        assert type(recspan.iloc[0, 0]) is np.float64
        if expected_values is None:
            local_recspan_id = recspan_id % 2
            expected_values = local_recspan_id
        assert np.allclose(recspan, expected_values)
        assert np.allclose(ds.raw_slice(recspan_id, 0, recspan.shape[0]),
                           recspan)
        assert_raises(IndexError, ds.raw_slice, recspan_id, -1, 10)
        assert_raises(IndexError, ds.raw_slice, recspan_id, 10, -1)
        assert ds.raw_slice(recspan_id, 2, 2).shape == (0, 2)
        assert np.all(ds.raw_slice(recspan_id, 2, 5) == recspan.iloc[2:5, :])
        assert_raises(IndexError, ds.raw_slice, recspan_id, 0, 200)
        assert ds.recspan_infos[recspan_id]["a"] == recspan_id
        assert ds.recspan_infos[recspan_id].ticks == expected_ticks

    for i in xrange(4):
        t(dataset, i)

    # DataFormat mismatch
    diff_dataset = Dataset(DataFormat(500, "uV", ["MOCK1", "MOCK2"]))
    assert_raises(ValueError, diff_dataset.add_dataset, dataset)

    dataset_copy = Dataset(data_format)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == 4
    for i in xrange(4):
        t(dataset_copy, i)
    assert len(dataset_copy.events()) == 1
    assert dict(dataset_copy.events()[0]) == {"foo": "bar"}
    assert dataset_copy.events()[0].recspan_id == 1

    assert_raises(ValueError,
                  dataset.transform, np.eye(2), exclude=["MOCK1"])
    dataset.transform([[2, 0], [0, 3]])
    # Transforming the first data set doesn't affect the second
    for i in xrange(4):
        t(dataset_copy, i)
    # But it does affect the first!
    for i in xrange(4):
        t(dataset, i, expected_values=[[2 * (i % 2), 3 * (i % 2)]])
    # Try a symbolic transform too -- it should stack with the previous
    # transform.
    dataset.transform("-MOCK1/3", exclude=["MOCK1"])
    for i in xrange(4):
        t(dataset, i,
          expected_values=[[2 * (i % 2),
                            3 * (i % 2) - (2./3) * (i % 2)]])

    # Also check that changing one Dataset's metadata doesn't affect the copy.
    dataset.recspan_infos[0]["a"] = 100
    assert dataset.recspan_infos[0]["a"] == 100
    assert dataset_copy.recspan_infos[0]["a"] == 0
    # Set it back to avoid any confusion later in the test
    dataset.recspan_infos[0]["a"] = 0

    # Check __iter__
    recspans = list(dataset)
    assert len(recspans) == 4
    # pandas.testing is the public home of assert_frame_equal; the old
    # pandas.util.testing path is deprecated and missing from current
    # pandas, so it is only used as a fallback for old installations.
    try:
        from pandas.testing import assert_frame_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal
    for i in xrange(4):
        assert_frame_equal(recspans[i], dataset[i])

    # Smoke test
    repr(dataset)
def load_erpss(raw, log, calibration_events="condition == 0",
               lazy=True,
               calibrate=False,
               calibrate_half_width_ticks=5,
               calibrate_low_cursor_time=None,
               calibrate_high_cursor_time=None,
               calibrate_pulse_size=None,
               calibrate_polarity=1):
    """Load a raw/log file pair into a Dataset, optionally calibrating it.

    The recording is cut into recspans at every pause or delete code in the
    log, every log entry becomes a one-tick event, recspans ending in a
    delete code are flagged ``deleted``, and events matching
    ``calibration_events`` are stripped of their attributes and marked
    ``calibration_pulse=True``.

    Arguments:
      raw, log: the raw and log inputs, each passed through ``maybe_open``.
        When given as strings their absolute paths are recorded in the
        recspan metadata.
      calibration_events: events query selecting the calibration pulses.
      lazy: if true, recspans are added as LazyRecspan objects built on the
        fetcher returned by ``read_raw``; otherwise slices of the loaded
        data array are added directly.
      calibrate: if true, the data is rescaled per channel and the data
        format's units are "uV"; otherwise the units are "RAW".
      calibrate_half_width_ticks: half-width, in ticks, of the window
        averaged around each cursor.
      calibrate_low_cursor_time, calibrate_high_cursor_time: cursor
        positions in ms (rounded to the nearest tick); required when
        calibrating.
      calibrate_pulse_size: pulse size that the high-minus-low difference
        should be scaled to; required when calibrating.
      calibrate_polarity: multiplier applied to calibrate_pulse_size.

    Returns the populated Dataset.

    Raises ValueError if the log refers to a position with no data, if the
    raw and log codes disagree, or if calibration is requested without the
    required calibration arguments.
    """
    dtype = np.float64

    metadata = {}
    if isinstance(raw, basestring):
        metadata["raw_file"] = os.path.abspath(raw)
    if isinstance(log, basestring):
        metadata["log_file"] = os.path.abspath(log)
    metadata["calibration_events"] = str(calibration_events)

    raw = maybe_open(raw)
    log = maybe_open(log)

    (fetcher, hz, channel_names, raw_codes, data, header_metadata) = \
        read_raw(raw, dtype, lazy)
    metadata.update(header_metadata)
    units = "uV" if calibrate else "RAW"
    data_format = DataFormat(hz, units, channel_names)

    total_ticks = raw_codes.shape[0]

    raw_log_events = read_log(log)
    expanded_log_codes = np.zeros(raw_codes.shape, dtype=int)
    try:
        expanded_log_codes[raw_log_events.index] = raw_log_events["code"]
    except IndexError as e:
        raise ValueError("log file claims event at position where there is "
                         "no data: %s" % (e,))
    # Sometimes people "delete" events by setting the high (sign) bit of the
    # code in the log file (e.g. with 'logpoke'). So we ignore this bit when
    # comparing log codes to raw codes -- mismatches here do not indicate an
    # error -- and then are careful to use the log codes, rather than the
    # raw codes, below.
    if np.any((expanded_log_codes & ~0x8000) != (raw_codes & ~0x8000)):
        raise ValueError("raw and log files have mismatched codes")
    del raw_codes
    del expanded_log_codes

    pause_events = (raw_log_events["code"] == PAUSE_CODE)
    delete_events = (raw_log_events["code"] == DELETE_CODE)
    break_events = pause_events | delete_events
    break_ticks = raw_log_events.index[break_events]
    # The pause/delete code appears at the last sample of the old era, so if
    # used directly, adjacent pause ticks give contiguous spans of recording
    # as (pause1, pause2]. (Confirmed by checking by hand in a real recording
    # that the data associated with the sample that has the pause code is
    # contiguous with the sample before, but not the sample after.) Adding +1
    # to each of them then converts this to Python style [pause1, pause2)
    # intervals. There is a pause code at the last record of the file, but not
    # one at the first, so we add that in explicitly.
    break_ticks += 1
    span_edges = np.concatenate(([0], break_ticks))
    assert span_edges[0] == 0
    assert span_edges[-1] == total_ticks

    span_slices = [slice(span_edges[i], span_edges[i + 1])
                   for i in xrange(len(span_edges) - 1)]

    dataset = Dataset(data_format)
    for span_slice in span_slices:
        if lazy:
            lr = LazyRecspan(fetcher, dtype, len(channel_names),
                             span_slice.start, span_slice.stop)
            dataset.add_lazy_recspan(lr,
                                     span_slice.stop - span_slice.start,
                                     metadata)
        else:
            dataset.add_recspan(data[span_slice, :], metadata)

    # Map each log entry's absolute tick onto (recspan id, tick offset
    # within that recspan).
    span_starts = [s.start for s in span_slices]
    recspan_ids = []
    start_ticks = []
    for tick in raw_log_events.index:
        recspan_id = bisect.bisect(span_starts, tick) - 1
        span_slice = span_slices[recspan_id]
        span_start = span_slice.start
        span_stop = span_slice.stop
        assert span_start <= tick < span_stop
        recspan_ids.append(recspan_id)
        start_ticks.append(tick - span_start)
    stop_ticks = [tick + 1 for tick in start_ticks]
    dataset.add_events(recspan_ids, start_ticks, stop_ticks, raw_log_events)

    for delete_event in dataset.events_query({"code": DELETE_CODE}):
        delete_event.recspan_info["deleted"] = True

    for cal_event in dataset.events_query(calibration_events):
        # Iterate over a snapshot of the keys, since entries are deleted
        # during the loop.
        for key in list(cal_event):
            del cal_event[key]
        cal_event["calibration_pulse"] = True

    if calibrate:
        # Explicit name/value pairs instead of a locals() lookup, so the
        # check cannot silently break if a name changes.
        required = [
            ("calibrate_low_cursor_time", calibrate_low_cursor_time),
            ("calibrate_high_cursor_time", calibrate_high_cursor_time),
            ("calibrate_pulse_size", calibrate_pulse_size),
        ]
        for kwarg, value in required:
            if value is None:
                raise ValueError("when calibrating, %s= argument must be "
                                 "specified" % (kwarg,))
        half_width = dataset.data_format.ticks_to_ms(
            calibrate_half_width_ticks)
        cal_vals = {}
        for which, cursor_time in [("low", calibrate_low_cursor_time),
                                   ("high", calibrate_high_cursor_time)]:
            # Round cursor to nearest tick
            cursor_tick = dataset.data_format.ms_to_ticks(cursor_time)
            cursor_time = dataset.data_format.ticks_to_ms(cursor_tick)
            erp = dataset.rerp("calibration_pulse",
                               cursor_time - half_width,
                               cursor_time + half_width,
                               "1",
                               all_or_nothing=True,
                               overlap_correction=False,
                               verbose=False)
            cal_vals[which] = erp.betas["Intercept"].mean()
        cal_diffs = cal_vals["high"] - cal_vals["low"]
        # Use a fresh local rather than `*=`, which would modify a
        # caller-supplied array in place.
        signed_pulse_size = calibrate_pulse_size * calibrate_polarity
        # For each channel, we want to multiply by a factor with units uV/raw
        # We have calibrate_pulse_size uV = cal_diffs raw
        cal_scaler = signed_pulse_size / cal_diffs
        dataset.transform(np.diagflat(np.asarray(cal_scaler)))

    return dataset