def mock_dataset(num_channels=4, num_recspans=4, ticks_per_recspan=100,
                 hz=250):
    """Build a small DataSet filled with reproducible random data.

    Channels are named ``MOCK0``, ``MOCK1``, ... and the DataFormat is
    created with ``hz`` and the string ``"uV"``. Each of the
    ``num_recspans`` recspans is a ``(ticks_per_recspan, num_channels)``
    array of normal deviates, added with empty metadata.

    The random generator is always seeded with 0, so repeated calls with the
    same arguments produce the same data.
    """
    channel_names = ["MOCK%s" % (idx,) for idx in xrange(num_channels)]
    mock = DataSet(DataFormat(hz, "uV", channel_names))

    # Fixed seed: the mock data must not vary between test runs.
    rng = np.random.RandomState(0)
    recspan_shape = (ticks_per_recspan, num_channels)
    for _ in xrange(num_recspans):
        mock.add_recspan(rng.normal(size=recspan_shape), {})
    return mock
def mock_dataset(num_channels=4, num_recspans=4, ticks_per_recspan=100,
                 hz=250):
    """Build a small DataSet filled with reproducible random data.

    Channels are named ``MOCK0``, ``MOCK1``, ... and the DataFormat is
    created with ``hz`` and the string ``"uV"``. Each of the
    ``num_recspans`` recspans is a ``(ticks_per_recspan, num_channels)``
    array of normal deviates, added with empty metadata.

    The random generator is always seeded with 0, so repeated calls with the
    same arguments produce the same data.
    """
    channel_names = ["MOCK%s" % (idx,) for idx in xrange(num_channels)]
    mock = DataSet(DataFormat(hz, "uV", channel_names))

    # Fixed seed: the mock data must not vary between test runs.
    rng = np.random.RandomState(0)
    recspan_shape = (ticks_per_recspan, num_channels)
    for _ in xrange(num_recspans):
        mock.add_recspan(rng.normal(size=recspan_shape), {})
    return mock
def check_transforms(dataset):
    """Check that ``dataset.transform`` behaves as a stacking linear map.

    The checks performed, in order:

    1. After ``dataset.transform(tr1)``, each recspan equals the original
       data times ``tr1.T``.
    2. A new DataSet populated from ``dataset`` via ``add_dataset`` sees the
       same (already transformed) data.
    3. After a second ``dataset.transform(tr2)``, the copy is unchanged,
       while ``dataset`` reflects the composed transform ``tr2 . tr1``.

    Note that this mutates ``dataset``: both transforms remain applied when
    the function returns.
    """
    # Snapshot the data as plain arrays before any transform is applied.
    saved_datas = [np.array(data) for data in dataset]

    num_channels = dataset.data_format.num_channels

    tr1 = np.eye(num_channels)
    tr1[0, 0] = 2
    tr1[0, 1] = -0.5
    dataset.transform(tr1)
    for saved_data, data in zip(saved_datas, dataset):
        assert np.allclose(np.dot(saved_data, tr1.T), data)

    # The copy has to actually be populated: zip() stops at the shortest
    # iterable, so comparing against an empty DataSet would make every
    # assertion about the copy pass without checking anything.
    dataset_copy = DataSet(dataset.data_format)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == len(saved_datas)
    for saved_data, copy_data in zip(saved_datas, dataset_copy):
        assert np.allclose(np.dot(saved_data, tr1.T), copy_data)

    tr2 = np.eye(num_channels)
    tr2[-1, -1] = -3
    tr2[1, 0] = 2.5
    dataset.transform(tr2)

    # Transforming the original must not leak into the copy...
    for saved_data, copy_data in zip(saved_datas, dataset_copy):
        assert np.allclose(np.dot(saved_data, tr1.T), copy_data)

    # ...while the original now carries both transforms, tr1 applied first.
    tr_both = np.dot(tr2, tr1)
    for saved_data, data in zip(saved_datas, dataset):
        assert np.allclose(np.dot(saved_data, tr_both.T), data)
def test_DataSet():
    """End-to-end test of the basic DataSet API.

    Covers: empty-dataset behaviour, adding recspans and events, indexing,
    copying via ``add_dataset`` (including the DataFormat mismatch error),
    matrix and symbolic transforms, independence of a copy's data and
    metadata from the original, and iteration.
    """
    fmt = DataFormat(250, "uV", ["MOCK1", "MOCK2"])
    dataset = DataSet(fmt)

    # A freshly created DataSet is empty and rejects bad indices.
    assert len(dataset) == 0
    assert_raises(IndexError, dataset.__getitem__, 0)
    assert_raises(TypeError, dataset.__getitem__, slice(0, 0))
    assert list(dataset) == []

    # Recspan k has 10 * (k + 1) ticks, is filled with the constant k % 2,
    # and carries the metadata {"a": k}.
    for k in xrange(4):
        dataset.add_recspan(np.ones((10 * (k + 1), 2)) * (k % 2), {"a": k})
    assert len(dataset) == 4
    assert_raises(IndexError, dataset.__getitem__, 4)

    dataset.add_event(1, 10, 11, {"foo": "bar"})

    def check_recspan(ds, recspan_id, expected_values=None):
        # Verify the shape, index, columns, dtype, values and metadata of
        # one recspan. By default the expected value is recspan_id % 2.
        frame = ds[recspan_id]
        assert isinstance(frame, pandas.DataFrame)
        n_ticks = 10 * (recspan_id + 1)
        assert frame.shape == (n_ticks, 2)
        # 1/250 Hz = 4.0 ms
        assert np.all(frame.index == np.arange(n_ticks) * 4.0)
        # The index should hold floats; isinstance accepts both the builtin
        # float and np.float64.
        assert isinstance(frame.index[0], float)
        assert np.all(frame.columns == ["MOCK1", "MOCK2"])
        # The values should be floating point too.
        assert type(frame.iloc[0, 0]) is np.float64
        if expected_values is None:
            expected_values = recspan_id % 2
        assert np.allclose(frame, expected_values)
        assert ds.recspan_infos[recspan_id]["a"] == recspan_id
        assert ds.recspan_infos[recspan_id].ticks == n_ticks

    for k in xrange(4):
        check_recspan(dataset, k)

    # DataFormat mismatch
    mismatched = DataSet(DataFormat(500, "uV", ["MOCK1", "MOCK2"]))
    assert_raises(ValueError, mismatched.add_dataset, dataset)

    dataset_copy = DataSet(fmt)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == 4
    for k in xrange(4):
        check_recspan(dataset_copy, k)
    assert len(dataset_copy.events()) == 1
    assert dict(dataset_copy.events()[0]) == {"foo": "bar"}
    assert dataset_copy.events()[0].recspan_id == 1

    assert_raises(ValueError,
                  dataset.transform, np.eye(2), exclude=["MOCK1"])
    dataset.transform([[2, 0], [0, 3]])
    # The copy is untouched by a transform of the original...
    for k in xrange(4):
        check_recspan(dataset_copy, k)
    # ...but the original is changed.
    for k in xrange(4):
        check_recspan(dataset, k,
                      expected_values=[[2 * (k % 2), 3 * (k % 2)]])

    # A symbolic transform should stack on top of the previous transform.
    dataset.transform("-MOCK1/3", exclude=["MOCK1"])
    for k in xrange(4):
        check_recspan(dataset, k,
                      expected_values=[[2 * (k % 2),
                                        3 * (k % 2) - (2.0 / 3) * (k % 2)]])

    # Editing one DataSet's metadata must not show up in the copy.
    dataset.recspan_infos[0]["a"] = 100
    assert dataset.recspan_infos[0]["a"] == 100
    assert dataset_copy.recspan_infos[0]["a"] == 0
    # Restore the original value so later checks are not confused.
    dataset.recspan_infos[0]["a"] = 0

    # Check __iter__
    iterated = list(dataset)
    assert len(iterated) == 4
    from pandas.util.testing import assert_frame_equal
    for k in xrange(4):
        assert_frame_equal(iterated[k], dataset[k])
def load_erpss(raw, log, calibration_events="condition == 0"):
    """Load a raw/log file pair into a DataSet.

    ``raw`` and ``log`` are each handed to ``maybe_open``; when one is a
    string it is treated as a path and its absolute form is stored in the
    recspan metadata (``raw_file`` / ``log_file``).

    The recording is cut into recspans at every pause or delete code, and
    every log entry becomes a one-tick event in the recspan containing it.
    A recspan containing a delete code gets ``"deleted": True`` in its info.
    Events matched by the ``calibration_events`` query have all their
    attributes removed and replaced by ``calibration_pulse = True``.

    Raises ValueError if the log file refers to a position that is not
    present in the raw data, or if the codes in the two files disagree.
    """
    sample_dtype = np.float64

    # File names can only be recorded when we were given paths.
    recspan_metadata = {}
    for key, source in (("raw_file", raw), ("log_file", log)):
        if isinstance(source, basestring):
            recspan_metadata[key] = os.path.abspath(source)
    recspan_metadata["calibration_events"] = str(calibration_events)

    raw_file = maybe_open(raw)
    log_file = maybe_open(log)

    hz, channel_names, raw_codes, data, header_metadata = read_raw(
        raw_file, sample_dtype)
    recspan_metadata.update(header_metadata)
    data_format = DataFormat(hz, "RAW", channel_names)

    log_events = read_log(log_file)

    # Expand the log's codes to one slot per sample so that they can be
    # compared directly with the codes stored in the raw file.
    codes_from_log = np.zeros(raw_codes.shape, dtype=int)
    try:
        codes_from_log[log_events.index] = log_events["code"]
    except IndexError as e:
        raise ValueError("log file claims event at position where there is "
                         "no data: %s" % (e,))
    if np.any(codes_from_log != raw_codes):
        raise ValueError("raw and log files have mismatched codes")
    del raw_codes
    del codes_from_log

    event_codes = log_events["code"]
    is_break = (event_codes == PAUSE_CODE) | (event_codes == DELETE_CODE)
    break_ticks = log_events.index[is_break]
    # A pause/delete code sits on the *last* sample of the era it ends, so
    # taken as-is, neighbouring break ticks delimit contiguous recording as
    # (pause1, pause2]. (This was confirmed by hand on a real recording: the
    # sample carrying the pause code is contiguous with the sample before
    # it, but not with the one after.) Shifting every break tick by one
    # turns these into Python style [pause1, pause2) intervals. The file
    # ends with a pause code but does not start with one, so the leading
    # edge at 0 is added explicitly.
    break_ticks += 1
    span_edges = np.concatenate(([0], break_ticks))
    assert span_edges[0] == 0
    assert span_edges[-1] == data.shape[0]

    span_slices = [slice(start, stop)
                   for start, stop in zip(span_edges[:-1], span_edges[1:])]

    dataset = DataSet(data_format)
    for span in span_slices:
        dataset.add_recspan(data[span, :], recspan_metadata)

    span_starts = [span.start for span in span_slices]
    for tick, row in log_events.iterrows():
        attrs = row.to_dict()
        # Rightmost span whose start is <= tick.
        span_id = bisect.bisect_right(span_starts, tick) - 1
        span = span_slices[span_id]
        assert span.start <= tick < span.stop
        # Event positions are expressed relative to the start of the span.
        offset = tick - span.start
        dataset.add_event(span_id, offset, offset + 1, attrs)
        if attrs["code"] == DELETE_CODE:
            dataset.recspan_infos[span_id]["deleted"] = True

    # Strip calibration events down to a single marker attribute.
    for cal_event in dataset.events_query(calibration_events):
        for key in list(cal_event):
            del cal_event[key]
        cal_event["calibration_pulse"] = True

    return dataset
def load_erpss(raw, log, calibration_events="condition == 0"):
    """Load a raw/log file pair into a DataSet.

    ``raw`` and ``log`` are each handed to ``maybe_open``; when one is a
    string it is treated as a path and its absolute form is stored in the
    recspan metadata (``raw_file`` / ``log_file``).

    The recording is cut into recspans at every pause or delete code, and
    every log entry becomes a one-tick event in the recspan containing it.
    A recspan containing a delete code gets ``"deleted": True`` in its info.
    Events matched by the ``calibration_events`` query have all their
    attributes removed and replaced by ``calibration_pulse = True``.

    Raises ValueError if the log file refers to a position that is not
    present in the raw data, or if the codes in the two files disagree.
    """
    sample_dtype = np.float64

    # File names can only be recorded when we were given paths.
    recspan_metadata = {}
    for key, source in (("raw_file", raw), ("log_file", log)):
        if isinstance(source, basestring):
            recspan_metadata[key] = os.path.abspath(source)
    recspan_metadata["calibration_events"] = str(calibration_events)

    raw_file = maybe_open(raw)
    log_file = maybe_open(log)

    hz, channel_names, raw_codes, data, header_metadata = read_raw(
        raw_file, sample_dtype)
    recspan_metadata.update(header_metadata)
    data_format = DataFormat(hz, "RAW", channel_names)

    log_events = read_log(log_file)

    # Expand the log's codes to one slot per sample so that they can be
    # compared directly with the codes stored in the raw file.
    codes_from_log = np.zeros(raw_codes.shape, dtype=int)
    try:
        codes_from_log[log_events.index] = log_events["code"]
    except IndexError as e:
        raise ValueError("log file claims event at position where there is "
                         "no data: %s" % (e,))
    if np.any(codes_from_log != raw_codes):
        raise ValueError("raw and log files have mismatched codes")
    del raw_codes
    del codes_from_log

    event_codes = log_events["code"]
    is_break = (event_codes == PAUSE_CODE) | (event_codes == DELETE_CODE)
    break_ticks = log_events.index[is_break]
    # A pause/delete code sits on the *last* sample of the era it ends, so
    # taken as-is, neighbouring break ticks delimit contiguous recording as
    # (pause1, pause2]. (This was confirmed by hand on a real recording: the
    # sample carrying the pause code is contiguous with the sample before
    # it, but not with the one after.) Shifting every break tick by one
    # turns these into Python style [pause1, pause2) intervals. The file
    # ends with a pause code but does not start with one, so the leading
    # edge at 0 is added explicitly.
    break_ticks += 1
    span_edges = np.concatenate(([0], break_ticks))
    assert span_edges[0] == 0
    assert span_edges[-1] == data.shape[0]

    span_slices = [slice(start, stop)
                   for start, stop in zip(span_edges[:-1], span_edges[1:])]

    dataset = DataSet(data_format)
    for span in span_slices:
        dataset.add_recspan(data[span, :], recspan_metadata)

    span_starts = [span.start for span in span_slices]
    for tick, row in log_events.iterrows():
        attrs = row.to_dict()
        # Rightmost span whose start is <= tick.
        span_id = bisect.bisect_right(span_starts, tick) - 1
        span = span_slices[span_id]
        assert span.start <= tick < span.stop
        # Event positions are expressed relative to the start of the span.
        offset = tick - span.start
        dataset.add_event(span_id, offset, offset + 1, attrs)
        if attrs["code"] == DELETE_CODE:
            dataset.recspan_infos[span_id]["deleted"] = True

    # Strip calibration events down to a single marker attribute.
    for cal_event in dataset.events_query(calibration_events):
        for key in list(cal_event):
            del cal_event[key]
        cal_event["calibration_pulse"] = True

    return dataset
def test_DataSet():
    """End-to-end test of the basic DataSet API.

    Covers: empty-dataset behaviour, adding recspans and events, indexing,
    copying via ``add_dataset`` (including the DataFormat mismatch error),
    matrix and symbolic transforms, independence of a copy's data and
    metadata from the original, and iteration.
    """
    fmt = DataFormat(250, "uV", ["MOCK1", "MOCK2"])
    dataset = DataSet(fmt)

    # A freshly created DataSet is empty and rejects bad indices.
    assert len(dataset) == 0
    assert_raises(IndexError, dataset.__getitem__, 0)
    assert_raises(TypeError, dataset.__getitem__, slice(0, 0))
    assert list(dataset) == []

    # Recspan k has 10 * (k + 1) ticks, is filled with the constant k % 2,
    # and carries the metadata {"a": k}.
    for k in xrange(4):
        dataset.add_recspan(np.ones((10 * (k + 1), 2)) * (k % 2), {"a": k})
    assert len(dataset) == 4
    assert_raises(IndexError, dataset.__getitem__, 4)

    dataset.add_event(1, 10, 11, {"foo": "bar"})

    def check_recspan(ds, recspan_id, expected_values=None):
        # Verify the shape, index, columns, dtype, values and metadata of
        # one recspan. By default the expected value is recspan_id % 2.
        frame = ds[recspan_id]
        assert isinstance(frame, pandas.DataFrame)
        n_ticks = 10 * (recspan_id + 1)
        assert frame.shape == (n_ticks, 2)
        # 1/250 Hz = 4.0 ms
        assert np.all(frame.index == np.arange(n_ticks) * 4.0)
        # The index should hold floats; isinstance accepts both the builtin
        # float and np.float64.
        assert isinstance(frame.index[0], float)
        assert np.all(frame.columns == ["MOCK1", "MOCK2"])
        # The values should be floating point too.
        assert type(frame.iloc[0, 0]) is np.float64
        if expected_values is None:
            expected_values = recspan_id % 2
        assert np.allclose(frame, expected_values)
        assert ds.recspan_infos[recspan_id]["a"] == recspan_id
        assert ds.recspan_infos[recspan_id].ticks == n_ticks

    for k in xrange(4):
        check_recspan(dataset, k)

    # DataFormat mismatch
    mismatched = DataSet(DataFormat(500, "uV", ["MOCK1", "MOCK2"]))
    assert_raises(ValueError, mismatched.add_dataset, dataset)

    dataset_copy = DataSet(fmt)
    dataset_copy.add_dataset(dataset)
    assert len(dataset_copy) == 4
    for k in xrange(4):
        check_recspan(dataset_copy, k)
    assert len(dataset_copy.events()) == 1
    assert dict(dataset_copy.events()[0]) == {"foo": "bar"}
    assert dataset_copy.events()[0].recspan_id == 1

    assert_raises(ValueError,
                  dataset.transform, np.eye(2), exclude=["MOCK1"])
    dataset.transform([[2, 0], [0, 3]])
    # The copy is untouched by a transform of the original...
    for k in xrange(4):
        check_recspan(dataset_copy, k)
    # ...but the original is changed.
    for k in xrange(4):
        check_recspan(dataset, k,
                      expected_values=[[2 * (k % 2), 3 * (k % 2)]])

    # A symbolic transform should stack on top of the previous transform.
    dataset.transform("-MOCK1/3", exclude=["MOCK1"])
    for k in xrange(4):
        check_recspan(dataset, k,
                      expected_values=[[2 * (k % 2),
                                        3 * (k % 2) - (2. / 3) * (k % 2)]])

    # Editing one DataSet's metadata must not show up in the copy.
    dataset.recspan_infos[0]["a"] = 100
    assert dataset.recspan_infos[0]["a"] == 100
    assert dataset_copy.recspan_infos[0]["a"] == 0
    # Restore the original value so later checks are not confused.
    dataset.recspan_infos[0]["a"] = 0

    # Check __iter__
    iterated = list(dataset)
    assert len(iterated) == 4
    from pandas.util.testing import assert_frame_equal
    for k in xrange(4):
        assert_frame_equal(iterated[k], dataset[k])