def test_write_sampled_empty(tmpdir):
    """Calling ``bark.write_sampled`` without any data must raise TypeError."""
    # Only metadata keywords are supplied; no data argument is passed at all.
    metadata_only = dict(sampling_rate=10,
                         units="mV",
                         n_channels=10,
                         dtype="int16")
    with pytest.raises(TypeError):
        bark.write_sampled("test_sampled", **metadata_only)
def datref(datfile, outfile):
    """Write a re-referenced copy of the sampled dataset ``datfile``.

    The data are processed in windows of ``BUF`` samples.  For every channel
    the reference is built from all *other* channels.  A least-squares
    scaling coefficient against the mean reference is estimated per window,
    and the coefficients of the windows whose reference power is at or above
    the 90th percentile are averaged into a single coefficient per channel.
    That coefficient is stored in the output metadata under
    ``reference_coefficient`` and used to subtract the scaled reference from
    the channel in ``outfile``.

    Args:
        datfile: path of the sampled dataset to read.
        outfile: path of the sampled dataset to create.
    """
    dataset = bark.read_sampled(datfile)
    data, params = dataset.data, dataset.attrs
    outparams = params.copy()
    # The attributes have to be expanded as keywords: the third positional
    # parameter of write_sampled is the sampling rate, not an attribute dict.
    bark.write_sampled(outfile, data, **outparams)
    outdset = bark.read_sampled(outfile, 'r+')
    out = outdset.data

    # determine reference coefficient
    n_channels = len(params["columns"])
    starts = range(0, len(out), BUF)
    coefs = np.zeros((n_channels, len(starts)))
    power = np.zeros_like(coefs)
    for ith, i in enumerate(starts):
        window = data[i:i + BUF, :]
        for c in range(n_channels):
            refs = np.delete(window, c, axis=1)  # remove col c
            ref = np.mean(refs, axis=1)
            x = window[:, c]
            ref_power = np.dot(ref, ref)
            coefs[c, ith] = np.dot(x, ref) / ref_power
            # Record the window's reference power; without this the
            # percentile mask below would select every window.
            power[c, ith] = ref_power

    best_C = np.zeros(n_channels)
    for c in range(n_channels):
        c_coefs = coefs[c, :]
        c_power = power[c, :]
        # keep only the highest-power windows for the estimate
        mask = c_power >= np.percentile(c_power, 90)
        best_C[c] = np.nanmean(c_coefs[mask])
    print("best reference coefficients: {}".format(best_C))
    for i, c in enumerate(best_C):
        outparams['columns'][i]['reference_coefficient'] = float(c)

    # subtract the scaled reference from every channel, window by window
    for i in starts:
        window = data[i:i + BUF, :]
        for c in range(n_channels):
            refs = np.delete(window, c, axis=1)  # remove col c
            # NOTE(review): the coefficient is fitted against the *mean* of
            # the other channels, but the *median* is subtracted here --
            # confirm this is intended.
            out[i:i + BUF, c] = (window[:, c] -
                                 best_C[c] * np.median(refs, axis=1))
    bark.write_metadata(outfile, **outparams)
def test_main(tmpdir):
    """Run ``main`` on a repeating 0..9 ramp and check the detected events."""
    csv_path = str(tmpdir.join('test.csv'))
    dat_path = str(tmpdir.join('test.dat'))
    # one column holding the values 0..9 repeated ten times
    ramp = np.arange(100).reshape(-1, 1) % 10
    bark.write_sampled(dat_path, ramp, sampling_rate=10)

    main(dat_path, csv_path, .1, 3)

    events = bark.read_events(csv_path)
    for required_column in ('start', 'channel'):
        assert required_column in events.data.columns
    # the value 9 occurs at samples 9, 19, ..., 99; sampling rate is 10
    expected_starts = np.arange(9, 100, 10)/10
    assert np.allclose(events.data.start, expected_starts)
def datchunk(dat, stride, use_seconds):
    """Split the sampled dataset ``dat`` into consecutive chunk files.

    Chunks are written next to ``dat`` as ``<basename>-chunk-<i>.dat``.  Each
    one receives the source metadata plus an ``offset`` attribute equal to
    the chunk length (in samples) times the chunk index.

    Args:
        dat: path of the sampled dataset to split.
        stride: chunk length; multiplied by the dataset's sampling rate
            when ``use_seconds`` is true. Truncated to an int either way.
        use_seconds: whether ``stride`` is given in seconds.
    """
    metadata = bark.read_metadata(dat)
    rate = metadata['sampling_rate']
    samples_per_chunk = int(stride * rate) if use_seconds else int(stride)
    stem, _ext = os.path.splitext(dat)
    chunks = stream.read(dat, chunksize=samples_per_chunk)
    for index, chunk in enumerate(chunks):
        metadata['offset'] = samples_per_chunk * index
        chunk_path = "{}-chunk-{}.dat".format(stem, index)
        bark.write_sampled(chunk_path, chunk, **metadata)
def save(y_est, basename, p, y_true=None):
    '''Write model predictions to disk, plus the true targets if given.

    Always writes ``<basename>_yhat.csv`` (through ``cat2csv.main``) and
    ``<basename>_yhat.npz``.  When ``y_true`` is provided, additionally
    writes ``<basename>_y_yhat.npz`` and a sampled dataset
    ``<basename>_y_yhat.dat`` holding both arrays scaled by 256 and cast
    to int16, true targets first.

    y_est: predictions (y_hat)
    basename: common prefix of every output file
    p: parameters; reads 'encoder', 'window_spacing' and 'sr'
    y_true: optional known targets
    '''
    # invert the encoder mapping: value -> key
    inverse_encoder = {value: key for key, value in p['encoder'].items()}
    sampling_rate = 1 / (p['window_spacing'] / p['sr'])

    cat2csv.main(y_est, sampling_rate, inverse_encoder, basename + '_yhat.csv')
    np.savez(basename + '_yhat.npz', yhat=y_est)

    if y_true is None:
        return

    np.savez(basename + '_y_yhat.npz', y=y_true, yhat=y_est)
    scaled_columns = [(values * 256).astype('int16')
                      for values in (y_true, y_est)]
    bark.write_sampled(basename + '_y_yhat.dat',
                       np.column_stack(scaled_columns),
                       sampling_rate)
def transfer_dset(ds_name, ds, e_path, verbose=False):
    """Convert one ARF dataset into a bark dataset inside ``e_path``.

    Time series are written with ``bark.write_sampled`` as ``<ds_name>.dat``;
    marked point processes are written with ``bark.write_events`` as
    ``<ds_name>.csv``.  Any other dataset is reported through
    ``unknown_ds_warning`` and nothing is written.

    Args:
        ds_name: dataset name, used as the output file name without extension.
        ds: the ARF dataset.
        e_path: directory the converted dataset is written into.
        verbose: print a line for every dataset that is created.
    """
    attrs = copy_attrs(ds.attrs)
    units = attrs.pop('units', None)

    sampled = arf.is_time_series(ds)
    if not sampled and not arf.is_marked_pointproc(ds):
        unknown_ds_warning(ds_name)
        return

    if sampled:
        out_path = os.path.join(e_path, ds_name + '.dat')
        attrs['columns'] = build_columns(units)
        # the sampling rate is a positional argument, so take it out of attrs
        rate = attrs.pop('sampling_rate')
        bark.write_sampled(out_path, ds, rate, **attrs)
        if verbose:
            print('Created sampled dataset: ' + out_path)
        return

    out_path = os.path.join(e_path, ds_name + '.csv')
    frame = pandas.DataFrame(ds[:])
    attrs['columns'] = build_columns(units, column_names=frame.columns)
    object_dtype = numpy.dtype('O')
    for column in frame:
        if frame[column].dtype == object_dtype:  # bytes object
            frame[column] = frame[column].str.decode('utf-8')
    bark.write_events(out_path, frame, **attrs)
    if verbose:
        print('Created event dataset: ' + out_path)
def test_read_metadata(tmpdir):
    """Exercise ``bark.read_metadata`` on entries and datasets.

    Covers: a valid entry, a valid dataset, pointing directly at a metadata
    file (ValueError), and missing metadata or missing dataset
    (FileNotFoundError).
    """
    base_dir = tmpdir.strpath

    # entry/dir with good metadata file
    entry_dir = os.path.join(base_dir, "myentry")
    created_at = arrow.get("2020-01-02T03:04:05+06:00").datetime
    entry = bark.create_entry(entry_dir, created_at, food="pizza")
    assert 'timestamp' in bark.read_metadata(entry_dir)

    # try to read entry/dir metadata file directly
    entry_meta_file = os.path.join(entry_dir, 'meta.yaml')
    with pytest.raises(ValueError):
        bark.read_metadata(entry_meta_file)

    # entry/dir without metadata file
    with pytest.raises(FileNotFoundError):
        bark.read_metadata(base_dir)

    # dataset with good metadata file
    samples = np.zeros((10, 3), dtype='int16')
    attrs = {
        "sampling_rate": 30000,
        "units": "mV",
        "unit_scale": 0.025,
        "extra": "barley",
    }
    dataset_path = os.path.join(base_dir, "test_sampled")
    dataset_meta_file = dataset_path + '.meta.yaml'
    dset = bark.write_sampled(datfile=dataset_path, data=samples, **attrs)
    assert 'sampling_rate' in bark.read_metadata(dataset_path)

    # try to read dataset metadata file directly
    with pytest.raises(ValueError):
        bark.read_metadata(dataset_meta_file)

    # dataset without metadata file
    os.remove(dataset_meta_file)
    with pytest.raises(FileNotFoundError):
        bark.read_metadata(dataset_path)

    # dataset that doesn't exist
    missing_path = os.path.join(base_dir, 'fake_dset.dat')
    with pytest.raises(FileNotFoundError):
        bark.read_metadata(missing_path)
def test_read_dataset(tmpdir):
    """``bark.read_dataset`` returns the dataset class matching the file.

    An event dataset must come back as ``bark.EventData`` and a sampled
    dataset as ``bark.SampledData``.
    """
    # --- event dataset
    path = os.path.join(tmpdir.strpath, 'test_events')
    data = pd.DataFrame({
        'start': [0, 1, 2, 3],
        'stop': [1, 2, 3, 4],
        'name': ['a', 'b', 'c', 'd'],
    })
    # Every column description is a mapping.  'start' used to be written as
    # the set literal {'units', 's'} (comma instead of colon).
    columns = {
        'start': {'units': 's'},
        'stop': {'units': 's'},
        'name': {'units': None},
    }
    bark.write_events(path, data, columns=columns)
    event_read = bark.read_dataset(path)
    assert isinstance(event_read, bark.EventData)

    # --- sampled dataset
    path = os.path.join(tmpdir.strpath, 'test_samp')
    data = np.zeros((10, 3), dtype="int16")
    params = {'sampling_rate': 30000, 'units': 'mV', 'unit_scale': 0.025}
    bark.write_sampled(path, data=data, **params)
    samp_read = bark.read_dataset(path)
    assert isinstance(samp_read, bark.SampledData)
def test_write_sampled(tmpdir):
    """``bark.write_sampled`` returns a SampledData with the expected fields."""
    samples = np.zeros((10, 3), dtype="int16")
    attrs = {
        "sampling_rate": 30000,
        "units": "mV",
        "unit_scale": 0.025,
        "extra": "barley",
    }
    target = os.path.join(tmpdir.strpath, "test_sampled")

    dset = bark.write_sampled(target, data=samples, **attrs)

    assert isinstance(dset, bark.SampledData)
    # check the type of each public field of the returned dataset
    expected_field_types = (
        ("path", str),
        ("attrs", dict),
        ("data", np.memmap),
    )
    for field, expected_type in expected_field_types:
        assert isinstance(getattr(dset, field), expected_type)
def test_closing(tmpdir):
    """Check lazy loading and closing of entries and datasets under a root.

    The assertions expect that items in ``r.entries`` / ``entry.datasets``
    are callables until they are accessed by key, are real ``bark.Entry`` /
    ``bark.SampledData`` objects afterwards, and are callables again once
    ``close()`` has been called on their container.
    """
    # setup: two entries on disk, one sampled dataset inside entry1
    ds_name = 'test_sampled.dat'
    entry1_path = os.path.join(tmpdir.strpath, "entry1")
    dtime = arrow.get("2020-01-02T03:04:05+06:00").datetime
    entry1 = bark.create_entry(entry1_path, dtime, food="pizza")
    entry2_path = os.path.join(tmpdir.strpath, "entry2")
    dtime = arrow.get("2020-01-10T03:04:05+06:00").datetime
    entry2 = bark.create_entry(entry2_path, dtime, food="burritos")
    data = np.zeros((10,3), dtype='int16')
    params = dict(sampling_rate=30000, units="mV", unit_scale=0.025, extra="barley")
    dset_path = os.path.join(entry1_path, ds_name)
    dset = bark.write_sampled(datfile=dset_path, data=data, **params)
    # drop the objects used for writing; everything below is re-read from disk
    del entry1, entry2, dset
    r = bark.read_root(tmpdir.strpath)
    # initial checking: both entries are present but still callables
    assert len(r.entries) == 2
    for ename in r.entries:
        assert callable(r.entries.get(ename))
    # load entry1: indexing by key yields an Entry; entry2 stays a callable
    entry1 = r.entries['entry1']
    assert isinstance(r.entries.get('entry1'), bark.Entry)
    assert callable(r.entries.get('entry2'))
    # load sampled dataset: callable before indexing, SampledData afterwards
    assert callable(entry1.datasets.get(ds_name))
    ds1 = entry1.datasets[ds_name]
    assert not callable(entry1.datasets.get(ds_name))
    assert isinstance(ds1, bark.SampledData)
    # close entry: deleting the local name alone must not change what the
    # entry holds; only entry1.close() turns the dataset back into a callable
    del ds1
    assert not callable(entry1.datasets.get(ds_name))
    assert isinstance(entry1.datasets.get(ds_name), bark.SampledData)
    entry1.close()
    assert callable(entry1.datasets.get(ds_name))
    # close root: same expectation one level up, for the entry held by the root
    del entry1
    assert not callable(r.entries.get('entry1'))
    assert isinstance(r.entries.get('entry1'), bark.Entry)
    r.close()
    assert callable(r.entries.get('entry1'))
def test_datchunk(tmpdir):
    """Check ``barkutils.datchunk`` with ``one_cut=True`` and ``one_cut=False``.

    With one cut, two chunk files are expected (split at ``CHUNK``), each
    carrying the source attributes plus an ``offset``.  Without it, three
    chunk files of at most ``CHUNK`` samples are expected.
    """
    from bark.tools import barkutils
    CHUNK = 350
    TOTAL_SIZE = 1000
    out_dir = tmpdir.strpath

    # two columns: 0..999 and 1000..1999
    samples = np.array([range(TOTAL_SIZE),
                        range(TOTAL_SIZE, 2 * TOTAL_SIZE)]).transpose()
    attrs = dict(sampling_rate=30000,
                 units="mV",
                 unit_scale=0.025,
                 extra="barley")
    source = bark.write_sampled(os.path.join(out_dir, "test.dat"),
                                data=samples,
                                **attrs)

    # --- single cut: everything after the first chunk lands in chunk 1
    barkutils.datchunk(source.path, CHUNK, use_seconds=False, one_cut=True)
    path_0 = os.path.join(out_dir, "test-chunk-0.dat")
    path_1 = os.path.join(out_dir, "test-chunk-1.dat")
    for chunk_path in (path_0, path_1):
        assert os.path.exists(chunk_path)
    chunk_0 = bark.read_sampled(path_0)
    chunk_1 = bark.read_sampled(path_1)

    assert (chunk_0.data == source.data[:CHUNK, :]).all()
    # pop 'offset' first so the remaining attrs can be compared to the source
    assert chunk_0.attrs.pop('offset') == 0
    assert chunk_0.attrs == source.attrs

    assert (chunk_1.data == source.data[CHUNK:TOTAL_SIZE, :]).all()
    assert chunk_1.attrs.pop('offset') == CHUNK
    assert chunk_1.attrs == source.attrs

    # release the datasets before deleting their files
    del chunk_0, chunk_1
    for chunk_path in (path_0, path_1):
        os.remove(chunk_path)
    for chunk_path in (path_0, path_1):
        assert not os.path.exists(chunk_path)

    # --- repeated cuts: 350 + 350 + 300 samples
    barkutils.datchunk(source.path, CHUNK, use_seconds=False, one_cut=False)
    path_2 = os.path.join(out_dir, "test-chunk-2.dat")
    for chunk_path in (path_0, path_1, path_2):
        assert os.path.exists(chunk_path)
    chunk_0 = bark.read_sampled(path_0)
    chunk_1 = bark.read_sampled(path_1)
    chunk_2 = bark.read_sampled(path_2)
    assert (chunk_0.data == source.data[:CHUNK, :]).all()
    assert (chunk_1.data == source.data[CHUNK:2*CHUNK, :]).all()
    assert (chunk_2.data == source.data[2*CHUNK:, :]).all()
def write_chunk(chunk, attrs, i):
    """Write chunk number ``i`` as ``<basename>-chunk-<i>.dat``.

    ``basename`` and ``stride`` are taken from the surrounding scope.
    ``attrs`` is updated in place: its ``offset`` entry is set to
    ``stride * i`` before it is passed on as the dataset attributes.
    """
    attrs['offset'] = stride * i
    chunk_path = "{}-chunk-{}.dat".format(basename, i)
    bark.write_sampled(chunk_path, chunk, **attrs)
def dat_from_wav(wav, barkname, **attrs):
    """Convert a WAV file into a bark sampled dataset.

    Args:
        wav: WAV file to read with ``wavfile.read``.
        barkname: path of the sampled dataset to write.
        **attrs: extra attributes forwarded to ``bark.write_sampled``.

    Returns:
        Whatever ``bark.write_sampled`` returns for the new dataset.
    """
    sampling_rate, samples = wavfile.read(wav)
    dataset = bark.write_sampled(barkname, samples, sampling_rate, **attrs)
    return dataset