def datenrich(dat, out, label_file, window):
    dataset = bark.read_sampled(dat)
    data, params = dataset.data, dataset.attrs
    rate = params["sampling_rate"]
    total_samples = data.shape[0]
    # cut out labelled segments
    label_dset = bark.read_events(label_file)
    for x in label_dset.data.itertuples():
        assert x.start > 0
        assert x.start * rate < total_samples
        assert x.stop > 0
        assert x.stop * rate < total_samples
        if x.start - window < 0:
            print('warning, cannot place a full window at beginning of data')
    segs, newlabels = get_segments(label_dset.data, window)
    # convert to samples
    segs = np.array(segs * rate, dtype=int)
    # write to new file
    with open(out, "wb") as outfp:
        for start, stop in segs:
            assert stop > 0
            assert start < total_samples
            assert start >= 0
            if stop >= total_samples:
                print('warning, cannot place a full window at end of data')
                stop = total_samples - 1
            outfp.write(data[start:stop, :].tobytes())
    bark.write_metadata(out, **params)
    bark.write_events(os.path.splitext(out)[0] + ".csv",
                      newlabels,
                      **label_dset.attrs)

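# Hedged usage sketch for datenrich; file names are hypothetical, and `window`
# is assumed to be in seconds, since it is compared against label start times
# before the seconds-to-samples conversion:
#
#     datenrich('raw.dat', 'enriched.dat', 'raw.csv', window=0.5)
#
# This writes the labelled segments (each padded by 0.5 s) to 'enriched.dat'
# and the re-offset labels to 'enriched.csv'.
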
def main(catdata,
         sampling_rate,
         decoder,
         outfile,
         min_syl_ms=default_min_syl,
         min_silent_ms=default_min_silent):
    from pandas import DataFrame
    min_syl = min_syl_ms / 1000
    min_silent = min_silent_ms / 1000
    start, stop, name = first_pass(np.argmax(catdata, 1),
                                   decoder,
                                   1 / sampling_rate)
    second_pass(start, stop, name, min_silent)
    third_pass(start, stop, name, min_syl)
    bark.write_events(outfile,
                      DataFrame(dict(start=start, stop=stop, name=name)),
                      columns={'start': {'units': 's'},
                               'stop': {'units': 's'},
                               'name': {'units': None}})

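# Hedged usage sketch: `catdata` is assumed to be a (samples x categories)
# array of classifier outputs and `decoder` a mapping from argmax indices to
# label names; the variable names below are hypothetical.
#
#     probs = model.predict(spectrogram)   # shape (n_samples, n_categories)
#     main(probs, sampling_rate=22050, decoder=decoder, outfile='syl.csv')
#
# Judging by the argument names, second_pass presumably merges events
# separated by silences shorter than min_silent, and third_pass drops events
# shorter than min_syl.
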
def _plexon_csv_to_bark_csv():
    "shell script"
    import argparse
    p = argparse.ArgumentParser(description="""
    Converts a plexon csv to a bark csv.
    """)
    p.add_argument("name",
                   help="""Name of plexon csv file.
                   Don't forget to export a header column.
                   """)
    p.add_argument("-o", "--out",
                   help="name of output csv file",
                   required=True)
    p.add_argument("-a", "--attributes",
                   action='append',
                   # split on the first '=' only, so values may contain '='
                   type=lambda kv: kv.split("=", 1),
                   dest='keyvalues',
                   help="extra metadata in the form of KEY=VALUE")
    args = p.parse_args()
    if args.keyvalues:
        attrs = dict(args.keyvalues)
    else:
        attrs = {}
    if "units" not in attrs:
        attrs["units"] = "s"
    attrs["filetype"] = "csv"
    attrs["creator"] = "plexon"
    data = barkify_csv(args.name)
    bark.write_events(args.out, data, **attrs)

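# Hypothetical command-line sketch (the installed entry-point name is an
# assumption; substitute whatever the package registers for this function):
#
#     $ plexon-csv-to-bark-csv spikes.csv -o spikes_events.csv -a probe=A1
#
# Each -a KEY=VALUE pair becomes an attribute in the output metadata.
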
def _waveclus2csv():
    "shell script"
    import argparse
    p = argparse.ArgumentParser(description="""
    Converts a wave_clus times_*.m file to a bark csv.
    """)
    p.add_argument("name",
                   help="""Name of wave_clus times*.m file(s); if multiple
                   files, assume they are ordered by channel.
                   """,
                   nargs="+")
    p.add_argument("-o", "--out", help="name of output csv file")
    p.add_argument("-a", "--attributes",
                   action='append',
                   # split on the first '=' only, so values may contain '='
                   type=lambda kv: kv.split("=", 1),
                   dest='keyvalues',
                   help="extra metadata in the form of KEY=VALUE")
    args = p.parse_args()
    if args.keyvalues:
        attrs = dict(args.keyvalues)
    else:
        attrs = {}
    attrs["filetype"] = "csv"
    attrs["creator"] = "wave_clus"
    attrs["columns"] = {"name": {"units": None}, "start": {"units": "s"}}
    data = pd.concat(load_clusters(x, i) for i, x in enumerate(args.name))
    bark.write_events(args.out, data, **attrs)

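# Hypothetical command-line sketch (entry-point name assumed); multiple input
# files are treated as successive channels in the order given:
#
#     $ waveclus2csv times_ch1.m times_ch2.m -o spikes.csv -a rig=2
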
def save(self):
    'Writes out labels to file.'
    from pandas import DataFrame
    label_data = DataFrame(self.opstack.events)
    bark.write_events(self.outfile, label_data, **self.label_attrs)
    print(self.outfile, 'written')
    if self.opsfile:
        write_stack(self.opsfile, self.opstack)
        print(self.opsfile, 'written')

def transfer_dset(ds_name, ds, e_path, verbose=False):
    ds_attrs = copy_attrs(ds.attrs)
    units = ds_attrs.pop('units', None)
    if arf.is_time_series(ds):
        ds_name += '.dat'
        ds_path = os.path.join(e_path, ds_name)
        ds_attrs['columns'] = build_columns(units)
        sr = ds_attrs.pop('sampling_rate')
        bark_ds = bark.write_sampled(ds_path, ds, sr, **ds_attrs)
        if verbose:
            print('Created sampled dataset: ' + ds_path)
    elif arf.is_marked_pointproc(ds):
        ds_name += '.csv'
        ds_path = os.path.join(e_path, ds_name)
        ds_data = pandas.DataFrame(ds[:])
        ds_attrs['columns'] = build_columns(units,
                                            column_names=ds_data.columns)
        for ser in ds_data:
            if ds_data[ser].dtype == numpy.dtype('O'):  # bytes object
                ds_data[ser] = ds_data[ser].str.decode('utf-8')
        bark_ds = bark.write_events(ds_path, ds_data, **ds_attrs)
        if verbose:
            print('Created event dataset: ' + ds_path)
    else:
        unknown_ds_warning(ds_name)

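# Hedged usage sketch: walking the datasets of one ARF entry (an h5py group)
# into a Bark entry directory. File and entry names are hypothetical;
# transfer_dset chooses the .dat or .csv form from the dataset type.
#
#     with h5py.File('recording.arf', 'r') as af:
#         for ds_name, ds in af['entry_001'].items():
#             transfer_dset(ds_name, ds, 'bark_root/entry_001', verbose=True)
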
def test_read_dataset(tmpdir):
    path = os.path.join(tmpdir.strpath, 'test_events')
    data = pd.DataFrame({'start': [0, 1, 2, 3],
                         'stop': [1, 2, 3, 4],
                         'name': ['a', 'b', 'c', 'd']})
    event_written = bark.write_events(path,
                                      data,
                                      columns={'start': {'units': 's'},
                                               'stop': {'units': 's'},
                                               'name': {'units': None}})
    event_read = bark.read_dataset(path)
    assert isinstance(event_read, bark.EventData)
    path = os.path.join(tmpdir.strpath, 'test_samp')
    data = np.zeros((10, 3), dtype="int16")
    params = {'sampling_rate': 30000, 'units': 'mV', 'unit_scale': 0.025}
    samp_written = bark.write_sampled(path, data=data, **params)
    samp_read = bark.read_dataset(path)
    assert isinstance(samp_read, bark.SampledData)

def main():
    import argparse
    p = argparse.ArgumentParser(description='''
    Convert Spyking Circus PHY GUI output to Bark event dataset.
    ''')
    p.add_argument('phydir', help='directory containing PHY GUI output files')
    p.add_argument('out', help='name of output event dataset')
    p.add_argument('-r', '--rate',
                   required=True,
                   type=float,
                   help='sampling rate of original data')
    args = p.parse_args()
    data = create_data(args.phydir, args.rate)
    attrs = create_metadata(args.phydir)
    bark.write_events(args.out, data, **attrs)

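# Hypothetical invocation sketch (script name assumed):
#
#     $ python sc_phy_to_bark.py ./phy_output spikes.csv -r 30000.0
#
# create_data and create_metadata, defined elsewhere in this module, are
# presumably responsible for reading the PHY output files and assembling the
# event DataFrame and its attributes.
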
def main(in_csv,
         out_csv,
         noise_name='z',
         song_tier=None,
         boundary_length=0.00,
         boundary_label='__'):
    dset = bark.read_events(in_csv)
    df = dset.data
    if song_tier:
        df = strip_tiers(df, song_tier)
    df = shorten_and_lowercase_names(df)
    df = remove_noise_samples(df, noise_name)
    if boundary_length > 0:
        df = add_boundaries(df,
                            boundary_size=boundary_length,
                            boundary_label=boundary_label)
    bark.write_events(out_csv, df, **dset.attrs)

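# Hedged usage sketch (file names and tier name hypothetical): clean a label
# file by keeping one tier, dropping noise ('z') events, and, presumably,
# inserting 10 ms '__' boundary events around the remaining labels:
#
#     main('raw_labels.csv', 'clean_labels.csv', noise_name='z',
#          song_tier='syllables', boundary_length=0.010)
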
def test_write_events(tmpdir):
    path = os.path.join(tmpdir.strpath, "test_events")
    data = pd.DataFrame({'start': [0, 1, 2, 3],
                         'stop': [1, 2, 3, 4],
                         'name': ['a', 'b', 'c', 'd']})
    events = bark.write_events(path, data, units='s')
    assert isinstance(events, bark.EventData)
    assert 'start' in events.data.columns
    assert 'stop' in events.data.columns
    assert 'name' in events.data.columns
    assert np.allclose([0, 1, 2, 3], events.data.start)

def write(outfile, starts, stops, labels):
    if len(labels) < len(starts):
        print('warning, discarding {} events'.format(
            len(starts) - len(labels)))
        # truncate stops as well, so the DataFrame columns stay equal length
        starts = starts[:len(labels)]
        stops = stops[:len(labels)]
    outdset = pd.DataFrame(dict(start=starts, stop=stops, name=labels))
    columns = {'start': {'units': 's'},
               'stop': {'units': 's'},
               'name': {'units': None}}
    bark.write_events(outfile, outdset, columns=columns)

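# Usage sketch (values hypothetical): if labelling stopped early, trailing
# unlabelled events are discarded with a warning.
#
#     write('labels.csv', starts=[0.0, 1.5, 3.2], stops=[1.0, 2.5, 4.0],
#           labels=['a', 'b'])   # keeps only the first two events
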
def extract_sc(entry_fn, dataset, sc_suffix, out_fn):
    sr = bark.read_metadata(os.path.join(entry_fn, dataset))['sampling_rate']
    # determine file names
    results_path = get_sc_path(entry_fn, dataset, sc_suffix, 'result')
    templates_path = get_sc_path(entry_fn, dataset, sc_suffix, 'templates')
    # extract times and amplitudes
    with h5py.File(results_path, 'r') as rf:
        cluster_times = {unique_temp_name(name):
                         np.array(indices).astype(float) / sr
                         for name, indices in rf['spiketimes'].items()}
        cluster_amplitudes = {unique_temp_name(name): np.array(amplitudes)
                              for name, amplitudes in rf['amplitudes'].items()}
    cluster_names = sorted(cluster_times.keys(), key=int)
    event_list = []
    for n in cluster_names:
        event_list.extend([SpikeEvent(n, time[0], amp[0])
                           for time, amp in zip(cluster_times[n],
                                                cluster_amplitudes[n])])
    event_list.sort(key=lambda se: se.time)
    # extract grades and center pad
    with h5py.File(templates_path, 'r') as tf:
        cluster_grades = [SC_GRADES_DICT[tag[0]] for tag in tf['tagged']]
        cluster_grades = {n: cluster_grades[idx]
                          for idx, n in enumerate(cluster_names)}
        NUM_TEMPLATES = int(tf['temp_shape'][2][0] / 2)
        NUM_CHANNELS = int(tf['temp_shape'][0][0])
        NUM_SAMPLES = int(tf['temp_shape'][1][0])
        CHAN_BY_SAMPLE = NUM_CHANNELS * NUM_SAMPLES
        full_templates = {}
        for t in range(NUM_TEMPLATES):
            y_vals = tf['temp_y'][0] == t
            x_vals = tf['temp_x'][:, y_vals][0].astype(int)
            reconst = np.zeros(CHAN_BY_SAMPLE)
            for loc in x_vals:
                reconst[loc] = tf['temp_data'][:, loc][0]
            reshaped = reconst.reshape((NUM_CHANNELS, -1))
            full_templates[t] = np.copy(reshaped)
    center_channel = {}
    for t in full_templates:
        # note that this assumes negative-going spikes
        min_across_channels = list(np.amin(full_templates[t], axis=1))
        total_min = min(min_across_channels)
        center_channel[str(t)] = min_across_channels.index(total_min)
    # write times and amplitudes to event dataset
    attrs = {'columns': {'start': {'units': 's'},
                         'name': {'units': None},
                         'amplitude': {'units': None}},
             'datatype': 1001,
             'sampling_rate': sr,
             'templates': {name: {'score': cluster_grades[name],
                                  'sc_name': long_temp_name(name),
                                  'center_channel': center_channel[name]}
                           for name in cluster_names}}
    return bark.write_events(
        os.path.join(entry_fn, out_fn),
        pandas.DataFrame({
            'start': [event.time for event in event_list],
            'name': [event.name for event in event_list],
            'amplitude': [event.amplitude for event in event_list]
        }),
        **attrs)

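# Hedged usage sketch (entry, dataset, and suffix names are hypothetical):
#
#     extract_sc('entry_001', 'raw.dat', sc_suffix='-merged',
#                out_fn='spikes.csv')
#
# The returned event dataset carries per-template metadata ('score',
# 'sc_name', 'center_channel') under its 'templates' attribute.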