def datenrich(dat, out, label_file, window):
    """Extract the labelled regions of a sampled dataset into a new file.

    Reads the sampled data in `dat` and the event labels in `label_file`,
    computes padded segments around the labels via `get_segments`, and
    writes the concatenated raw samples to `out`, along with copied
    metadata and a CSV of the re-based labels.

    Parameters
    ----------
    dat : str
        Path to the bark sampled dataset to cut from.
    out : str
        Path of the new raw data file to create.
    label_file : str
        Path to the bark event dataset with labelled segments
        (must have `start`/`stop` columns in seconds).
    window : float
        Padding (seconds) to include around each labelled segment;
        passed through to `get_segments`.
    """
    dataset = bark.read_sampled(dat)
    data, params = dataset.data, dataset.attrs
    rate = params["sampling_rate"]
    total_samples = data.shape[0]
    # cut out labelled segments
    label_dset = bark.read_events(label_file)
    for x in label_dset.data.itertuples():
        # Sanity-check every label lies inside the recording.
        # NOTE(review): asserts are stripped under `python -O`; raising
        # ValueError would be more robust input validation.
        assert x.start > 0
        assert x.start * rate < total_samples
        assert x.stop > 0
        assert x.stop * rate < total_samples
        if x.start - window < 0:
            # NOTE(review): this only warns — the segment is not clamped
            # here; presumably get_segments handles the boundary. Confirm.
            print('warning, cannot place a full window at beginning of data')
    segs, newlabels = get_segments(label_dset.data, window)
    # convert to samples
    # (assumes segs supports element-wise multiplication, e.g. an ndarray
    #  or DataFrame of second-valued start/stop pairs — TODO confirm)
    segs = np.array(segs * rate, dtype=int)
    # write to new file
    with open(out, "wb") as outfp:
        for start, stop in segs:
            assert stop > 0
            assert start < total_samples
            assert start >= 0
            if stop >= total_samples:
                # clamp a window that runs past the end of the recording
                print('warning, cannot place a full window at end of data')
                stop = total_samples - 1
            outfp.write(data[start:stop, :].tobytes())
    # copy the source metadata to the new dat file, and write the
    # re-based labels next to it as <out-without-ext>.csv
    bark.write_metadata(out, **params)
    bark.write_events(
        os.path.splitext(out)[0] + ".csv", newlabels, **label_dset.attrs)
def main(datfile, labelfile, outfile=None, shortcutfile=None, use_ops=True):
    """Launch the interactive segment-review GUI for a sampled dataset.

    If *labelfile* is falsy it defaults to the dat file's name with a
    .csv extension; *outfile* defaults to the label file's name with an
    _edit.csv suffix. Pending edits are staged in an op-stack JSON file
    (<labelfile>.ops.json) so a session can be resumed.
    """
    if not labelfile:
        labelfile = os.path.splitext(datfile)[0] + '.csv'
    kill_shortcuts(plt)
    sampled = bark.read_sampled(datfile)
    # the reviewer only supports single-channel recordings
    assert len(sampled.attrs['columns']) == 1
    labels = bark.read_events(labelfile)
    records = to_seconds(labels).data.to_dict('records')
    if not records:
        print('{} has no data'.format(labelfile))
        return
    shortcuts = build_shortcut_map(shortcutfile)
    opsfile = labelfile + '.ops.json'
    opstack = load_opstack(opsfile, labelfile, records, use_ops)
    if not outfile:
        outfile = os.path.splitext(labelfile)[0] + '_edit.csv'
    plt.figure()
    # Oscillogram and spectrogram each get three times the vertical
    # space of the minimap.
    osc_ax = plt.subplot2grid((7, 1), (0, 0), rowspan=3)
    spec_ax = plt.subplot2grid((7, 1), (3, 0), rowspan=3, sharex=osc_ax)
    map_ax = plt.subplot2grid((7, 1), (6, 0))
    # SegmentReviewer is a context manager so that a save prompt is
    # issued on exit; see SegmentReviewer.__exit__.
    with SegmentReviewer(osc_ax, spec_ax, map_ax, sampled, opstack,
                         shortcuts, outfile, labels.attrs,
                         opsfile) as reviewer:
        reviewer.connect()
        plt.show(block=True)
def test_main(tmpdir):
    """main() should place one event per period of a sawtooth signal."""
    dat_path = str(tmpdir.join('test.dat'))
    csv_path = str(tmpdir.join('test.csv'))
    signal = np.arange(100).reshape(-1, 1) % 10
    bark.write_sampled(dat_path, signal, sampling_rate=10)
    main(dat_path, csv_path, .1, 3)
    events = bark.read_events(csv_path)
    for column in ('start', 'channel'):
        assert column in events.data.columns
    # peaks sit at samples 9, 19, ..., 99 -> seconds at 10 Hz
    expected = np.arange(9, 100, 10) / 10
    assert np.allclose(events.data.start, expected)
def main(datfile, trigfile, outfile, wavfiles):
    """Classify each trigger in *trigfile* against the wav stimuli and
    write a labelled event file to *outfile*.

    The microphone recording and every stimulus are compared at a
    common sampling rate via their amplitude envelopes.
    """
    common_sr = 22050  # everything is resampled to this
    names, envelopes = wav_envelopes(wavfiles, common_sr)
    recording = bark.read_sampled(datfile)
    starts = bark.read_events(trigfile).data.start
    # most likely stimulus for each trigger time
    labels = classify_stimuli(recording.data, recording.sampling_rate,
                              starts, names, envelopes, common_sr)
    stops = get_stops(labels, starts, names, envelopes, common_sr)
    write(outfile, starts, stops, labels)
def getfiles():
    """Prompt the user for data files and an optional label file.

    Opens a multi-file dialog for sampled .dat files (exiting the
    application if none are chosen), then a single-file dialog for an
    optional read-only label file.

    Returns
    -------
    (files, sampled, origin_labels)
        The chosen file names, the corresponding bark sampled datasets,
        and a DataFrame of labels (empty if no label file was chosen).
    """
    dialog = FileDialog()
    files = dialog.openFileNamesDialog()
    if not files:
        sys.exit(app.exec_())
    sampled = [bark.read_sampled(f) for f in files]
    readonlylabelfile = dialog.openFileNameDialog()
    if not readonlylabelfile:
        import pandas as pd
        origin_labels = pd.DataFrame()
    else:
        origin_labels = bark.read_events(readonlylabelfile).data
    # BUG FIX: previously returned `sampled` twice and silently discarded
    # origin_labels, so the label file the user picked was never used.
    return files, sampled, origin_labels
def _main():
    """CLI entry point: save one stimulus-aligned spike raster per unit.

    Loads spike and stimulus-time event datasets, optionally loads the
    stimulus itself (from a .wav or a bark sampled dataset), then writes
    a raster figure per unit to disk.
    """
    args = _parse_args(sys.argv[1:])
    spike_ds = bark.read_events(args.spikes)
    stim_time_ds = bark.read_events(args.stimtimes)
    stimulus = None
    if args.stim:
        if os.path.splitext(args.stim)[-1] == '.wav':
            sr, samples = scipy.io.wavfile.read(args.stim)
            stimulus = Stimulus(args.name, samples, sr)
        else:
            ds = bark.read_sampled(args.stim)
            stimulus = Stimulus(args.name, ds.data, ds.sampling_rate)
    title_str = '"{}"-aligned spike raster, unit {}'
    fn_str = '{}_aligned_raster_unit_{}.{}'
    for unit in set(spike_ds['name']):
        unit_spikes = spike_ds[spike_ds['name'] == unit]['start']
        fig = aligned_raster(unit_spikes,
                             stim_time_ds,
                             args.name,
                             padding=(args.bef, args.aft),
                             title=title_str.format(args.name, unit),
                             stim_data=stimulus)
        fig.savefig(fn_str.format(args.name, unit, args.ext))
        plt.close(fig)
def main(in_csv, out_csv, noise_name='z', song_tier=None, boundary_length=0.00, boundary_label='__'):
    """Clean an event CSV and write the result to *out_csv*.

    Applies, in order: optional tier stripping, name shortening and
    lowercasing, noise-sample removal, and (when boundary_length > 0)
    insertion of boundary labels around events.
    """
    dset = bark.read_events(in_csv)
    events = dset.data
    if song_tier:
        events = strip_tiers(events, song_tier)
    events = shorten_and_lowercase_names(events)
    events = remove_noise_samples(events, noise_name)
    if boundary_length > 0:
        events = add_boundaries(events,
                                boundary_size=boundary_length,
                                boundary_label=boundary_label)
    bark.write_events(out_csv, events, **dset.attrs)
def read_files(bird_dir, load_events):
    """Load every .dat dataset under *bird_dir* for training/testing.

    bird_dir: location of data
    load_events: when true, also load the matching .csv event files

    Returns the list of sampled datasets, plus — only when load_events
    is true — a second list of event datasets.
    """
    data_files = glob(join(bird_dir, "*.dat"))
    print('number of files: ', len(data_files))
    sampled_dsets = [bark.read_sampled(f) for f in data_files]
    if not load_events:
        return sampled_dsets
    # each .dat has a sibling .csv with the same stem
    event_dsets = [bark.read_events(splitext(f)[0] + ".csv")
                   for f in data_files]
    return sampled_dsets, event_dsets
def readfiles(outfile=None, shortcutfile=None, use_ops=True):
    """Read all files from the fileDialog and create files if those files
    are missing. If no .dat files, exit.
    Auto find label file named with '[dat_name]_split.csv'
    If not exist, create a new one with customize label and a .meta file
    create opstack and outfiles
    Returns: origin_labels, trace_num, channelname, gap, sampled, opstack,
             shortcuts, outfile, labels.attrs, opsfile
    """
    gap = 0
    file = FileDialog()
    files = file.openFileNamesDialog()
    if not files:
        # user cancelled the dialog: hand control back to Qt and exit
        sys.exit(app.exec_())
    files.reverse()
    sampled = [bark.read_sampled(file) for file in files]
    # optional read-only reference labels
    readonlylabelfile = file.openFileNameDialog()
    if not readonlylabelfile:
        import pandas as pd
        origin_labels = pd.DataFrame()
    else:
        origin_labels = bark.read_events(readonlylabelfile).data
    trace_num = len(files)
    # the label file is named after the first selected dat file
    dat = files[0]
    labelfile = os.path.splitext(dat)[0] + '_split.csv'
    # NOTE(review): `exist` is computed but never used below
    exist = os.path.exists(labelfile)
    kill_shortcuts(plt)
    opsfile = labelfile + '.ops.json'
    # NOTE(review): `metadata` is computed but never used below
    metadata = labelfile + '.meta.yaml'
    if not os.path.exists(labelfile):
        write_metadata(labelfile)
    # NOTE(review): same condition checked twice in a row — if
    # write_metadata() creates `labelfile`, this second branch is dead;
    # the first guard may have been meant to test the .meta.yaml path.
    if not os.path.exists(labelfile):
        # ask the user for a segment gap and synthesize an initial
        # label file covering the whole recording
        showDia = Input()
        gap = int(showDia.showDialog())
        start = 0
        end = int(
            round(len(sampled[0].data) / sampled[0].attrs["sampling_rate"]))
        trace_num = len(sampled)
        createlabel(labelfile, start, end, gap)
    labels = bark.read_events(labelfile)
    labeldata = to_seconds(labels).data.to_dict('records')
    if len(labeldata) == 0:
        print('{} contains no intervals.'.format(labelfile))
        return
    opstack = load_opstack(opsfile, labelfile, labeldata, use_ops)
    if not gap:
        if len(opstack.events) == 0:
            # NOTE(review): formatting `opstack` (an object) here looks
            # wrong — `opsfile` (the path to delete) was probably intended
            print('opstack is empty. Please delete {}.'.format(opstack))
            return
        # infer the gap from the first existing event's extent
        gap = opstack.events[0]['stop'] - opstack.events[0]['start']
    shortcuts = build_shortcut_map(shortcutfile)
    #create a new outfile
    if not outfile:
        outfile = os.path.splitext(labelfile)[0] + '_edit.csv'
    # derive a channel name from each file's basename
    channelname = []
    import re
    for name in files:
        # NOTE(review): the '.' before 'dat' is unescaped, and this
        # pattern requires a '/' so bare or Windows-style paths won't
        # match (searchObj would be None) — confirm against callers
        searchObj = re.search(r'(.*)/(.*).dat', name, re.M | re.I)
        channelname.append(searchObj.group(2))
    return origin_labels, trace_num, channelname, gap, sampled, opstack, shortcuts, outfile, labels.attrs, opsfile