def datenrich(dat, out, label_file, window):
    '''Extracts each labelled segment, plus a window of padding on either
    side, from a sampled dataset and concatenates them into a new file.'''
    dataset = bark.read_sampled(dat)
    data, params = dataset.data, dataset.attrs
    rate = params["sampling_rate"]
    total_samples = data.shape[0]
    # cut out labelled segments
    label_dset = bark.read_events(label_file)
    for x in label_dset.data.itertuples():
        assert x.start > 0
        assert x.start * rate < total_samples
        assert x.stop > 0
        assert x.stop * rate < total_samples
        if x.start - window < 0:
            print('warning, cannot place a full window at beginning of data')
    segs, newlabels = get_segments(label_dset.data, window)
    # convert to samples
    segs = np.array(segs * rate, dtype=int)
    # write to new file
    with open(out, "wb") as outfp:
        for start, stop in segs:
            assert stop > 0
            assert start < total_samples
            assert start >= 0
            if stop >= total_samples:
                print('warning, cannot place a full window at end of data')
                stop = total_samples - 1
            outfp.write(data[start:stop, :].tobytes())
    bark.write_metadata(out, **params)
    bark.write_events(os.path.splitext(out)[0] + ".csv",
                      newlabels,
                      **label_dset.attrs)
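# Usage sketch for datenrich (file names are hypothetical). Assumes
# 'raw.dat' is a bark sampled dataset and 'labels.csv' a bark event
# dataset with start/stop columns in seconds; this keeps each labelled
# segment plus 1 s of padding and writes the shifted labels alongside:
#
#   datenrich('raw.dat', 'enriched.dat', 'labels.csv', window=1.0)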
def lbl_to_csv(fname, csvname, **attrs):
    '''Converts an lbl file to a csv.'''
    import pandas as pd
    lblstruct = read(fname)
    csvdata = pd.DataFrame(lblstruct)
    csvdata.to_csv(csvname, index=False)
    write_metadata(csvname, **attrs)
def datref(datfile, outfile):
    dataset = bark.read_sampled(datfile)
    data, params = dataset.data, dataset.attrs
    outparams = params.copy()
    bark.write_sampled(outfile, data, outparams)
    outdset = bark.read_sampled(outfile, 'r+')
    out = outdset.data
    # determine reference coefficient
    n_channels = len(params["columns"])
    coefs = np.zeros((n_channels, len(range(0, len(out), BUF))))
    power = np.zeros_like(coefs)
    for ith, i in enumerate(range(0, len(out), BUF)):
        for c in range(n_channels):
            refs = np.delete(data[i:i + BUF, :], c, axis=1)  # remove col c
            ref = np.mean(refs, axis=1)
            x = data[i:i + BUF, c]
            coefs[c, ith] = np.dot(x, ref) / np.dot(ref, ref)
            # buffer power, so the percentile mask below can select
            # high-power buffers
            power[c, ith] = np.dot(x, x)
    best_C = np.zeros(n_channels)
    for c in range(n_channels):
        c_coefs = coefs[c, :]
        c_power = power[c, :]
        mask = c_power >= np.percentile(c_power, 90)
        best_C[c] = np.nanmean(c_coefs[mask])
    print("best reference coefficients: {}".format(best_C))
    for i, c in enumerate(best_C):
        outparams['columns'][i]['reference_coefficient'] = float(c)
    for i in range(0, len(out), BUF):
        for c in range(n_channels):
            refs = np.delete(data[i:i + BUF, :], c, axis=1)  # remove col c
            # subtract the scaled median reference
            out[i:i + BUF, c] = (data[i:i + BUF, c] -
                                 best_C[c] * np.median(refs, axis=1))
    bark.write_metadata(outfile, **outparams)
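# A minimal sanity sketch (hypothetical helper, not called anywhere in the
# pipeline): the per-buffer coefficient in datref is the closed-form
# least-squares fit of channel x onto the mean reference,
# c = <x, ref> / <ref, ref>, which minimizes ||x - c * ref||**2.
# Checked here against numpy's general solver on synthetic data.
def _lstsq_coef_demo():
    import numpy as np
    x = np.array([1.0, 2.0, 3.0])
    ref = np.array([0.5, 1.1, 1.4])
    c_closed = np.dot(x, ref) / np.dot(ref, ref)
    c_lstsq = np.linalg.lstsq(ref[:, None], x, rcond=None)[0][0]
    assert np.isclose(c_closed, c_lstsq)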
def rb_filter():
    p = argparse.ArgumentParser(description="filter a sampled dataset")
    p.add_argument("dat", help="dat file")
    p.add_argument("-o", "--out", help="name of output dat file")
    p.add_argument("--order", help="filter order", default=3, type=int)
    p.add_argument("--highpass", help="highpass frequency", type=float)
    p.add_argument("--lowpass", help="lowpass frequency", type=float)
    p.add_argument("-f", "--filter",
                   help="filter type: butter or bessel",
                   default="bessel")
    opt = p.parse_args()
    dtype = bark.read_metadata(opt.dat)['dtype']
    (stream.read(opt.dat)
     ._analog_filter(opt.filter,
                     highpass=opt.highpass,
                     lowpass=opt.lowpass,
                     order=opt.order)
     .write(opt.out, dtype))
    attrs = bark.read_metadata(opt.out)
    attrs['highpass'] = opt.highpass
    attrs['lowpass'] = opt.lowpass
    attrs['filter'] = opt.filter
    attrs['filter_order'] = opt.order
    bark.write_metadata(opt.out, **attrs)
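# Command-line sketch (the script/entry-point name is hypothetical):
#
#   bark-filter raw.dat -o filtered.dat --highpass 300 --lowpass 3000 \
#       --order 3 --filter butter
#
# Passing only --highpass or only --lowpass should yield a one-sided
# filter, and the settings are recorded in the output's metadata.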
def main(dat, csv, thresh, is_std, order=default_order, min_dist=0):
    if is_std:
        std = compute_std(dat)
        threshs = thresh * std
    else:
        # make threshs a vector if it's a scalar
        n_channels = bark.read_sampled(dat).data.shape[1]
        threshs = np.ones(n_channels) * thresh
    print('thresholds:', threshs)
    s = stream.read(dat)
    pad_len = order
    with open(csv, 'w') as fp:
        fp.write('channel,start\n')
        for (channel, sample) in stream_spikes(s, threshs, pad_len, order,
                                               min_dist * s.sr):
            fp.write('{},{}\n'.format(channel, sample / s.sr))
    bark.write_metadata(csv,
                        datatype=1000,
                        columns={'channel': {'units': None},
                                 'start': {'units': 's'}},
                        thresholds=threshs,
                        order=order,
                        source=dat)
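# Usage sketch (hypothetical paths): detect threshold crossings at 4.5
# standard deviations per channel, with detected events at least 1 ms apart:
#
#   main('probe.dat', 'spikes.csv', thresh=4.5, is_std=True, min_dist=0.001)
#
# With is_std=False, thresh is interpreted in raw signal units instead.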
def datref(datfile, outfile):
    shutil.copyfile(datfile, outfile)
    shutil.copyfile(datfile + '.meta.yaml', outfile + '.meta.yaml')
    outdset = bark.read_sampled(outfile, 'r+')
    out = outdset.data
    # determine reference coefficient
    n_samples, n_channels = out.shape
    coefs = np.zeros((n_channels, len(range(0, n_samples, BUF))))
    power = np.zeros_like(coefs)
    for ith, i in enumerate(range(0, n_samples, BUF)):
        total_mean = np.mean(out[i:i + BUF, :], axis=1)
        for c in range(n_channels):
            x = out[i:i + BUF, c]
            # this way we avoid re-calculating the entire mean for each channel
            ref = (total_mean * n_channels - x) / (n_channels - 1)
            coefs[c, ith] = np.dot(x, ref) / np.dot(ref, ref)
            # buffer power, so the percentile mask below can select
            # high-power buffers
            power[c, ith] = np.dot(x, x)
    best_C = np.zeros(n_channels)
    for c in range(n_channels):
        c_coefs = coefs[c, :]
        c_power = power[c, :]
        mask = c_power >= np.percentile(c_power, 90)
        best_C[c] = np.nanmean(c_coefs[mask])
    print("best reference coefficients: {}".format(best_C))
    for i, c in enumerate(best_C):
        outdset.attrs['columns'][i]['reference_coefficient'] = float(c)
    # we want to avoid re-calculating the median from scratch for each channel
    # unfortunately, the "new median after removing an element" calculation
    # is less succinct than for the mean
    if n_channels % 2 == 0:
        median_idx = [int(n_channels / 2) - 1, int(n_channels / 2)]
        idx_smaller = [median_idx[0] + 1]  # new median if elt removed < median
        idx_equal = [median_idx[0]]  # new median if elt removed == median
        idx_greater = [median_idx[0]]  # new median if elt removed > median
    else:
        median_idx = [int(n_channels / 2)]
        idx_smaller = [median_idx[0], median_idx[0] + 1]
        idx_equal = [median_idx[0] - 1, median_idx[0] + 1]
        idx_greater = [median_idx[0] - 1, median_idx[0]]
    for i in range(0, n_samples, BUF):
        sorted_buffer = np.sort(out[i:i + BUF, :], axis=1)
        total_medians = np.mean(sorted_buffer[:, median_idx], axis=1)
        new_med_smaller = np.mean(sorted_buffer[:, idx_smaller], axis=1)
        new_med_equal = np.mean(sorted_buffer[:, idx_equal], axis=1)
        new_med_greater = np.mean(sorted_buffer[:, idx_greater], axis=1)
        for c in range(n_channels):
            less = np.less(out[i:i + BUF, c], total_medians)
            equal = np.equal(out[i:i + BUF, c], total_medians)
            greater = np.greater(out[i:i + BUF, c], total_medians)
            out[i:i + BUF, c][less] = (out[i:i + BUF, c][less] -
                                       best_C[c] * new_med_smaller[less])
            out[i:i + BUF, c][equal] = (out[i:i + BUF, c][equal] -
                                        best_C[c] * new_med_equal[equal])
            out[i:i + BUF, c][greater] = (out[i:i + BUF, c][greater] -
                                          best_C[c] * new_med_greater[greater])
    bark.write_metadata(outfile, **outdset.attrs)
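# A minimal sanity sketch (hypothetical helper, not called by datref): the
# leave-one-out reference above relies on the identity
# mean(all but x_c) = (n * mean(all) - x_c) / (n - 1),
# which avoids an np.delete/np.mean pass per channel per buffer.
def _loo_mean_demo():
    import numpy as np
    row = np.array([2.0, 4.0, 6.0, 8.0])
    n = len(row)
    for c in range(n):
        loo = (row.mean() * n - row[c]) / (n - 1)
        assert np.isclose(loo, np.delete(row, c).mean())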
def board_adc_metadata(result, dsetname):
    attrs = dict(
        dtype=result['board_adc_data'].dtype.str,
        sampling_rate=result['frequency_parameters']['board_adc_sample_rate'],
    )
    columns = {i: chan_attrs
               for i, chan_attrs in enumerate(result['board_adc_channels'])}
    for k in columns:
        columns[k]['units'] = 'V'
        columns[k]['unit_scale'] = result['ADC_input_bit_volts']
    write_metadata(dsetname, columns=columns, **attrs)
def amplifier_metadata(result, dsetname):
    attrs = dict(
        dtype=result['amplifier_data'].dtype.str,
        sampling_rate=result['frequency_parameters']['amplifier_sample_rate'],
    )
    attrs.update(result['frequency_parameters'])
    columns = {i: chan_attrs
               for i, chan_attrs in enumerate(result['amplifier_channels'])}
    for k in columns:
        columns[k]['units'] = 'uV'
        columns[k]['unit_scale'] = result['amplifier_bit_microvolts']
    write_metadata(dsetname, columns=columns, **attrs)
def write_csv(textgrid_list,
              filename=None,
              sep=",",
              header=True,
              save_gaps=False,
              meta=True):
    """
    Writes a list of textgrid Entry tuples to a csv file.
    If no filename is specified, csv is printed to standard out.
    """
    columns = list(Entry._fields)
    if filename:
        f = open(filename, "w")
    if header:
        hline = sep.join(columns)
        if filename:
            f.write(hline + "\n")
        else:
            print(hline)
    for entry in textgrid_list:
        if entry.name or save_gaps:  # skip unlabeled intervals unless saving gaps
            row = sep.join(str(x) for x in list(entry))
            if filename:
                f.write(row + "\n")
            else:
                print(row)
    if filename:
        f.flush()
        f.close()
    if meta and filename:
        attrs = {'datatype': 2000,
                 'creator': 'praat',
                 'columns': {'name': {'units': None},
                             'tier': {'units': None},
                             'start': {'units': 's'},
                             'stop': {'units': 's'}}}
        bark.write_metadata(filename, **attrs)
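# Usage sketch, assuming Entry is a namedtuple whose fields match the
# metadata block above (name, tier, start, stop); values are hypothetical:
#
#   from collections import namedtuple
#   Entry = namedtuple('Entry', ['name', 'tier', 'start', 'stop'])
#   intervals = [Entry('a', 'syllables', 0.10, 0.35),
#                Entry('', 'syllables', 0.35, 0.50),  # gap: skipped unless save_gaps
#                Entry('b', 'syllables', 0.50, 0.82)]
#   write_csv(intervals, 'song_labels.csv')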
def write_from_kwd(kwd, dat):
    all_data = load_all(kwd)
    n_channels = all_data[0]['data'].shape[1]
    for group_i, data in enumerate(all_data):
        # check channel count before appending this group
        assert data["data"].shape[1] == n_channels
        write_binary(dat, data["data"])
        sampling_rate = data["info"]["sample_rate"]
    columns = {i: {'units': 'uV',
                   'unit_scale': float(data['app_attrs']['channel_bit_volts'][i])}
               for i in range(n_channels)}
    write_metadata(dat,
                   sampling_rate=sampling_rate,
                   dtype=data['data'].dtype.str,
                   columns=columns)
def meta_attr():
    p = argparse.ArgumentParser(
        description="Create/Modify a metadata attribute")
    p.add_argument("name", help="name of bark object (Entry or Dataset)")
    p.add_argument("attribute",
                   help="name of bark attribute to create or modify")
    p.add_argument("value", help="value of attribute")
    args = p.parse_args()
    name, attr, val = (args.name, args.attribute, args.value)
    attrs = bark.read_metadata(name)
    try:
        attrs[attr] = eval(val)  # try to parse
    except Exception:
        attrs[attr] = val  # assign as string
    bark.write_metadata(name, **attrs)
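# Command-line sketch (the entry-point name is hypothetical):
#
#   bark-attribute mydata.dat sampling_rate 30000
#
# Values go through eval() first, so '30000' is stored as an int and
# '[1, 2, 3]' as a list, while anything unparseable is stored verbatim
# as a string.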
def write_metadata(path):
    params = {'columns': {'name': {'units': None},
                          'start': {'units': 's'},
                          'stop': {'units': 's'}},
              'datatype': 2002}
    bark.write_metadata(path, **params)
def meta_column_attr():
    p = argparse.ArgumentParser(
        description="Create/Modify a metadata attribute for a column of data")
    p.add_argument("name", help="name of bark object (Entry or Dataset)")
    p.add_argument("column", help="name of the column of a Dataset")
    p.add_argument("attribute",
                   help="name of bark attribute to create or modify")
    p.add_argument("value", help="value of attribute")
    args = p.parse_args()
    name, column, attr, val = (args.name, args.column, args.attribute,
                               args.value)
    attrs = bark.read_metadata(name)
    columns = attrs['columns']
    if 'dtype' in attrs:  # sampled datasets index their columns by integer
        column = int(column)
    try:
        columns[column][attr] = eval(val)  # try to parse
    except Exception:
        columns[column][attr] = val  # assign as string
    bark.write_metadata(name, **attrs)
def write(self, filename, dtype=None, **new_attrs):
    """ Saves to disk as raw binary """
    attrs = self.attrs.copy()
    attrs["sampling_rate"] = self.sr
    if dtype:
        with open(filename, "wb") as fp:
            for data in self:
                fp.write(data.astype(dtype).tobytes())
    else:
        # we don't know the datatype until we stream
        with open(filename, "wb") as fp:
            for data in self:
                fp.write(data.tobytes())
        dtype = data.dtype.str
    attrs["dtype"] = dtype
    try:
        bark.sampled_columns(data, attrs['columns'])  # validate existing columns
    except (ValueError, KeyError):
        print('warning, column attribute was mangled ... resetting')
        attrs['columns'] = bark.sampled_columns(data)
    attrs.update(new_attrs)
    bark.write_metadata(filename, **attrs)
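# Usage sketch (hypothetical paths), assuming stream.read returns one of
# these stream objects:
#
#   s = stream.read('raw.dat')
#   s.write('copy.dat')                     # dtype taken from the streamed buffers
#   s.write('as_int16.dat', dtype='int16')  # each buffer cast on write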