def get_dest_arf(filename, dry_run):
    """Returns handle for destination arf file"""
    if dry_run:
        fp = arf.open_file(filename + ".arf", mode="a", driver="core", backing_store=False)
    else:
        fp = arf.open_file(filename + ".arf", mode="w-")
    arf.set_attributes(
        fp, file_creator='org.meliza.arfx/arfxplog ' + core.__version__)
    log.info("opened '%s.arf' for writing", filename)
    return fp
def test06_creation_iter():
    fp = arf.open_file("test06", mode="a", driver="core", backing_store=False)
    entry_names = ('z', 'y', 'a', 'q', 'zzyfij')
    for name in entry_names:
        g = arf.create_entry(fp, name, 0)
        arf.create_dataset(g, "dset", (), sampling_rate=1)
    assert_sequence_equal(arf.keys_by_creation(fp), entry_names)
def test07_append_to_table():
    fp = arf.open_file("test07", mode="a", driver="core", backing_store=False)
    dtype = nx.dtype({'names': ("f1", "f2"), 'formats': [nx.uint, nx.int32]})
    dset = arf.create_table(fp, 'test', dtype=dtype)
    assert_equal(dset.shape[0], 0)
    arf.append_data(dset, (5, 10))
    assert_equal(dset.shape[0], 1)
def createtemparf(filename, datatype=0):
    root, ext = os.path.splitext(filename)
    arffile = arf.open_file(tempfile.mktemp())
    if ext == '.lbl':
        lbl_rec = lbl.read(filename)
        print(lbl_rec)
        dset = arf.create_dataset(arffile, os.path.split(filename)[-1], lbl_rec,
                                  units=['', 's', 's'], datatype=2002)
        dset.attrs['units'] = 's'
    elif ext == '.wav':
        wavfile = ewave.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1], wavfile.read(),
                           sampling_rate=wavfile.sampling_rate, datatype=1)
    elif ext == '.pcm':
        from arfx import pcmio
        pcmfile = pcmio.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1], pcmfile.read(),
                           sampling_rate=pcmfile.sampling_rate, datatype=datatype)
    elif ext == '.pcm_seq2':
        from arfx import io
        pcmseqfile = io.open(filename)
        dataset_basename = os.path.split(filename)[-1]
        for i in xrange(pcmseqfile.nentries):
            dataset_name = '_'.join([dataset_basename, str(i)])
            arf.create_dataset(arffile, dataset_name, pcmseqfile.read(),
                               sampling_rate=pcmseqfile.sampling_rate,
                               timestamp=pcmseqfile.timestamp, datatype=datatype)
            # try block added because pcmseqfile.nentries doesn't seem to always be accurate
            try:
                pcmseqfile.entry += 1
            except ValueError:
                continue
    return arffile['/']
def save(stream, filename, path, sampling_rate=None, chunk_size=None):
    """Saves a Stream object to an .arf file.

    Can't be called by an instance of ArfStreamer.
    """
    if chunk_size is None:
        chunk_size = stream.chunk_size
    if sampling_rate is None:
        raise Exception("You must specify the sampling rate in ArfStreamer.save")
    with arf.open_file(filename, 'a') as file:
        path = path.split("/")
        dst_name = path[-1]
        grp_path = "/".join(path[:-1])
        grp = file.require_group(grp_path)
        # Get first batch of data
        data = stream.read(chunk_size)
        try:
            dst = arf.create_dataset(grp, dst_name, data, maxshape=(None,),
                                     sampling_rate=sampling_rate)
        except Exception:
            raise ValueError('Error, maybe dataset with that name already exists')
        while True:
            data = stream.read(chunk_size)
            if len(data) == 0:
                break
            arf.append_data(dst, data)
        file.flush()
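# Hypothetical usage sketch (not from the source): the stream object, file name,
# and dataset path below are assumptions, illustrating how save() might be called
# if it is exposed as ArfStreamer.save. The last path component becomes the
# dataset name; the leading components name the group that will be created.
#
#     ArfStreamer.save(stream, "recording.arf", "/entry_00001/pcm_000",
#                      sampling_rate=30000)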
def arf2bark(arf_file, root_path, timezone, verbose, mangle_prefix=ENTRY_PREFIX):
    with arf.open_file(arf_file, 'r') as af:
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items():  # entries and top-level datasets
            if isinstance(entry, h5py.Group):  # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                for pos_arg in ('name', 'parents'):
                    # along with 'timestamp' below, these are positional arguments to create_entry
                    # for now, I prefer hard-coding them over messing with runtime introspection
                    new_name = pos_arg
                    while new_name in entry_attrs:
                        new_name = '{}_{}'.format(mangle_prefix, new_name)
                    try:
                        entry_attrs[new_name] = entry_attrs.pop(pos_arg)
                    except KeyError:
                        pass
                    else:
                        if verbose:
                            print('Renamed attribute {} of entry {} to {}'
                                  .format(pos_arg, ename, new_name))
                timestamp = entry_attrs.pop('timestamp')
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items():  # entry-level datasets
                    if ds_name == 'trig_in':  # accessing trig_in -> segfault
                        found_trigin = True  # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset):  # top-level datasets
                if arf.is_time_series(entry) or arf.is_marked_pointproc(entry):
                    if tle is None:
                        path = os.path.join(root_path, 'top_level')
                        tle = bark.create_entry(path, 0, parents=False).path
                    transfer_dset(ename, entry, tle, verbose)
                else:
                    unknown_ds_warning(ename)  # and skip, w/o creating TLE
    if found_trigin:
        print('Warning: found datasets named "trig_in". Jill-created ' +
              '"trig_in" datasets segfault when read, so these datasets' +
              ' were skipped. If you know the datasets are good, rename' +
              ' them and try again.')
    return bark.Root(root_path)
def main(kwikfile, datatypes, arf_name):
    if not datatypes:
        datatypes = [0]
    if not arf_name:
        arf_name = os.path.splitext(kwikfile)[0] + ".arf"
    with h5py.File(kwikfile, "r") as kfile, arf.open_file(arf_name, "w") as afile:
        copy(kfile, afile, datatypes)
def main(kwikfiles, datatypes):
    if not datatypes:
        datatypes = [0]
    for kwikfile in kwikfiles:
        arf_name = os.path.splitext(kwikfile)[0] + ".arf"
        with h5py.File(kwikfile, "r") as kfile, arf.open_file(arf_name, "w") as afile:
            copy(kfile, afile, datatypes)
def arf2bark(arf_file, root_path, timezone, verbose):
    with arf.open_file(arf_file, 'r') as af:
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items():  # entries and top-level datasets
            if isinstance(entry, h5py.Group):  # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                timestamp = entry_attrs.pop('timestamp')
                # rename 'name' attribute created by openephys arf module
                try:
                    entry_attrs['openephys_name'] = entry_attrs.pop('name')
                except KeyError:
                    pass
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items():  # entry-level datasets
                    if ds_name == 'trig_in':  # accessing trig_in -> segfault
                        found_trigin = True  # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset):  # top-level datasets
                if arf.is_time_series(entry) or arf.is_marked_pointproc(entry):
                    if tle is None:
                        path = os.path.join(root_path, 'top_level')
                        tle = bark.create_entry(path, 0, parents=False).path
                    transfer_dset(ename, entry, tle, verbose)
                else:
                    unknown_ds_warning(ename)  # and skip, w/o creating TLE
    if found_trigin:
        print('Warning: found datasets named "trig_in". Jill-created ' +
              '"trig_in" datasets segfault when read, so these datasets' +
              ' were skipped. If you know the datasets are good, rename' +
              ' them and try again.')
    return bark.Root(root_path)
def createtemparf(filename, datatype=0):
    root, ext = os.path.splitext(filename)
    arffile = arf.open_file(tempfile.mktemp())
    if ext == '.lbl':
        lbl_rec = lbl.read(filename)
        print(lbl_rec)
        dset = arf.create_dataset(arffile, os.path.split(filename)[-1], lbl_rec,
                                  units=['', 's', 's'], datatype=2002)
        dset.attrs['units'] = 's'
    elif ext == '.wav':
        wavfile = ewave.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1], wavfile.read(),
                           sampling_rate=wavfile.sampling_rate, datatype=1)
    elif ext == '.pcm':
        from arfx import pcmio
        pcmfile = pcmio.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1], pcmfile.read(),
                           sampling_rate=pcmfile.sampling_rate, datatype=datatype)
    return arffile['/']
def arf2bark(arf_file, root_parent, timezone, verbose):
    with arf.open_file(arf_file, 'r') as af:
        # root
        root_dirname = os.path.splitext(arf_file)[0]
        root_path = os.path.join(os.path.abspath(root_parent), root_dirname)
        os.mkdir(root_path)
        root = bark.Root(root_path)
        if verbose:
            print('Created Root: ' + root_path)
        tle = None
        found_trigin = False
        for ename, entry in af.items():  # entries and top-level datasets
            if isinstance(entry, h5py.Group):  # entries
                entry_path = os.path.join(root_path, ename)
                entry_attrs = copy_attrs(entry.attrs)
                timestamp = entry_attrs.pop('timestamp')
                if timezone:
                    timestamp = bark.convert_timestamp(timestamp, timezone)
                else:
                    timestamp = bark.convert_timestamp(timestamp)
                bark_entry = bark.create_entry(entry_path,
                                               timestamp,
                                               parents=False,
                                               **entry_attrs)
                if verbose:
                    print('Created Entry: ' + entry_path)
                for ds_name, dataset in entry.items():  # entry-level datasets
                    if ds_name == 'trig_in':  # accessing trig_in -> segfault
                        found_trigin = True  # and skip the dataset
                    else:
                        transfer_dset(ds_name, dataset, entry_path, verbose)
            elif isinstance(entry, h5py.Dataset):  # top-level datasets
                if tle is None:
                    path = os.path.join(root_path, 'top_level')
                    tle = bark.create_entry(path, 0, parents=False).path
                transfer_dset(ename, entry, tle, verbose)
    if found_trigin:
        print('Warning: found datasets named "trig_in". Jill-created ' +
              '"trig_in" datasets segfault when read, so these datasets' +
              ' were skipped. If you know the datasets are good, rename' +
              ' them and try again.')
    return bark.Root(root_path)
used_files = [args.kwik, args.arf]
if args.probe:
    used_files.append(args.probe)
for f in used_files:
    if not os.path.isfile(f):
        raise IOError('no such file: {}'.format(f))

if not args.out:
    spikes_filename = os.path.splitext(os.path.split(args.arf_list[0])[-1])[0] \
        + '_spikes.arf'
else:
    spikes_filename = args.out

start_sample = args.start_sample  # defaults to 0
for arf_name in args.arf_list:
    with h5py.File(arf_name, 'r') as arf_file,\
            arf.open_file(spikes_filename, 'w') as spikes_file:
        if args.kwik is not None:
            with h5py.File(args.kwik, 'r') as kwik_file:
                start_sample = main(kwik_file, arf_file, spikes_file,
                                    args.stim, args.lfp, args.pulse,
                                    args.stimchannel, args.probe,
                                    start_sample=start_sample)
        else:
            start_sample = main(None, arf_file, spikes_file,
                                args.stim, args.lfp, args.pulse,
                                args.stimchannel, args.probe,
                                start_sample=start_sample)
print("final sample: {}".format(start_sample))
def test08_check_file_version():
    fp = arf.open_file("test08", mode="a", driver="core", backing_store=False)
    arf.check_file_version(fp)
def __enter__(self):
    self.file = arf.open_file(self.filename)
    return self
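# A minimal companion sketch (assumption, not from the source): an __exit__ that
# closes the handle opened in __enter__ so the object works as a context manager.
# arf.open_file returns an h5py.File-like handle, which supports close().
def __exit__(self, exc_type, exc_value, traceback):
    self.file.close()
    return False  # do not suppress exceptions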
def test07_append_to_table():
    fp = arf.open_file("test07", mode="a", driver="core", backing_store=False)
    dset = arf.create_table(fp, 'test', dtype=nx.dtype([('f1', nx.uint), ('f2', nx.int32)]))
    assert_equal(dset.shape[0], 0)
    arf.append_data(dset, (5, 10))
    assert_equal(dset.shape[0], 1)
def collect_sampled_script(argv=None):
    from natsort import natsorted
    import argparse

    p = argparse.ArgumentParser(prog="arfx-collect-sampled",
                                description="collect sampled data from arf files across channels and entries")
    p.add_argument('--version', action="version",
                   version="%(prog)s " + __version__)
    p.add_argument('-v', '--verbose', help="show verbose log messages", action="store_true")
    p.add_argument("-d", "--dtype",
                   help="convert data to specified type (default is to use as stored)")
    # p.add_argument("-b", "--bark", help="output bark meta.yml file", action="store_true")
    p.add_argument("-c", "--channels", help="list of channels to unpack (default all)",
                   metavar='CHANNEL', nargs="+")
    p.add_argument('-e', '--entries', help="list of entries to unpack (default all)",
                   metavar='ENTRY', nargs='+')
    p.add_argument("arffile", help="the ARF file to unpack")
    p.add_argument("outfile", help="the output file (will be overwritten)")
    args = p.parse_args(argv)

    ch = logging.StreamHandler()
    formatter = logging.Formatter("[%(name)s] %(message)s")
    if args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)  # change
    ch.setFormatter(formatter)
    log.addHandler(ch)

    with arf.open_file(args.arffile, "r") as arfp:
        log.info("unpacking '%s'", args.arffile)
        arf.check_file_version(arfp)
        entry_names, channel_props = check_entry_consistency(arfp, args.entries, args.channels,
                                                             predicate=arf.is_time_series)
        if not all_items_equal(channel_props, operator.itemgetter("sampling_rate")):
            log.warn(" - warning: not all datasets have the same sampling rate")
        if not all_items_equal(channel_props, operator.itemgetter("units")):
            log.warn(" - warning: not all datasets have the same units")

        nentries = len(entry_names)
        nchannels = sum(channel_props[c]["channels"] for c in channel_props)
        sampling_rate = first(channel_props, operator.itemgetter("sampling_rate"))
        if args.dtype is None:
            dtype = first(channel_props, operator.itemgetter("dtype"))
        else:
            dtype = args.dtype
        log.info(" - channels (%d):", nchannels)
        for cname in natsorted(channel_props):
            log.info("    - %s", cname)
        log.info("opening '%s' for output", args.outfile)
        log.info(" - sampling rate = %f", sampling_rate)
        log.info(" - dtype = '%s'", dtype)
        log.info(" - entries (%d):", nentries)
        with io.open(args.outfile, mode="w", sampling_rate=sampling_rate, dtype=dtype,
                     nchannels=nchannels) as ofp:
            for entry_name in natsorted(entry_names):
                entry = arfp[entry_name]
                # nsamples = first(entry, operator.attrgetter("shape"))[0]
                # would be more efficient to preallocate but this is easy
                data = np.column_stack([entry[cname][:] for cname in natsorted(channel_props)])
                ofp.write(data)
                log.info("    - '%s' -> %d samples", entry_name, data.shape[0])
# -*- coding: utf-8 -*-
# -*- mode: python -*-
# test harness for arf interface. assumes the underlying hdf5 and h5py libraries
# are working.
from nose.tools import *
from nose.plugins.skip import SkipTest
from distutils import version
import numpy as nx
import arf
import time
from numpy.random import randn, randint

fp = arf.open_file("test", 'w', driver="core", backing_store=False)
entry_base = "entry_%03d"
tstamp = time.mktime(time.localtime())
entry_attributes = {'intattr': 1,
                    'vecattr': [1, 2, 3],
                    'arrattr': randn(5),
                    'strattr': "an attribute",
                    }
datasets = [dict(name="acoustic",
                 data=randn(100000),
                 sampling_rate=20000,
                 datatype=arf.DataTypes.ACOUSTIC,
                 maxshape=(None,),
                 microphone="DK-1234",
                 compression=0),
            dict(name="neural",
def main(argv=None):
    import argparse
    from .core import __version__

    p = argparse.ArgumentParser(prog="arfx-split", description=__doc__)
    p.add_argument('--version', action='version',
                   version='%(prog)s ' + __version__)
    p.add_argument('-v', help='verbose output', action='store_true', dest='verbose')
    p.add_argument("--duration", "-T", help="the maximum duration of entries "
                   "(default: %(default).2f seconds)", type=float, default=600)
    p.add_argument("--compress", "-z", help="set compression level in output file "
                   "(default: %(default)d)", type=int, default=1)
    p.add_argument("--dry-run", "-n", help="don't actually create the target file or copy data",
                   action="store_true")
    p.add_argument("--append", "-a", help="if true, will append data from src to tgt (default "
                   "is to overwrite). Note that log files are NOT merged in this mode",
                   action="store_true")
    p.add_argument("src", help="the ARF files to chunk up", nargs="+")
    p.add_argument("tgt", help="the destination ARF file")
    args = p.parse_args(argv)

    ch = logging.StreamHandler()
    formatter = logging.Formatter("[%(name)s] %(message)s")
    if args.verbose:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)  # change
    ch.setFormatter(formatter)
    log.addHandler(ch)

    # open all input files and sort entries by timestamp
    log.info("sorting source file entries by timestamp")
    srcs = [h5.File(fname, "r") for fname in args.src]
    entries = sorted(itertools.chain.from_iterable(entry_timestamps(fp) for fp in srcs),
                     key=operator.itemgetter(1))
    if args.verbose:
        log.debug("entry order:")
        for entry, timestamp in entries:
            log.debug(" %s%s (time=%s)", os.path.basename(entry.file.filename),
                      entry.name, timestamp)

    # open output file
    if not args.dry_run:
        if args.append:
            tgt_file = arf.open_file(args.tgt, mode="a")
            log.info("appending to destination file: %s", tgt_file.filename)
            log.info(" counting entries...")
            tgt_entry_index = arf.count_children(tgt_file, h5.Group)
        else:
            tgt_file = arf.open_file(args.tgt, mode="w")
            log.info("created destination file: %s", tgt_file.filename)
            jilllog = merge_jill_logs(srcs)
            if jilllog is not None:
                tgt_file.create_dataset("jill_log", data=jilllog, compression=args.compress)
                log.info("merged jill_log datasets")
            tgt_entry_index = 0

    # iterate through source entries, then chunk up datasets
    for entry, timestamp in entries:
        log.info("source entry: %s%s", os.path.basename(entry.file.filename), entry.name)
        max_duration = entry_duration(entry)
        n_chunks = int(max_duration // args.duration) + 1
        log.debug(" max duration: %3.2f s (chunks=%d)", max_duration, n_chunks)
        for i in range(n_chunks):
            tgt_entry_name = "entry_%05d" % tgt_entry_index
            tgt_timestamp = timestamp + datetime.timedelta(seconds=args.duration) * i
            # create target entry
            log.info(" target entry: %s (time=%s)", tgt_entry_name, tgt_timestamp)
            tgt_entry_index += 1
            # set target entry attributes
            if not args.dry_run:
                tgt_entry = arf.create_entry(tgt_file, tgt_entry_name, tgt_timestamp)
                for k, v in entry.attrs.items():
                    if k == "timestamp":
                        continue
                    elif k == "uuid":
                        k = "origin-uuid"
                    tgt_entry.attrs[k] = v
                tgt_entry.attrs["origin-file"] = os.path.basename(entry.file.filename)
                tgt_entry.attrs["origin-entry"] = os.path.basename(entry.name)
            for dset_name, dset in entry.items():
                if not arf.is_time_series(dset):
                    log.debug(" %s: (not sampled)", dset_name)
                    continue
                sampling_rate = dset.attrs['sampling_rate']
                chunk_size = int(args.duration * sampling_rate)
                start = chunk_size * i
                stop = min(start + chunk_size, dset.shape[0])
                data = dset[start:stop]
                log.debug(" %s: [%d:%d]", dset_name, start, stop)
                if not args.dry_run:
                    tgt_attrs = dict(dset.attrs)
                    try:
                        tgt_attrs['origin-uuid'] = tgt_attrs.pop('uuid')
                    except KeyError:
                        pass
                    arf.create_dataset(tgt_entry, dset_name, data,
                                       compression=args.compress, **tgt_attrs)