import h5py
import numpy as np
import arf
from scipy.signal import cheby2, filtfilt


def add_lfp(spike_entry, raw_entry, Nlfp, cutoff=300, order=4, ripple=20,
            lfp_sampling_rate=1000, verbose=True):
    """Adds the first Nlfp LFP channels to spike_entry."""
    data_channels = [x for x in raw_entry.values()
                     if isinstance(x, h5py.Dataset)
                     and 'datatype' in x.attrs
                     and int(x.attrs['datatype']) < 1000]
    if verbose:
        print("found {} data channels".format(len(data_channels)))
    data_channels = sorted(data_channels, key=repr)[:Nlfp]
    for chan in data_channels:
        if verbose:
            print("lfp chan: {}".format(chan))
        # low-pass filter
        b, a = cheby2(order, ripple,
                      cutoff / (chan.attrs['sampling_rate'] / 2.))
        lfp = filtfilt(b, a, chan)
        # resample
        old_x = np.arange(len(chan)) / chan.attrs['sampling_rate']
        resample_ratio = chan.attrs['sampling_rate'] / lfp_sampling_rate
        new_x = np.arange(len(chan) / resample_ratio) / lfp_sampling_rate
        resamp_lfp = np.interp(new_x, old_x, lfp)
        arf.create_dataset(spike_entry, chan.name, resamp_lfp,
                           units='samples', datatype=2,
                           sampling_rate=lfp_sampling_rate)
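# A minimal usage sketch for add_lfp (not from the original source): the file
# names ("raw.arf", "spikes.arf") and the entry name ("entry_0") are
# hypothetical placeholders.
def _example_add_lfp():
    with h5py.File("raw.arf", "r") as raw, \
            arf.open_file("spikes.arf", "a") as out:
        spike_entry = arf.create_entry(out, "entry_0", 0)
        add_lfp(spike_entry, raw["entry_0"], Nlfp=4)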
def test06_creation_iter():
    fp = arf.open_file("test06", mode="a", driver="core", backing_store=False)
    entry_names = ('z', 'y', 'a', 'q', 'zzyfij')
    for name in entry_names:
        g = arf.create_entry(fp, name, 0)
        arf.create_dataset(g, "dset", (), sampling_rate=1)
    assert_sequence_equal(arf.keys_by_creation(fp), entry_names)
def add_spikes(spike_entry, kwik_file, start_sample, stop_sample):
    """Adds all spikes between start_sample and stop_sample from the kwik
    file into spike_entry."""
    for shanknum, shank_group in enumerate(kwik_file['shanks'].values()):
        allspikes = shank_group['spikes']
        allwaves = shank_group['waveforms']['waveform_filtered']
        time_mask = np.logical_and(allspikes['time'] >= start_sample,
                                   allspikes['time'] < stop_sample)
        spikes = allspikes[time_mask]
        waves = allwaves[time_mask]
        # rename the 'time' field to 'start' for arf compatibility
        spikes.dtype.names = tuple(x if x != 'time' else 'start'
                                   for x in spikes.dtype.names)
        spikes['start'] = spikes['start'] - start_sample
        spike_dset_name = 'spikes_{}'.format(shanknum + 1)
        waves_dset_name = 'waves_{}'.format(shanknum + 1)
        if spike_dset_name not in spike_entry:
            spike_samplerate = arf_samplerate(args.arf_list[0])  # TODO better method
            units = [x.encode('utf8')
                     for x in ('ID', 'ID', 'none', 'none', 'samples')]
            # created extensible so later calls can append to them
            arf.create_dataset(spike_entry, spike_dset_name, spikes,
                               units=units, datatype=1001, maxshape=(None,),
                               sampling_rate=spike_samplerate)
            arf.create_dataset(spike_entry, waves_dset_name, waves,
                               units='samples', datatype=11001, maxshape=(None,),
                               sampling_rate=arf_samplerate(args.arf_list[0]))
        else:
            # h5py datasets can't be grown by assigning to .value; append
            # through arf instead (requires the extensible datasets above)
            arf.append_data(spike_entry[spike_dset_name], spikes)
            arf.append_data(spike_entry[waves_dset_name], waves)
def find_and_write_pulse_time(arf_file, arf_entry_name, pulsechan,
                              spike_entry, verbose=True):
    pulsetime = stimalign.detect_pulse(arf_file[arf_entry_name][pulsechan])
    pulse_sampling_rate = arf_file[arf_entry_name][pulsechan].attrs['sampling_rate']
    arf.create_dataset(spike_entry, 'pulse', np.array([pulsetime]),
                       units='samples', datatype=1000,
                       sampling_rate=pulse_sampling_rate)
    if verbose:
        print("pulse: {}".format(pulsetime))
def label(motif_file, recordings, label, label_name='auto_lbl'):
    """Creates a label entry from motif matches.

    Parameters
    ----------
    motif_file : An hdf5 file containing clustered motif matches as
        generated by birdwerdz.hdf.classify
    recordings : The name of the arf (hdf5) file containing the raw
        recordings to be labeled
    label : Path to the template label in the recordings file
    label_name : Name of the label datasets to be made
    """
    # TODO: convert starts and stops to spectrogram index
    # tstep (the spectrogram time step) is assumed to be defined at module
    # level in the original source
    with h5py.File(recordings, 'r+') as rec_file:
        template_lbl = rec_file[label]
        units = template_lbl.attrs['units']
        unit_args = {'units': ['', units, units]}
        if units == 's':
            spec_res = tstep
        elif units == 'ms':
            spec_res = tstep * 1000
        elif units == 'samples':
            sr = template_lbl.attrs['sampling_rate']
            spec_res = tstep * float(sr)
            unit_args['sampling_rate'] = sr

        with h5py.File(motif_file, 'r+') as motif:
            # get the length of the template spectrogram
            template_len = None
            for entry in motif.values():
                if (isinstance(entry, h5py.Group)
                        and 'motifs' in entry.keys()
                        and entry['motifs'].size):
                    template_len = entry['motifs']['dtw_path'].shape[0]
                    break
            if template_len is None:
                return

            start_idx = [max(0, int(start / spec_res))
                         for start in template_lbl['start']]
            stop_idx = [min(int(stop / spec_res), template_len)
                        for stop in template_lbl['stop']]
            names = template_lbl['name']

            for entry in motif.values():
                if (not isinstance(entry, h5py.Group)
                        or 'motifs' not in entry.keys()
                        or not entry['motifs'].size):
                    continue
                dtype = [('name', 'a' + str(max(len(x) for x in names))),
                         ('start', float), ('stop', float)]
                lbl = np.array([(l, m['dtw_path'][b] * spec_res,
                                 m['dtw_path'][e] * spec_res)
                                for m in entry['motifs']
                                for l, b, e in zip(names, start_idx, stop_idx)],
                               dtype=dtype)
                arf.create_dataset(rec_file[entry.name], label_name, data=lbl,
                                   maxshape=(None,), datatype=2002, **unit_args)
def copy(kfile, afile, datatypes=(0,)):
    """Copies the contents of a .kwd hdf5 file to a .arf hdf5 file."""
    # copy top-level attributes
    for k, v in kfile.attrs.items():
        afile.attrs[k] = v
    # descend into /recordings
    recordings = kfile["recordings"]
    # recordings holds the "entries", which have names "0", "1", "2", etc.
    for kname, kentry in recordings.items():
        timestamp = 0  # TODO determine correct timestamp
        e = arf.create_entry(afile, kname, timestamp)
        for k, v in kentry.attrs.items():
            e.attrs[k] = v
        kdata = kentry["data"]
        if len(datatypes) == 1:
            datatypes = tuple(datatypes) * kdata.shape[1]
        else:
            assert len(datatypes) == kdata.shape[1]
        channel_bit_volts = kentry["application_data"].attrs["channel_bit_volts"]
        channel_sample_rates = kentry["application_data"].attrs["channel_sample_rates"]
        # kwik files are SxN datasets, while in arf it's N datasets of length S
        for i in range(kdata.shape[1]):
            dset = arf.create_dataset(e, name=str(i),
                                      data=np.array([], dtype=np.int16),
                                      maxshape=(kdata.shape[0],),
                                      sampling_rate=channel_sample_rates[i],
                                      units='samples', datatype=datatypes[i],
                                      compression=6)
            dset.attrs["bit_volts"] = channel_bit_volts[i]
            # copy in chunks of BUFFERSIZE samples to bound memory use
            for j in range(kdata.shape[0] // BUFFERSIZE + 1):
                index = j * BUFFERSIZE
                arf.append_data(dset, kdata[index:index + BUFFERSIZE, i])
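# A usage sketch for copy (not from the original source): file names are
# hypothetical, and BUFFERSIZE is assumed to be a module-level constant
# (e.g. BUFFERSIZE = 8192).
def _example_copy_kwd_to_arf():
    with h5py.File("experiment.kwd", "r") as kfile, \
            arf.open_file("experiment.arf", "w") as afile:
        copy(kfile, afile, datatypes=(0,))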
def save(stream, filename, path, sampling_rate=None, chunk_size=None):
    """Saves a Stream object to an .arf file.

    Can't be called by an instance of ArfStreamer.
    """
    if chunk_size is None:
        chunk_size = stream.chunk_size
    if sampling_rate is None:
        raise ValueError("You must specify the sampling rate in ArfStreamer.save")
    with arf.open_file(filename, 'a') as file:
        path = path.split("/")
        dst_name = path[-1]
        grp_path = "/".join(path[:-1])
        grp = file.require_group(grp_path)
        # get the first batch of data
        data = stream.read(chunk_size)
        try:
            dst = arf.create_dataset(grp, dst_name, data, maxshape=(None,),
                                     sampling_rate=sampling_rate)
        except Exception:
            raise ValueError("Could not create dataset; one with that name "
                             "may already exist")
        while True:
            data = stream.read(chunk_size)
            if len(data) == 0:
                break
            arf.append_data(dst, data)
        file.flush()
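# A usage sketch for save (not from the original source): `stream` is assumed
# to expose read(n) and a chunk_size attribute, and the file and dataset paths
# are hypothetical placeholders.
def _example_save(stream):
    save(stream, "recordings.arf", "entry_0/pcm_000", sampling_rate=30000)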
def createtemparf(filename, datatype=0):
    root, ext = os.path.splitext(filename)
    arffile = arf.open_file(tempfile.mktemp())
    if ext == '.lbl':
        lbl_rec = lbl.read(filename)
        dset = arf.create_dataset(arffile, os.path.split(filename)[-1],
                                  lbl_rec, units=['', 's', 's'], datatype=2002)
        dset.attrs['units'] = 's'
    elif ext == '.wav':
        wavfile = ewave.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1],
                           wavfile.read(),
                           sampling_rate=wavfile.sampling_rate, datatype=1)
    elif ext == '.pcm':
        from arfx import pcmio
        pcmfile = pcmio.open(filename)
        arf.create_dataset(arffile, os.path.split(filename)[-1],
                           pcmfile.read(),
                           sampling_rate=pcmfile.sampling_rate,
                           datatype=datatype)
    elif ext == '.pcm_seq2':
        from arfx import io
        pcmseqfile = io.open(filename)
        dataset_basename = os.path.split(filename)[-1]
        for i in range(pcmseqfile.nentries):
            dataset_name = '_'.join([dataset_basename, str(i)])
            arf.create_dataset(arffile, dataset_name, pcmseqfile.read(),
                               sampling_rate=pcmseqfile.sampling_rate,
                               timestamp=pcmseqfile.timestamp,
                               datatype=datatype)
            # try block added because pcmseqfile.nentries doesn't seem to
            # always be accurate
            try:
                pcmseqfile.entry += 1
            except ValueError:
                continue
    return arffile['/']
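# A usage sketch for createtemparf (not from the original source; the file
# name is hypothetical). The returned value is the root group of a temporary
# arf file holding the converted data, keyed by the source file's basename.
def _example_createtemparf():
    root = createtemparf("song.wav")
    print(root["song.wav"].attrs["sampling_rate"])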
def copy(kfile, afile, datatypes=(0,)):
    """Copies the contents of a .kwd hdf5 file to a .arf hdf5 file."""
    # copy top-level attributes
    for k, v in kfile.attrs.items():
        afile.attrs[k] = v
    # descend into /recordings
    recordings = kfile["recordings"]
    # recordings holds the "entries", which have names "0", "1", "2", etc.
    for kname, kentry in recordings.items():
        timestamp = 0  # TODO determine correct timestamp
        e = arf.create_entry(afile, kname, timestamp)
        for k, v in kentry.attrs.items():
            e.attrs[k] = v
        kdata = kentry["data"]
        if len(datatypes) == 1:
            datatypes = tuple(datatypes) * kdata.shape[1]
        else:
            assert len(datatypes) == kdata.shape[1]
        channel_bit_volts = kentry["application_data"].attrs["channel_bit_volts"]
        channel_sample_rates = kentry["application_data"].attrs["channel_sample_rates"]
        # kwik files are SxN datasets, while in arf it's N datasets of length S
        for i in range(kdata.shape[1]):
            dset = arf.create_dataset(e, name=str(i),
                                      data=np.ravel(kdata[:, i]),
                                      compression=6,
                                      sampling_rate=channel_sample_rates[i],
                                      units='samples', datatype=datatypes[i])
            dset.attrs["bit_volts"] = channel_bit_volts[i]
def parse_explog(explog, entry_attrs, datatype, split_sites=False,
                 compression=1, channels=None, dry_run=False):
    """Parses an explog file to figure out where all the data is stored, and
    when everything happened. Creates one or more arf files to hold the data,
    and stores data under the associated entry.

    datatype: specify the default type of data being recorded;
              types may be specified in the explog, and these have precedence
    channels: if not None, only store data from these channels

    Additional arguments are used to set attributes on the newly created
    entries.
    """
    # base for created files
    arfbase = os.path.splitext(explog)[0]
    # look up source pcmseq2 file by channel name
    files = {}
    # dict of stimuli indexed by samplecount
    stimuli = {}
    # dataset attributes
    dset_attrs = {}
    # set of all onset times
    entries = {}
    fileonset = nx.uint64(0)  # corresponds to C long long type
    lastonset = nx.uint64(0)
    pen = 0
    site = 0

    efp = open(explog, 'rU')
    for line_num, line in enumerate(efp):
        lstart = line[0:4]

        # control info
        if lstart == '%%%%':
            if line.rstrip().endswith('start'):
                fileonset = lastonset
            elif line.find('add') > -1:
                try:
                    fields = line.partition('add')[-1].split()
                    props = dict(f.split('=') for f in fields[1:])
                    if 'datatype' in props:
                        props['datatype'] = getattr(arf.DataTypes,
                                                    props['datatype'].upper())
                    dset_attrs[fields[0]] = props
                except (AttributeError, ValueError):
                    log.warn("L%d parse error: bad channel metadata: ignoring",
                             line_num)

        # file creation
        elif lstart == "FFFF":
            try:
                fname, base, action = _reg_create.search(line).groups()
            except AttributeError:
                log.warn("L%d parse error: %s", line_num, line)
                continue
            if channels is not None and base not in channels:
                continue
            if action == 'created':
                ifname = os.path.join(os.path.dirname(explog), fname)
                try:
                    files[base] = io.open(ifname, mode='r')
                except Exception as e:
                    log.warn("error opening source file '%s'; ARF files will "
                             "be incomplete", ifname)
                    log.debug(e)
            else:
                # file was closed; remove from list
                files.pop(base, None)

        # new pen or new site
        elif lstart == "IIII":
            fields = line.split()
            if fields[-2] == 'pen':
                pen = fields[-1]
            elif fields[-2] == 'site':
                site = fields[-1]

        # trigger lines
        elif lstart == "TTTT":
            if line.find("TRIG_OFF") > 0 or line.find("SONG_OFF") > 0:
                continue
            try:
                chan, entry, onset = _reg_triggeron.search(line).groups()
            except AttributeError:
                log.warn("L%d parse error: %s", line_num, line)
                continue
            if channels is not None and chan not in channels:
                continue
            try:
                ifp = files[chan]
            except KeyError:
                # user should already have been warned about missing data
                # from this file
                continue
            try:
                ifp.entry = int(entry)
            except ValueError:
                log.warn("L%d runtime error: unable to access %s/%s",
                         line_num, entry, chan)
                continue
            lastonset = nx.uint64(onset) + fileonset
            entry_name = "e%ld" % lastonset
            ofname = (target_file_template.format(arfbase, pen, site)
                      if split_sites else base)
            try:
                ofp = get_dest_arf(ofname, dry_run)
            except IOError:
                log.error("target file '%s' already exists; aborting", ofname)
                return -1
            log.debug("%s/%s -> %s/%s/%s", ifp.filename, entry,
                      ofp.filename, entry_name, chan)
            data = ifp.read()
            sampling_rate = ifp.sampling_rate
            if 'sampling_rate' in ofp.attrs:
                if ofp.attrs['sampling_rate'] != sampling_rate:
                    log.error("%s/%s sampling rate (%d) doesn't match target "
                              "file (%d).\nYou may be attempting to load data "
                              "from the wrong files!",
                              ifp.filename, entry, sampling_rate,
                              ofp.attrs['sampling_rate'])
                    return -1
            else:
                ofp.attrs['sampling_rate'] = sampling_rate
            if lastonset in entries:
                entry = ofp[entry_name]
            else:
                entry = arf.create_entry(
                    ofp, entry_name, ifp.timestamp,
                    sample_count=lastonset,
                    sampling_rate=sampling_rate,
                    entry_creator='org.meliza.arfx/arfxplog ' + core.__version__,
                    pen=pen, site=site, **entry_attrs)
                entries[lastonset] = entry
            if chan in dset_attrs and 'datatype' in dset_attrs[chan]:
                chan_datatype = dset_attrs[chan]['datatype']
            else:
                chan_datatype = datatype
            dset = arf.create_dataset(entry, name=chan, data=data,
                                      datatype=chan_datatype,
                                      sampling_rate=sampling_rate,
                                      compression=compression,
                                      source_file=ifp.filename,
                                      source_entry=ifp.entry)
            # store duration of longest dataset; could also get this from the
            # TRIG_OFF line, but this is a bit simpler
            if data.size > entry.attrs.get('trial_off', 0):
                entry.attrs['trial_off'] = data.size
            arf.set_uuid(dset, get_uuid(pen, site, chan))

        # stimulus lines
        elif lstart == "QQQQ":
            try:
                rel, onset, stimname = _reg_stimulus.search(line).groups()
                lastonset = nx.uint64(onset) + fileonset
                if stimname.startswith('File='):
                    stimname = stimname[5:]
                stimuli[lastonset] = stimname
            except AttributeError:
                log.warn("L%d parse error: %s", line_num, line)

    # done parsing file
    efp.close()
    match_stimuli(stimuli, entries, sampling_rate=sampling_rate)
def create_dataset(g, dset):
    d = arf.create_dataset(g, **dset)
    assert_equal(d.shape, dset['data'].shape)
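# A sketch of the dict this test helper expects (not from the original
# source; names and values are illustrative): `dset` carries the keyword
# arguments for arf.create_dataset, including 'name' and a 'data' array.
def _example_create_dataset(fp):
    dset = dict(name="acoustic", data=np.random.randn(1000),
                sampling_rate=20000, datatype=arf.DataTypes.ACOUSTIC)
    create_dataset(fp, dset)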
def plot_dataset_list(self, dataset_list, data_layout, append=False):
    '''plots a list of datasets to a data layout'''
    data_layout.clear()
    if not append:
        self.subplots = []
    # rasterQPainterPath = QtGui.QPainterPath().addRect(-.1, -5, .2, 1)
    # TODO make a better raster shape that works
    toes = []
    for dataset in dataset_list:
        print(dataset)
        if 'datatype' not in dataset.attrs.keys():
            print('{} is not an arf dataset'.format(repr(dataset)))
            if os.path.basename(dataset.name) == 'jill_log':
                print(dataset.value)
            continue

        # sampled data
        if dataset.attrs['datatype'] < 1000:
            if (self.settings_panel.oscillogram_check.checkState()
                    == QtCore.Qt.Checked):
                pl = downsamplePlot(dataset, title=dataset.name,
                                    name=str(len(self.subplots)))
                data_layout.addItem(pl, row=len(self.subplots), col=0)
                pl.setXRange(0, dataset.size
                             / float(dataset.attrs['sampling_rate']))
                pl.setYRange(np.min(dataset), np.max(dataset))
                self.subplots.append(pl)
                pl.showGrid(x=True, y=True)

        # simple events
        elif utils.is_simple_event(dataset):
            if dataset.attrs['units'] == 'ms':
                data = dataset.value / 1000.
            elif dataset.attrs['units'] == 'samples':
                data = dataset.value / dataset.attrs['sampling_rate']
            else:
                data = dataset.value
            if (self.settings_panel.raster_check.checkState() == QtCore.Qt.Checked
                    or self.settings_panel.psth_check.checkState() == QtCore.Qt.Checked
                    or self.settings_panel.isi_check.checkState() == QtCore.Qt.Checked):
                toes.append(data)
            continue

        # complex events
        elif utils.is_complex_event(dataset):
            if (self.settings_panel.label_check.checkState()
                    == QtCore.Qt.Checked):
                # create a new extensible dataset if not already extensible
                if dataset.maxshape != (None,):
                    data = dataset[:]
                    name = dataset.name
                    group = dataset.parent
                    attributes = dataset.attrs
                    del group[name]
                    del dataset
                    dataset = arf.create_dataset(group, name, data,
                                                 maxshape=(None,),
                                                 **attributes)
                pl = labelPlot(dataset, title=dataset.name,
                               name=str(len(self.subplots)))
                data_layout.addItem(pl, row=len(self.subplots), col=0)
                pl.showLabel('left', show=False)
                self.subplots.append(pl)
        else:
            print("I don't know how to plot {} of type {} with datatype {}"
                  .format(dataset, type(dataset), dataset.attrs['datatype']))
            continue

        # spectrograms
        if dataset.attrs['datatype'] in [0, 1]:
            if (self.settings_panel.spectrogram_check.checkState()
                    == QtCore.Qt.Checked):
                # spectrogram settings
                sr = float(dataset.attrs['sampling_rate'])
                win_size_text = self.settings_panel.win_size.text()
                t_step_text = self.settings_panel.step.text()
                min_text = self.settings_panel.freq_min.text()
                max_text = self.settings_panel.freq_max.text()
                if win_size_text:
                    win_size = int(float(win_size_text))
                else:
                    win_size = self.settings_panel.defaults['win_size']
                    self.settings_panel.win_size.setText(str(win_size))
                if t_step_text:
                    t_step = int(float(t_step_text) * sr / 1000.)
                else:
                    t_step = self.settings_panel.defaults['step']
                    self.settings_panel.step.setText(str(int(t_step * 1000)))
                if min_text:
                    freq_min = int(min_text)
                else:
                    freq_min = self.settings_panel.defaults['freq_min']
                    self.settings_panel.freq_min.setText(str(freq_min))
                if max_text:
                    freq_max = int(max_text)
                else:
                    freq_max = self.settings_panel.defaults['freq_max']
                    self.settings_panel.freq_max.setText(str(freq_max))
                window_name = self.settings_panel.window.currentText()
                if window_name == "Hann":
                    window = scipy.signal.hann(win_size)
                elif window_name == "Bartlett":
                    window = scipy.signal.bartlett(win_size)
                elif window_name == "Blackman":
                    window = scipy.signal.blackman(win_size)
                elif window_name == "Boxcar":
                    window = scipy.signal.boxcar(win_size)
                elif window_name == "Hamming":
                    window = scipy.signal.hamming(win_size)
                elif window_name == "Parzen":
                    window = scipy.signal.parzen(win_size)
                # compute and interpolate the spectrogram image
                Pxx = libtfr.stft(dataset, w=window, step=t_step)
                spec = np.log(Pxx.T)
                res_factor = 1.0  # factor by which resolution is increased
                # spec = interpolate_spectrogram(spec, res_factor=res_factor)
                # color lookup table
                pos = np.linspace(0, 1, 7)
                color = np.array([[100, 100, 255, 255], [0, 0, 255, 255],
                                  [0, 255, 255, 255], [0, 255, 0, 255],
                                  [255, 255, 0, 255], [255, 0, 0, 255],
                                  [100, 0, 0, 255]], dtype=np.ubyte)
                color_map = pg.ColorMap(pos, color)
                lut = color_map.getLookupTable(0.0, 1.0, 256)
                img = pg.ImageItem(spec, lut=lut)
                # img.setLevels((-5, 10))
                pl = data_layout.addPlot(name=str(len(self.subplots)),
                                         row=len(self.subplots), col=0)
                self.subplots.append(pl)
                pl.addItem(img)
                image_scale = t_step / sr / res_factor
                img.setScale(image_scale)
                df = sr / float(win_size)
                plot_scale = df / res_factor / image_scale
                pl.getAxis('left').setScale(plot_scale)
                pl.setXRange(0, dataset.size / dataset.attrs['sampling_rate'])
                pl.setYRange(freq_min / plot_scale, freq_max / plot_scale)
                pl.setMouseEnabled(x=True, y=False)

    if toes:
        if self.settings_panel.raster_check.checkState() == QtCore.Qt.Checked:
            pl = rasterPlot(toes)
            data_layout.addItem(pl, row=len(self.subplots), col=0)
            pl.showLabel('left', show=False)
            self.subplots.append(pl)
        if self.settings_panel.psth_check.checkState() == QtCore.Qt.Checked:
            all_toes = np.zeros(sum(len(t) for t in toes))
            k = 0
            for t in toes:
                all_toes[k:k + len(t)] = t
                k += len(t)
            if self.settings_panel.psth_bin_size.text():
                bin_size = float(self.settings_panel.psth_bin_size.text()) / 1000.
            else:
                bin_size = .01
            bins = np.arange(all_toes.min(), all_toes.max() + bin_size, bin_size)
            y, x = np.histogram(all_toes, bins=bins)
            psth = pg.PlotCurveItem(x, y, stepMode=True, fillLevel=0,
                                    brush=(0, 0, 255, 80))
            pl = data_layout.addPlot(row=len(self.subplots), col=0)
            pl.addItem(psth)
            pl.setMouseEnabled(y=False)
            self.subplots.append(pl)
        if self.settings_panel.isi_check.checkState() == QtCore.Qt.Checked:
            isis = np.zeros(sum(len(t) - 1 for t in toes))
            k = 0
            for t in toes:
                isis[k:k + len(t) - 1] = np.diff(t)
                k += len(t) - 1
            if self.settings_panel.psth_bin_size.text():
                bin_size = float(self.settings_panel.psth_bin_size.text()) / 1000.
            else:
                bin_size = .01
            bins = np.arange(isis.min(), isis.max() + bin_size, bin_size)
            y, x = np.histogram(isis, bins=bins, density=True)
            isi_hist = pg.PlotCurveItem(x, y, stepMode=True, fillLevel=0,
                                        brush=(0, 0, 255, 80))
            pl = data_layout.addPlot(row=len(self.subplots), col=0)
            pl.addItem(isi_hist)
            pl.setMouseEnabled(y=False)
            self.subplots.append(pl)

    # link x axes
    masterXLink = None
    for pl in self.subplots:
        if not masterXLink:
            masterXLink = pl
        pl.setXLink(masterXLink)
def main(argv=None):
    import argparse
    from .core import __version__

    p = argparse.ArgumentParser(prog="arfx-split", description=__doc__)
    p.add_argument('--version', action='version',
                   version='%(prog)s ' + __version__)
    p.add_argument('-v', help='verbose output',
                   action='store_true', dest='verbose')
    p.add_argument("--duration", "-T",
                   help="the maximum duration of entries "
                   "(default: %(default).2f seconds)",
                   type=float, default=600)
    p.add_argument("--compress", "-z",
                   help="set compression level in output file "
                   "(default: %(default)d)",
                   type=int, default=1)
    p.add_argument("--dry-run", "-n",
                   help="don't actually create the target file or copy data",
                   action="store_true")
    p.add_argument("--append", "-a",
                   help="if true, will append data from src to tgt (default "
                   "is to overwrite). Note that log files are NOT merged in "
                   "this mode",
                   action="store_true")
    p.add_argument("src", help="the ARF files to chunk up", nargs="+")
    p.add_argument("tgt", help="the destination ARF file")
    args = p.parse_args(argv)

    ch = logging.StreamHandler()
    formatter = logging.Formatter("[%(name)s] %(message)s")
    loglevel = logging.DEBUG if args.verbose else logging.INFO
    log.setLevel(loglevel)
    ch.setLevel(loglevel)
    ch.setFormatter(formatter)
    log.addHandler(ch)

    # open all input files and sort entries by timestamp
    log.info("sorting source file entries by timestamp")
    srcs = [h5.File(fname, "r") for fname in args.src]
    entries = sorted(itertools.chain.from_iterable(entry_timestamps(fp)
                                                   for fp in srcs),
                     key=operator.itemgetter(1))
    if args.verbose:
        log.debug("entry order:")
        for entry, timestamp in entries:
            log.debug("  %s%s (time=%s)",
                      os.path.basename(entry.file.filename),
                      entry.name, timestamp)

    # open output file
    tgt_entry_index = 0
    if not args.dry_run:
        if args.append:
            tgt_file = arf.open_file(args.tgt, mode="a")
            log.info("appending to destination file: %s", tgt_file.filename)
            log.info("  counting entries...")
            tgt_entry_index = arf.count_children(tgt_file, h5.Group)
        else:
            tgt_file = arf.open_file(args.tgt, mode="w")
            log.info("created destination file: %s", tgt_file.filename)
            jilllog = merge_jill_logs(srcs)
            if jilllog is not None:
                tgt_file.create_dataset("jill_log", data=jilllog,
                                        compression=args.compress)
                log.info("merged jill_log datasets")

    # iterate through source entries, then chunk up datasets
    for entry, timestamp in entries:
        log.info("source entry: %s%s",
                 os.path.basename(entry.file.filename), entry.name)
        max_duration = entry_duration(entry)
        n_chunks = int(max_duration // args.duration) + 1
        log.debug("  max duration: %3.2f s (chunks=%d)",
                  max_duration, n_chunks)
        for i in range(n_chunks):
            tgt_entry_name = "entry_%05d" % tgt_entry_index
            tgt_timestamp = (timestamp
                             + datetime.timedelta(seconds=args.duration) * i)
            # create target entry
            log.info("  target entry: %s (time=%s)",
                     tgt_entry_name, tgt_timestamp)
            tgt_entry_index += 1
            # set target entry attributes
            if not args.dry_run:
                tgt_entry = arf.create_entry(tgt_file, tgt_entry_name,
                                             tgt_timestamp)
                for k, v in entry.attrs.items():
                    if k == "timestamp":
                        continue
                    elif k == "uuid":
                        k = "origin-uuid"
                    tgt_entry.attrs[k] = v
                tgt_entry.attrs["origin-file"] = \
                    os.path.basename(entry.file.filename)
                tgt_entry.attrs["origin-entry"] = os.path.basename(entry.name)
            for dset_name, dset in entry.items():
                if not arf.is_time_series(dset):
                    log.debug("  %s: (not sampled)", dset_name)
                    continue
                sampling_rate = dset.attrs['sampling_rate']
                chunk_size = int(args.duration * sampling_rate)
                start = chunk_size * i
                stop = min(start + chunk_size, dset.shape[0])
                data = dset[start:stop]
                log.debug("  %s: [%d:%d]", dset_name, start, stop)
                if not args.dry_run:
                    tgt_attrs = dict(dset.attrs)
                    try:
                        tgt_attrs['origin-uuid'] = tgt_attrs.pop('uuid')
                    except KeyError:
                        pass
                    arf.create_dataset(tgt_entry, dset_name, data,
                                       compression=args.compress,
                                       **tgt_attrs)