def roundtripContainer(self): data_file = NWBFile( session_description='a test file', identifier='data_file', session_start_time=self.start_time ) data_file.add_acquisition(self.container) with HDF5IO(self.data_filename, 'w', manager=get_manager()) as self.data_write_io: self.data_write_io.write(data_file) with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io: data_file_obt = self.data_read_io.read() with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io: link_file = NWBFile( session_description='a test file', identifier='link_file', session_start_time=self.start_time ) link_file.add_acquisition(TimeSeries( name='test_mod_ts', unit='V', data=data_file_obt.get_acquisition('data_ts'), timestamps=H5DataIO( data=data_file_obt.get_acquisition('data_ts').timestamps, link_data=True ) )) link_write_io.write(link_file) with HDF5IO(self.link_filename, 'r', manager=get_manager()) as self.link_file_reader: return self.getContainer(self.link_file_reader.read())
def write_lfp( nwbfile: NWBFile, data: ArrayLike, fs: float, electrode_inds: Optional[List[int]] = None, name: Optional[str] = "LFP", description: Optional[str] = "local field potential signal", ): """ Add LFP from neuroscope to a "ecephys" processing module of an NWBFile. Parameters ---------- nwbfile: pynwb.NWBFile data: array-like fs: float electrode_inds: list(int), optional name: str, optional description: str, optional Returns ------- LFP pynwb.ecephys.ElectricalSeries """ if electrode_inds is None: if nwbfile.electrodes is not None and data.shape[1] <= len( nwbfile.electrodes.id.data[:]): electrode_inds = list(range(data.shape[1])) else: electrode_inds = list(range(len(nwbfile.electrodes.id.data[:]))) table_region = nwbfile.create_electrode_table_region( electrode_inds, "electrode table reference") data = H5DataIO( DataChunkIterator(tqdm(data, desc="writing lfp data"), buffer_size=int(fs * 3600)), compression="gzip", ) lfp_electrical_series = ElectricalSeries( name=name, description=description, data=data, electrodes=table_region, conversion=1e-6, rate=fs, resolution=np.nan, ) ecephys_mod = check_module( nwbfile, "ecephys", "intermediate data from extracellular electrophysiology recordings, e.g., LFP", ) if "LFP" not in ecephys_mod.data_interfaces: ecephys_mod.add_data_interface(LFP(name="LFP")) ecephys_mod.data_interfaces["LFP"].add_electrical_series( lfp_electrical_series) return lfp_electrical_series
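# A minimal usage sketch (assumed setup, not from the original source): 'nwbfile' already has an
# electrode table covering every LFP channel, and 'lfp_array' is a (num_samples x num_channels)
# array sampled at 1250 Hz.
lfp_series = write_lfp(
    nwbfile,
    data=lfp_array,
    fs=1250.0,
    electrode_inds=list(range(lfp_array.shape[1])),
    name="LFP",
    description="local field potential signal",
)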
def createCompressedDataset(array):
    """
    Request compression for the given array and return it wrapped.
    """
    return H5DataIO(data=array,
                    compression=True,
                    chunks=True,
                    shuffle=True,
                    fletcher32=True)
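# A minimal usage sketch (the names 'nwbfile' and 'raw_trace' are assumptions, not from the
# original source): wrap an array before handing it to a TimeSeries so HDF5 stores it chunked,
# shuffled, gzip-compressed, and checksummed.
import numpy as np
from pynwb import TimeSeries

raw_trace = np.random.randn(100000)  # stand-in for a recorded voltage trace
compressed_ts = TimeSeries(name='compressed_trace',
                           data=createCompressedDataset(raw_trace),
                           unit='V',
                           rate=30000.0,
                           starting_time=0.0)
nwbfile.add_acquisition(compressed_ts)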
def run_conversion(self, nwbfile: NWBFile, metadata: dict):
    event_file = self.source_data['file_path']
    events_data = pd.read_csv(event_file, delimiter=";")
    event_timestamps = events_data['Time'].to_numpy() / 1E3
    event_labels = events_data['Tag'].to_numpy()
    unique_events = set(event_labels)
    events_map = {event: n for n, event in enumerate(unique_events)}
    event_data = [events_map[event] for event in event_labels]

    # Custom labeled events
    events = LabeledEvents(
        name='LabeledEvents',
        description='Events from the experiment.',
        timestamps=H5DataIO(event_timestamps, compression="gzip"),
        resolution=np.nan,
        data=H5DataIO(event_data, compression="gzip"),
        labels=list(unique_events)  # does not support compression
    )
    nwbfile.add_acquisition(events)
def add_position_data(nwbfile, session_path, fs=1250. / 32., names=('x0', 'y0', 'x1', 'y1')): """Read raw position sensor data from .whl file Parameters ---------- nwbfile: pynwb.NWBFile session_path: str fs: float sampling rate names: iterable names of column headings """ session_name = os.path.split(session_path)[1] whl_path = os.path.join(session_path, session_name + '.whl') if not os.path.isfile(whl_path): print(whl_path + ' file not found!') return print('warning: time may not be aligned') df = pd.read_csv(whl_path, sep='\t', names=names) df.index = np.arange(len(df)) / fs df.index.name = 'tt (sec)' nwbfile.add_acquisition( SpatialSeries('position_sensor0', H5DataIO(df[['x0', 'y0']].values, compression='gzip'), 'unknown', description='raw sensor data from sensor 0', timestamps=H5DataIO(df.index.values, compression='gzip'), resolution=np.nan)) nwbfile.add_acquisition( SpatialSeries('position_sensor1', H5DataIO(df[['x1', 'y1']].values, compression='gzip'), 'unknown', description='raw sensor data from sensor 1', timestamps=H5DataIO(df.index.values, compression='gzip'), resolution=np.nan))
def write_lfp(nwbfile, data, fs, name='LFP', description='local field potential signal', electrode_inds=None): """ Add LFP from neuroscope to a "ecephys" processing module of an NWBFile Parameters ---------- nwbfile: pynwb.NWBFile data: array-like fs: float name: str description: str electrode_inds: list(int) Returns ------- LFP pynwb.ecephys.ElectricalSeries """ if electrode_inds is None: electrode_inds = list(range(data.shape[1])) table_region = nwbfile.create_electrode_table_region( electrode_inds, 'electrode table reference') data = H5DataIO(DataChunkIterator(tqdm(data, desc='writing lfp data'), buffer_size=int(fs * 3600)), compression='gzip') lfp_electrical_series = ElectricalSeries(name=name, description=description, data=data, electrodes=table_region, conversion=np.nan, rate=fs, resolution=np.nan) ecephys_mod = check_module( nwbfile, 'ecephys', 'intermediate data from extracellular electrophysiology recordings, e.g., LFP' ) if 'LFP' not in ecephys_mod.data_interfaces: ecephys_mod.add_data_interface(LFP(name='LFP')) ecephys_mod.data_interfaces['LFP'].add_electrical_series( lfp_electrical_series) return lfp_electrical_series
def add_position_data(nwbfile: NWBFile, session_path: str, fs: float = 1250. / 32., names=('x0', 'y0', 'x1', 'y1')): """Read raw position sensor data from .whl file. Parameters ---------- nwbfile: pynwb.NWBFile session_path: str fs: float sampling rate names: iterable names of column headings """ session_name = os.path.split(session_path)[1] whl_path = os.path.join(session_path, session_name + '.whl') if not os.path.isfile(whl_path): print(whl_path + ' file not found!') return df = pd.read_csv(whl_path, sep='\t', names=names) nwbfile.add_acquisition( SpatialSeries('position_sensor0', H5DataIO(df[['x0', 'y0']].values, compression='gzip'), 'unknown', description='raw sensor data from sensor 0', starting_time=0., rate=fs, resolution=np.nan)) nwbfile.add_acquisition( SpatialSeries('position_sensor1', H5DataIO(df[['x1', 'y1']].values, compression='gzip'), 'unknown', description='raw sensor data from sensor 1', starting_time=0., rate=fs, resolution=np.nan))
def append_spike_times(input_nwb_path: PathLike,
                       sweep_spike_times: Dict[int, List[float]],
                       output_nwb_path: Optional[PathLike] = None):
    """
    Appends spike times to an NWB2 file

    Parameters
    ----------
    input_nwb_path: location of input nwb file without spike times
    sweep_spike_times: Dict of sweep_num: spike times
    output_nwb_path: optional location to write new nwb file with spike times,
                     otherwise appends spike times to input file
    """
    # Copy to new location
    if output_nwb_path and output_nwb_path != input_nwb_path:
        shutil.copy(input_nwb_path, output_nwb_path)
        nwb_path = output_nwb_path
    else:
        nwb_path = input_nwb_path

    nwb_io = pynwb.NWBHDF5IO(nwb_path, mode='a', load_namespaces=True)
    nwbfile = nwb_io.read()

    spikes_module = "spikes"
    # Add spikes only if not previously added
    if spikes_module not in nwbfile.processing.keys():
        spike_module = ProcessingModule(name=spikes_module,
                                        description='detected spikes')
        for sweep_num, spike_times in sweep_spike_times.items():
            wrapped_spike_times = H5DataIO(data=np.asarray(spike_times),
                                           compression=True)
            ts = TimeSeries(timestamps=wrapped_spike_times,
                            unit='seconds',
                            data=wrapped_spike_times,
                            name=f"Sweep_{sweep_num}")
            spike_module.add_data_interface(ts)
        nwbfile.add_processing_module(spike_module)

        nwb_io.write(nwbfile)
    else:
        raise ValueError("Cannot add spike times to the nwb file: "
                         "spike times already exist!")

    nwb_io.close()
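# A hypothetical usage sketch (file names and spike times are made up, not from the original
# source): copy 'session.nwb' and append detected spike times for sweeps 12 and 13.
append_spike_times(
    input_nwb_path="session.nwb",
    sweep_spike_times={12: [0.112, 0.385, 0.941], 13: [1.027, 1.512]},
    output_nwb_path="session_with_spikes.nwb",
)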
def add_LFP(nwbfile, expt, count=1, region='CA1'): eeg_local = [ x for x in os.listdir(expt.LFPFilePath()) if x.endswith('.eeg') ][0] eeg_file = os.path.join(expt.LFPFilePath(), eeg_local) eeg_base = eeg_file.replace('.eeg', '') eeg_dict = lfph.loadEEG(eeg_base) lfp_xml_fpath = eeg_base + '.xml' channel_groups = get_channel_groups(lfp_xml_fpath) lfp_channels = channel_groups[0] lfp_fs = eeg_dict['sampeFreq'] nchannels = eeg_dict['nChannels'] lfp_signal = eeg_dict['EEG'][lfp_channels].T device_name = 'LFP_Device_{}'.format(count) device = nwbfile.create_device(device_name) electrode_group = nwbfile.create_electrode_group(name=device_name + '_electrodes', description=device_name, device=device, location=region) x, y, z = get_position(region) for channel in channel_groups[0]: nwbfile.add_electrode( float(x), float(y), float(z), # position? imp=np.nan, location=region, filtering='See lab.misc.lfp_helpers.ConvertFromRHD', group=electrode_group, id=channel) lfp_table_region = nwbfile.create_electrode_table_region( list(range(len(lfp_channels))), 'lfp electrodes') # TODO add conversion field for moving to V # TODO figure out how to link lfp data (zipping seems kludgey) lfp_elec_series = ElectricalSeries(name='LFP', data=H5DataIO(lfp_signal, compression='gzip'), electrodes=lfp_table_region, conversion=np.nan, rate=lfp_fs, resolution=np.nan) nwbfile.add_acquisition(LFP(electrical_series=lfp_elec_series))
def peyrache_spatial_series(name: str, description: str, data: np.array, conversion: float, pos_sf: float = 1250 / 32): """Specific constructor for Peyrache style spatial series.""" return SpatialSeries( name=name, description=description, data=H5DataIO(data, compression="gzip"), conversion=conversion, reference_frame="Unknown", starting_time=0.0, rate=pos_sf, resolution=np.nan, )
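# A minimal usage sketch (array contents and conversion factor are assumptions, not from the
# original source): build a gzip-compressed SpatialSeries for position samples at the default
# Peyrache rate of 1250/32 Hz.
import numpy as np

xy = np.random.rand(10000, 2)  # stand-in for tracked (x, y) positions
position_series = peyrache_spatial_series(
    name="PositionSensor0",
    description="Raw (x, y) samples from the first head-mounted LED.",
    data=xy,
    conversion=1.0,
)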
def add_two_photon_series(imaging, nwbfile, metadata, buffer_size=10): """ Auxiliary static method for nwbextractor. Adds two photon series from imaging object as TwoPhotonSeries to nwbfile object. """ metadata = dict_recursive_update(get_default_nwb_metadata(), metadata) metadata = update_dict(metadata, NwbImagingExtractor.get_nwb_metadata(imaging)) # Tests if ElectricalSeries already exists in acquisition nwb_es_names = [ac for ac in nwbfile.acquisition] opts = metadata["Ophys"]["TwoPhotonSeries"][0] if opts["name"] not in nwb_es_names: # retrieve device device = nwbfile.devices[list(nwbfile.devices.keys())[0]] metadata["Ophys"]["ImagingPlane"][0]["optical_channel"] = [ OpticalChannel(**i) for i in metadata["Ophys"]["ImagingPlane"] [0]["optical_channel"] ] metadata["Ophys"]["ImagingPlane"][0] = update_dict( metadata["Ophys"]["ImagingPlane"][0], {"device": device}) imaging_plane = nwbfile.create_imaging_plane( **metadata["Ophys"]["ImagingPlane"][0]) def data_generator(imaging): for i in range(imaging.get_num_frames()): yield imaging.get_frames(frame_idxs=[i]).T data = H5DataIO( DataChunkIterator(data_generator(imaging), buffer_size=buffer_size), compression=True, ) # using internal data. this data will be stored inside the NWB file two_p_series_kwargs = update_dict( metadata["Ophys"]["TwoPhotonSeries"][0], dict(data=data, imaging_plane=imaging_plane), ) ophys_ts = TwoPhotonSeries(**two_p_series_kwargs) nwbfile.add_acquisition(ophys_ts) return nwbfile
def roundtripContainer(self):
    # create and write data file
    data_file = NWBFile(
        session_description='a test file',
        identifier='data_file',
        session_start_time=self.start_time
    )
    data_file.add_acquisition(self.container)

    with HDF5IO(self.data_filename, 'w', manager=get_manager()) as data_write_io:
        data_write_io.write(data_file)

    # read data file
    with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io:
        data_file_obt = self.data_read_io.read()

        # write "link file" with timeseries.data that is an external link to the timeseries in "data file"
        # also link timeseries.timestamps.data to the timeseries.timestamps in "data file"
        with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
            link_file = NWBFile(
                session_description='a test file',
                identifier='link_file',
                session_start_time=self.start_time
            )
            self.link_container = TimeSeries(
                name='test_mod_ts',
                unit='V',
                data=data_file_obt.get_acquisition('data_ts'),  # test direct link
                timestamps=H5DataIO(
                    data=data_file_obt.get_acquisition('data_ts').timestamps,
                    link_data=True  # test with setting link data
                )
            )
            link_file.add_acquisition(self.link_container)
            link_write_io.write(link_file)

    # note that self.link_container contains a link to a dataset that is now closed

    # read the link file
    self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager())
    self.read_nwbfile = self.link_read_io.read()
    return self.getContainer(self.read_nwbfile)
def run_conversion(self, nwbfile: NWBFile, metadata: dict = None, stub_test: bool = False): conditions = intervals_from_traces(self.recording_extractor) mech_stim = TimeIntervals( name='MechanicalStimulus', description= "Activation times inferred from TTL commands for mechanical stimulus." ) laser_stim = TimeIntervals( name='LaserStimulus', description= "Activation times inferred from TTL commands for cortical laser stimulus." ) for j, table in enumerate([mech_stim, laser_stim]): for row in conditions[j]: table.add_row( dict(start_time=float(row[0]), stop_time=float(row[1]))) # TODO - these really should be IntervalSeries added to stimulus, rather than processing check_module(nwbfile, 'stimulus', "Contains stimuli data.").add(mech_stim) check_module(nwbfile, 'stimulus', "Contains stimuli data.").add(laser_stim) if stub_test or self.subset_channels is not None: recording = self.subset_recording(stub_test=stub_test) else: recording = self.recording_extractor # Pressure values nwbfile.add_stimulus( TimeSeries( name='MechanicalPressure', data=H5DataIO(recording.get_traces(0), compression="gzip"), unit=self.recording_extractor._channel_smrxinfo[0]['unit'], conversion=recording.get_channel_property(0, 'gain'), rate=recording.get_sampling_frequency(), description= "Pressure sensor attached to the mechanical stimulus used to repeatedly evoke spiking." ))
def add_position_data(
    nwbfile: NWBFile,
    session_path: str,
    whl_file_path: OptionalPathType = None,
    starting_time: float = 0.0,
    fs: float = 1250.0 / 32.0,
    names=("x0", "y0", "x1", "y1"),
):
    """
    Read and write raw position sensor data from .whl file.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    session_path: str
    fs: float
        sampling rate
    names: iterable
        names of column headings
    """
    session_id = Path(session_path).name
    if whl_file_path is None:
        whl_file_path = Path(session_path) / f"{session_id}.whl"
    assert whl_file_path.is_file(), f".whl file ({whl_file_path}) not found!"

    df = pd.read_csv(whl_file_path, sep="\t", names=names)
    for x in [0, 1]:
        nwbfile.add_acquisition(
            SpatialSeries(
                name=f"PositionSensor{x}",
                description=f"Raw sensor data from sensor {x}.",
                data=H5DataIO(df[[f"x{x}", f"y{x}"]].values, compression="gzip"),
                reference_frame="Unknown",
                conversion=np.nan,  # whl is in arbitrary units
                starting_time=starting_time,
                rate=fs,
                resolution=np.nan,
            ))
def run_conversion(self, nwbfile: NWBFile, metadata: dict):
    video_folder = Path(self.source_data['folder_path'])
    video_file_path_list = [
        str(x) for x in video_folder.iterdir() if x.suffix == ".mkv"
    ]

    video_timestamps = np.empty(0)
    for video_file_path in video_file_path_list:
        video_time_df = pd.read_csv(video_file_path.replace(".mkv", "_timestamps.csv"),
                                    delimiter=";",
                                    skipinitialspace=True)
        video_timestamps = np.append(
            video_timestamps, video_time_df['timestamp'].to_numpy() / 1E3)

    # Externally stored videos with gzip-compressed timestamps
    videos = ImageSeries(name='Videos',
                         description="Videos recorded by TIS camera.",
                         format="external",
                         external_file=video_file_path_list,
                         timestamps=H5DataIO(video_timestamps, compression="gzip"))
    nwbfile.add_acquisition(videos)
def create_acquisition(self): """ Acquisition data like audiospectrogram(raw beh data), nidq(raw ephys data), raw camera data. These are independent of probe type. """ for neurodata_type_name, neurodata_type_args_list in self.nwb_metadata[ 'Acquisition'].items(): data_retrieved_args_list = self._get_data(neurodata_type_args_list) for neurodata_type_args in data_retrieved_args_list: if neurodata_type_name == 'ImageSeries': for types, times in zip(neurodata_type_args['data'], neurodata_type_args['timestamps']): customargs = dict(name='camera_raw', external_file=[str(types)], format='external', timestamps=times, unit='n.a.') self.nwbfile.add_acquisition(ImageSeries(**customargs)) elif neurodata_type_name == 'DecompositionSeries': neurodata_type_args['bands'] = np.squeeze( neurodata_type_args['bands']) freqs = DynamicTable( 'bands', 'spectogram frequencies', id=np.arange(neurodata_type_args['bands'].shape[0])) freqs.add_column('freq', 'frequency value', data=neurodata_type_args['bands']) neurodata_type_args.update(dict(bands=freqs)) temp = neurodata_type_args['data'][:, :, np.newaxis] neurodata_type_args['data'] = np.moveaxis( temp, [0, 1, 2], [0, 2, 1]) ts = neurodata_type_args.pop('timestamps') starting_time = ts[0][0] if isinstance( ts[0], np.ndarray) else ts[0] neurodata_type_args.update( dict(starting_time=np.float64(starting_time), rate=1 / np.mean(np.diff(ts.squeeze())), unit='sec')) self.nwbfile.add_acquisition( DecompositionSeries(**neurodata_type_args)) elif neurodata_type_name == 'ElectricalSeries': if not self.electrode_table_exist: self.create_electrode_table_ecephys() if neurodata_type_args['name'] in ['raw.lf', 'raw.ap']: for probe_no in range(self.no_probes): if neurodata_type_args['data'][probe_no].shape[ 1] > self._one_data.data_attrs_dump[ 'electrode_table_length'][probe_no]: if 'channels.rawInd' in self._one_data.loaded_datasets: channel_idx = self._one_data.loaded_datasets[ 'channels.rawInd'][ probe_no].data.astype('int') else: warnings.warn( 'could not find channels.rawInd') break else: channel_idx = slice(None) self.nwbfile.add_acquisition( ElectricalSeries( name=neurodata_type_args['name'] + '_' + self.nwb_metadata['Probes'][probe_no] ['name'], starting_time=np.abs( np.round( neurodata_type_args['timestamps'] [probe_no][0, 1], 2) ), # round starting times of the order of 1e-5 rate=neurodata_type_args['data'] [probe_no].fs, data=H5DataIO( DataChunkIterator( _iter_datasetview( neurodata_type_args['data'] [probe_no], channel_ids=channel_idx), buffer_size=self.buffer_size), compression=True, shuffle=self.shuffle, compression_opts=self.complevel), electrodes=self.probe_dt_region[probe_no], channel_conversion=neurodata_type_args[ 'data'] [probe_no].channel_conversion_sample2v[ neurodata_type_args['data'] [probe_no].type][channel_idx])) elif neurodata_type_args['name'] in ['raw.nidq']: self.nwbfile.add_acquisition( ElectricalSeries(**neurodata_type_args))
def add_imaging(nwbfile, expt, z_spacing=25., device_name='2P Microscope', location='CA1', indicator='GCaMP6f', excitation_lambda=920., data_root=None, stub=False): color_dict = {'Ch1': 'Red', 'Ch2': 'Green'} # Emissions for mCherry and GCaMP # TODO make this more flexible emission = {'Ch1': 640., 'Ch2': 530.} ch_names = ['Ch1', 'Ch2'] optical_channels = [] for ch_name in ch_names: optical_channel = OpticalChannel(name=ch_name, description=color_dict[ch_name], emission_lambda=emission[ch_name]) optical_channels.append(optical_channel) h5_path = glob(os.path.join(data_root, '*.h5'))[0] pv_xml = os.path.join(data_root, os.path.basename(data_root) + '.xml') pv_version = get_prairieview_version(pv_xml) [y_um, x_um] = get_element_size_um(pv_xml, pv_version)[-2:] elem_size_um = [z_spacing, y_um, x_um] # TODO allow for flexibility in setting device, excitation, indicator, location # TODO nwb-schema issue #151 needs to be resolved so we can actually use imaging data size device = nwbfile.create_device(device_name) imaging_plane = nwbfile.create_imaging_plane( name='Imaging Data', optical_channel=optical_channels, description='imaging data for both channels', device=device, excitation_lambda=excitation_lambda, imaging_rate=1 / expt.frame_period(), indicator=indicator, location=location, conversion=1.0, # Should actually be elem_size_um manifold=np.ones((2, 2, 2, 3)), reference_frame='reference_frame', unit='um') f = h5py.File(h5_path, 'r') imaging_data = f['imaging'] channel_names = f['imaging'].attrs['channel_names'] for c, channel_name in enumerate(channel_names): if not stub: data_in = H5DataIO(DataChunkIterator(tqdm( (np.swapaxes(data[..., c], 0, 2) for data in imaging_data), total=imaging_data.shape[0]), buffer_size=5000), compression='gzip') else: data_in = np.ones((10, 10, 10)) # use for dev testing for speed # TODO parse env file to add power and pmt gain? image_series = TwoPhotonSeries(name='2p_Series_' + channel_name, dimension=expt.frame_shape()[:-1], data=data_in, imaging_plane=imaging_plane, rate=1 / expt.frame_period(), starting_time=0., description=channel_name) nwbfile.add_acquisition(image_series)
def yuta2nwb(session_path='/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903', subject_xls=None, include_spike_waveforms=True, stub=True): subject_path, session_id = os.path.split(session_path) fpath_base = os.path.split(subject_path)[0] identifier = session_id mouse_number = session_id[9:11] if '-' in session_id: subject_id, date_text = session_id.split('-') b = False else: subject_id, date_text = session_id.split('b') b = True if subject_xls is None: subject_xls = os.path.join(subject_path, 'YM' + mouse_number + ' exp_sheet.xlsx') else: if not subject_xls[-4:] == 'xlsx': subject_xls = os.path.join(subject_xls, 'YM' + mouse_number + ' exp_sheet.xlsx') session_start_time = dateparse(date_text, yearfirst=True) df = pd.read_excel(subject_xls) subject_data = {} for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery', 'virus injection', 'mouseID']: names = df.iloc[:, 0] if key in names.values: subject_data[key] = df.iloc[np.argmax(names == key), 1] if isinstance(subject_data['DOB'], datetime): age = session_start_time - subject_data['DOB'] else: age = None subject = Subject(subject_id=subject_id, age=str(age), genotype=subject_data['genotype'], species='mouse') nwbfile = NWBFile(session_description='mouse in open exploration and theta maze', identifier=identifier, session_start_time=session_start_time.astimezone(), file_create_date=datetime.now().astimezone(), experimenter='Yuta Senzai', session_id=session_id, institution='NYU', lab='Buzsaki', subject=subject, related_publications='DOI:10.1016/j.neuron.2016.12.011') print('reading and writing raw position data...', end='', flush=True) ns.add_position_data(nwbfile, session_path) shank_channels = ns.get_shank_channels(session_path)[:8] all_shank_channels = np.concatenate(shank_channels) print('setting up electrodes...', end='', flush=True) hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv') lfp_channel = get_reference_elec(subject_xls, hilus_csv_path, session_start_time, session_id, b=b) print(lfp_channel) custom_column = [{'name': 'theta_reference', 'description': 'this electrode was used to calculate LFP canonical bands', 'data': all_shank_channels == lfp_channel}] ns.write_electrode_table(nwbfile, session_path, custom_columns=custom_column, max_shanks=max_shanks) print('reading LFPs...', end='', flush=True) lfp_fs, all_channels_data = ns.read_lfp(session_path, stub=stub) lfp_data = all_channels_data[:, all_shank_channels] print('writing LFPs...', flush=True) # lfp_data[:int(len(lfp_data)/4)] lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp', description='lfp signal for all shank electrodes') for name, channel in special_electrode_dict.items(): ts = TimeSeries(name=name, description='environmental electrode recorded inline with neural data', data=all_channels_data[channel], rate=lfp_fs, unit='V', conversion=np.nan, resolution=np.nan) nwbfile.add_acquisition(ts) # compute filtered LFP print('filtering LFP...', end='', flush=True) all_lfp_phases = [] for passband in ('theta', 'gamma'): lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(), lfp_fs, passband=passband) lfp_phase, _ = hilbert_lfp(lfp_fft) all_lfp_phases.append(lfp_phase[:, np.newaxis]) data = np.dstack(all_lfp_phases) print('done.', flush=True) if include_spike_waveforms: print('writing waveforms...', end='', flush=True) for shankn in np.arange(1, 9, dtype=int): ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub) print('done.', flush=True) decomp_series = 
DecompositionSeries(name='LFPDecompositionSeries', description='Theta and Gamma phase for reference LFP', data=data, rate=lfp_fs, source_timeseries=lfp_ts, metric='phase', unit='radians') decomp_series.add_band(band_name='theta', band_limits=(4, 10)) decomp_series.add_band(band_name='gamma', band_limits=(30, 80)) check_module(nwbfile, 'ecephys', 'contains processed extracellular electrophysiology data').add_data_interface(decomp_series) [nwbfile.add_stimulus(x) for x in ns.get_events(session_path)] # create epochs corresponding to experiments/environments for the mouse sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id)) exist_pos_data = any(os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name']))) for task_type in task_types) if exist_pos_data: nwbfile.add_epoch_column('label', 'name of epoch') for task_type in task_types: label = task_type['name'] file = os.path.join(session_path, session_id + '__' + label + '.mat') if os.path.isfile(file): print('loading position for ' + label + '...', end='', flush=True) pos_obj = Position(name=label + '_position') matin = loadmat(file) tt = matin['twhl_norm'][:, 0] exp_times = find_discontinuities(tt) if 'conversion' in task_type: conversion = task_type['conversion'] else: conversion = np.nan for pos_type in ('twhl_norm', 'twhl_linearized'): if pos_type in matin: pos_data_norm = matin[pos_type][:, 1:] spatial_series_object = SpatialSeries( name=label + '_{}_spatial_series'.format(pos_type), data=H5DataIO(pos_data_norm, compression='gzip'), reference_frame='unknown', conversion=conversion, resolution=np.nan, timestamps=H5DataIO(tt, compression='gzip')) pos_obj.add_spatial_series(spatial_series_object) check_module(nwbfile, 'behavior', 'contains processed behavioral data').add_data_interface(pos_obj) for i, window in enumerate(exp_times): nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=label + '_' + str(i)) print('done.') # there are occasional mismatches between the matlab struct and the neuroscope files # regions: 3: 'CA3', 4: 'DG' df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path) celltype_names = [] for celltype_id, region_id in zip(df_unit_features['fineCellType'].values, df_unit_features['region'].values): if celltype_id == 1: if region_id == 3: celltype_names.append('pyramidal cell') elif region_id == 4: celltype_names.append('granule cell') else: raise Exception('unknown type') elif not np.isfinite(celltype_id): celltype_names.append('missing') else: celltype_names.append(celltype_dict[celltype_id]) custom_unit_columns = [ { 'name': 'cell_type', 'description': 'name of cell type', 'data': celltype_names}, { 'name': 'global_id', 'description': 'global id for cell for entire experiment', 'data': df_unit_features['unitID'].values}, { 'name': 'max_electrode', 'description': 'electrode that has the maximum amplitude of the waveform', 'data': get_max_electrodes(nwbfile, session_path), 'table': nwbfile.electrodes }] ns.add_units(nwbfile, session_path, custom_unit_columns, max_shanks=max_shanks) trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat') if os.path.isfile(trialdata_path): trials_data = loadmat(trialdata_path)['EightMazeRun'] trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat') trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]] features = trialdatainfo[:7] features[:2] = 'start_time', 'stop_time', [nwbfile.add_trial_column(x, 'description') for x 
in features[4:] + ['condition']] for trial_data in trials_data: if trial_data[3]: cond = 'run_left' else: cond = 'run_right' nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1], condition=cond, error_run=trial_data[4], stim_run=trial_data[5], both_visit=trial_data[6]) """ mono_syn_fpath = os.path.join(session_path, session_id+'-MonoSynConvClick.mat') matin = loadmat(mono_syn_fpath) exc = matin['FinalExcMonoSynID'] inh = matin['FinalInhMonoSynID'] #exc_obj = CatCellInfo(name='excitatory_connections', # indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1) #module_cellular.add_container(exc_obj) #inh_obj = CatCellInfo(name='inhibitory_connections', # indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1) #module_cellular.add_container(inh_obj) """ if os.path.isfile(sleep_state_fpath): matin = loadmat(sleep_state_fpath)['StatePeriod'] table = TimeIntervals(name='states', description='sleep states of animal') table.add_column(name='label', description='sleep state') data = [] for name in matin.dtype.names: for row in matin[name][0][0]: data.append({'start_time': row[0], 'stop_time': row[1], 'label': name}) [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])] check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table) if stub: out_fname = session_path + '_stub.nwb' else: out_fname = session_path + '.nwb' print('writing NWB file...', end='', flush=True) with NWBHDF5IO(out_fname, mode='w') as io: io.write(nwbfile) print('done.') print('testing read...', end='', flush=True) # test read with NWBHDF5IO(out_fname, mode='r') as io: io.read() print('done.')
if isinstance(lfp_file, str): # this needs to be rewritten to # 1) pull the number of channel (here hard coded as N = 80), from the XML # 2) load in chunks so you don't overwhelm the RAM all_channels = np.fromfile(lfp_file, dtype=np.int16).reshape(-1, 80) all_channels_lfp = all_channels[:, all_shank_channels] print('done.') if WRITE_ALL_LFPS: print('making ElectricalSeries objects for LFP...', end='', flush=True) lfp = nwbfile.add_acquisition( ElectricalSeries('lfp', 'lfp signal for all shank electrodes', H5DataIO(all_channels_lfp, compression='gzip'), all_table_region, conversion=np.nan, starting_time=0.0, rate=lfp_fs, resolution=np.nan)) all_ts.append(lfp) print('done.') module_behavior = nwbfile.create_processing_module( name='behavior', description='contains behavioral data') out_fname = fname + '.nwb' print('writing NWB file...', end='', flush=True) with NWBHDF5IO(out_fname, mode='w') as io: io.write(nwbfile, cache_spec=False)
def convert_data( self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False, include_spike_waveforms: bool = False ): session_path = self.input_args["folder_path"] # TODO: check/enforce format? task_types = metadata_dict.get("task_types", []) subject_path, session_id = os.path.split(session_path) fpath_base = os.path.split(subject_path)[0] [nwbfile.add_stimulus(x) for x in get_events(session_path)] exist_pos_data = any( os.path.isfile(os.path.join(session_path, "{}__{}.mat".format(session_id, task_type["name"]))) for task_type in task_types ) if exist_pos_data: nwbfile.add_epoch_column("label", "name of epoch") for task_type in task_types: label = task_type["name"] file = os.path.join(session_path, session_id + "__" + label + ".mat") if os.path.isfile(file): pos_obj = Position(name=label + "_position") matin = loadmat(file) tt = matin["twhl_norm"][:, 0] exp_times = find_discontinuities(tt) if "conversion" in task_type: conversion = task_type["conversion"] else: conversion = np.nan for pos_type in ("twhl_norm", "twhl_linearized"): if pos_type in matin: pos_data_norm = matin[pos_type][:, 1:] spatial_series_object = SpatialSeries( name=label + "_{}_spatial_series".format(pos_type), data=H5DataIO(pos_data_norm, compression="gzip"), reference_frame="unknown", conversion=conversion, resolution=np.nan, timestamps=H5DataIO(tt, compression="gzip"), ) pos_obj.add_spatial_series(spatial_series_object) check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj) for i, window in enumerate(exp_times): nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=label + "_" + str(i)) trialdata_path = os.path.join(session_path, session_id + "__EightMazeRun.mat") if os.path.isfile(trialdata_path): trials_data = loadmat(trialdata_path)["EightMazeRun"] trialdatainfo_path = os.path.join(fpath_base, "EightMazeRunInfo.mat") trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]] features = trialdatainfo[:7] features[:2] = ( "start_time", "stop_time", ) [nwbfile.add_trial_column(x, "description") for x in features[4:] + ["condition"]] for trial_data in trials_data: if trial_data[3]: cond = "run_left" else: cond = "run_right" nwbfile.add_trial( start_time=trial_data[0], stop_time=trial_data[1], condition=cond, error_run=trial_data[4], stim_run=trial_data[5], both_visit=trial_data[6], ) sleep_state_fpath = os.path.join(session_path, "{}.SleepState.states.mat".format(session_id)) # label renaming specific to Watson state_label_names = {"WAKEstate": "Awake", "NREMstate": "Non-REM", "REMstate": "REM"} if os.path.isfile(sleep_state_fpath): matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0] table = TimeIntervals(name="states", description="Sleep states of animal.") table.add_column(name="label", description="Sleep state.") data = [] for name in matin.dtype.names: for row in matin[name][0][0]: data.append({"start_time": row[0], "stop_time": row[1], "label": state_label_names[name]}) [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])] check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table)
data = np.arange(100, 200, 10)
timestamps = np.arange(10)
test_ts = TimeSeries(name='test_regular_timeseries',
                     data=data,
                     unit='SIunit',
                     timestamps=timestamps)
nwbfile.add_acquisition(test_ts)

####################
# Now let's say we want to compress the recorded data values. We now simply need to wrap our data with H5DataIO.
# Everything else remains the same.

from hdmf.backends.hdf5.h5_utils import H5DataIO

wrapped_data = H5DataIO(data=data, compression=True)  # <----
test_ts = TimeSeries(name='test_compressed_timeseries',
                     data=wrapped_data,  # <----
                     unit='SIunit',
                     timestamps=timestamps)
nwbfile.add_acquisition(test_ts)

####################
# This simple approach gives us access to a broad range of advanced I/O features, such as chunking and
# compression. For a complete list of all available settings see :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

####################
# Chunking
# --------
#
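####################
# A minimal sketch of explicit chunking (the array shape and chunk shape below are arbitrary
# illustrative choices): pass a ``chunks`` tuple with the same number of dimensions as the data,
# and HDF5 will store the dataset in blocks of that shape. Chunked layout is also what makes
# compression possible.
chunked_data = H5DataIO(data=np.arange(10000).reshape(1000, 10),
                        chunks=(100, 10),   # each HDF5 chunk holds a 100 x 10 block of values
                        compression=True)
test_ts = TimeSeries(name='test_chunked_timeseries',
                     data=chunked_data,
                     unit='SIunit',
                     rate=1.0,
                     starting_time=0.0)
nwbfile.add_acquisition(test_ts)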
def convert_data( self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False, include_spike_waveforms: bool = False, ): """Convert the behavioral portion of a particular session of the GrosmarkAD dataset.""" session_path = self.input_args["folder_path"] subject_path, session_id = os.path.split(session_path) # Stimuli [nwbfile.add_stimulus(x) for x in get_events(session_path)] # States sleep_state_fpath = os.path.join(session_path, "{session_id}.SleepState.states.mat") # label renaming specific to Watson state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM") if os.path.isfile(sleep_state_fpath): matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0] table = TimeIntervals(name="states", description="Sleep states of animal.") table.add_column(name="label", description="Sleep state.") data = [] for name in matin.dtype.names: for row in matin[name][0][0]: data.append( dict( start_time=row[0], stop_time=row[1], label=state_label_names[name], )) [ table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"]) ] check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table) # Position pos_filepath = Path( session_path) / f"{session_id}.position.behavior.mat" pos_mat = loadmat(str(pos_filepath.absolute())) starting_time = float( pos_mat["position"]["timestamps"][0][0] [0]) # confirmed to be a regularly sampled series rate = float( pos_mat["position"]["timestamps"][0][0][1]) - starting_time if pos_mat["position"]["units"][0][0][0] == "m": conversion = 1.0 else: warnings.warn( f"Spatial units ({pos_mat['position']['units'][0][0][0]}) not listed in meters; " "setting conversion to nan.") conversion = np.nan pos_data = [[x[0], y[0]] for x, y in zip( pos_mat["position"]["position"][0][0]["x"][0][0], pos_mat["position"]["position"][0][0]["y"][0][0], )] linearized_data = [[ lin[0] ] for lin in pos_mat["position"]["position"][0][0]["lin"][0][0]] label = pos_mat["position"]["behaviorinfo"][0][0]["MazeType"][0][0][ 0].replace(" ", "") pos_obj = Position(name=f"{label}Position") spatial_series_object = SpatialSeries( name=f"{label}SpatialSeries", description= "(x,y) coordinates tracking subject movement through the maze.", data=H5DataIO(pos_data, compression="gzip"), reference_frame="unknown", conversion=conversion, starting_time=starting_time, rate=rate, resolution=np.nan, ) pos_obj.add_spatial_series(spatial_series_object) check_module( nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj) lin_pos_obj = Position(name=f"{label}LinearizedPosition") lin_spatial_series_object = SpatialSeries( name=f"{label}LinearizedTimeSeries", description= "Linearized position, defined as starting at the edge of reward area, " "and increasing clockwise, terminating at the opposing edge of the reward area.", data=H5DataIO(linearized_data, compression="gzip"), reference_frame="unknown", conversion=conversion, starting_time=starting_time, rate=rate, resolution=np.nan, ) lin_pos_obj.add_spatial_series(lin_spatial_series_object) check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface( lin_pos_obj) # Epochs epoch_names = list(pos_mat["position"]["Epochs"][0][0].dtype.names) epoch_windows = [[float(start), float(stop)] for x in pos_mat["position"]["Epochs"][0][0][0][0] for start, stop in x] nwbfile.add_epoch_column("label", "name of epoch") for j, epoch_name in enumerate(epoch_names): nwbfile.add_epoch( start_time=epoch_windows[j][0], stop_time=epoch_windows[j][1], label=epoch_name, )
def write_segmentation( segext_obj: SegmentationExtractor, save_path: PathType = None, plane_num=0, metadata: dict = None, overwrite: bool = True, buffer_size: int = 10, nwbfile=None, ): assert ( save_path is None or nwbfile is None ), "Either pass a save_path location, or nwbfile object, but not both!" # parse metadata correctly: if isinstance(segext_obj, MultiSegmentationExtractor): segext_objs = segext_obj.segmentations if metadata is not None: assert isinstance(metadata, list), ( "For MultiSegmentationExtractor enter 'metadata' as a list of " "SegmentationExtractor metadata") assert len(metadata) == len(segext_objs), ( "The 'metadata' argument should be a list with the same " "number of elements as the segmentations in the " "MultiSegmentationExtractor") else: segext_objs = [segext_obj] if metadata is not None and not isinstance(metadata, list): metadata = [metadata] metadata_base_list = [ NwbSegmentationExtractor.get_nwb_metadata(sgobj) for sgobj in segext_objs ] print(f"writing nwb for {segext_obj.extractor_name}\n") # updating base metadata with new: for num, data in enumerate(metadata_base_list): metadata_input = metadata[num] if metadata else {} metadata_base_list[num] = dict_recursive_update( metadata_base_list[num], metadata_input) metadata_base_common = metadata_base_list[0] # build/retrieve nwbfile: if nwbfile is not None: assert isinstance( nwbfile, NWBFile), "'nwbfile' should be of type pynwb.NWBFile" write = False else: write = True save_path = Path(save_path) assert save_path.suffix == ".nwb" if save_path.is_file() and not overwrite: nwbfile_exist = True file_mode = "r+" else: if save_path.is_file(): os.remove(save_path) if not save_path.parent.is_dir(): save_path.parent.mkdir(parents=True) nwbfile_exist = False file_mode = "w" io = NWBHDF5IO(str(save_path), file_mode) if nwbfile_exist: nwbfile = io.read() else: nwbfile = NWBFile(**metadata_base_common["NWBFile"]) # Subject: if metadata_base_common.get("Subject") and nwbfile.subject is None: nwbfile.subject = Subject(**metadata_base_common["Subject"]) # Processing Module: if "ophys" not in nwbfile.processing: ophys = nwbfile.create_processing_module( "ophys", "contains optical physiology processed data") else: ophys = nwbfile.get_processing_module("ophys") for plane_no_loop, (segext_obj, metadata) in enumerate( zip(segext_objs, metadata_base_list)): # Device: if metadata["Ophys"]["Device"][0]["name"] not in nwbfile.devices: nwbfile.create_device(**metadata["Ophys"]["Device"][0]) # ImageSegmentation: image_segmentation_name = ( "ImageSegmentation" if plane_no_loop == 0 else f"ImageSegmentation_Plane{plane_no_loop}") if image_segmentation_name not in ophys.data_interfaces: image_segmentation = ImageSegmentation( name=image_segmentation_name) ophys.add(image_segmentation) else: image_segmentation = ophys.data_interfaces.get( image_segmentation_name) # OpticalChannel: optical_channels = [ OpticalChannel(**i) for i in metadata["Ophys"]["ImagingPlane"] [0]["optical_channel"] ] # ImagingPlane: image_plane_name = ("ImagingPlane" if plane_no_loop == 0 else f"ImagePlane_{plane_no_loop}") if image_plane_name not in nwbfile.imaging_planes.keys(): input_kwargs = dict( name=image_plane_name, device=nwbfile.get_device( metadata_base_common["Ophys"]["Device"][0]["name"]), ) metadata["Ophys"]["ImagingPlane"][0][ "optical_channel"] = optical_channels input_kwargs.update(**metadata["Ophys"]["ImagingPlane"][0]) if "imaging_rate" in input_kwargs: input_kwargs["imaging_rate"] = float( input_kwargs["imaging_rate"]) imaging_plane = 
nwbfile.create_imaging_plane(**input_kwargs) else: imaging_plane = nwbfile.imaging_planes[image_plane_name] # PlaneSegmentation: input_kwargs = dict( description="output from segmenting imaging plane", imaging_plane=imaging_plane, ) ps_metadata = metadata["Ophys"]["ImageSegmentation"][ "plane_segmentations"][0] if ps_metadata[ "name"] not in image_segmentation.plane_segmentations: ps_exist = False else: ps = image_segmentation.get_plane_segmentation( ps_metadata["name"]) ps_exist = True roi_ids = segext_obj.get_roi_ids() accepted_list = segext_obj.get_accepted_list() accepted_list = [] if accepted_list is None else accepted_list rejected_list = segext_obj.get_rejected_list() rejected_list = [] if rejected_list is None else rejected_list accepted_ids = [1 if k in accepted_list else 0 for k in roi_ids] rejected_ids = [1 if k in rejected_list else 0 for k in roi_ids] roi_locations = np.array(segext_obj.get_roi_locations()).T def image_mask_iterator(): for id in segext_obj.get_roi_ids(): img_msks = segext_obj.get_roi_image_masks( roi_ids=[id]).T.squeeze() yield img_msks if not ps_exist: input_kwargs.update( **ps_metadata, columns=[ VectorData( data=H5DataIO( DataChunkIterator(image_mask_iterator(), buffer_size=buffer_size), compression=True, compression_opts=9, ), name="image_mask", description="image masks", ), VectorData( data=roi_locations, name="RoiCentroid", description= "x,y location of centroid of the roi in image_mask", ), VectorData( data=accepted_ids, name="Accepted", description= "1 if ROi was accepted or 0 if rejected as a cell during segmentation operation", ), VectorData( data=rejected_ids, name="Rejected", description= "1 if ROi was rejected or 0 if accepted as a cell during segmentation operation", ), ], id=roi_ids, ) ps = image_segmentation.create_plane_segmentation( **input_kwargs) # Fluorescence Traces: if "Flourescence" not in ophys.data_interfaces: fluorescence = Fluorescence() ophys.add(fluorescence) else: fluorescence = ophys.data_interfaces["Fluorescence"] roi_response_dict = segext_obj.get_traces_dict() roi_table_region = ps.create_roi_table_region( description=f"region for Imaging plane{plane_no_loop}", region=list(range(segext_obj.get_num_rois())), ) rate = (np.float("NaN") if segext_obj.get_sampling_frequency() is None else segext_obj.get_sampling_frequency()) for i, j in roi_response_dict.items(): data = getattr(segext_obj, f"_roi_response_{i}") if data is not None: data = np.asarray(data) trace_name = "RoiResponseSeries" if i == "raw" else i.capitalize( ) trace_name = (trace_name if plane_no_loop == 0 else trace_name + f"_Plane{plane_no_loop}") input_kwargs = dict( name=trace_name, data=data.T, rois=roi_table_region, rate=rate, unit="n.a.", ) if trace_name not in fluorescence.roi_response_series: fluorescence.create_roi_response_series(**input_kwargs) # create Two Photon Series: if "TwoPhotonSeries" not in nwbfile.acquisition: warn( "could not find TwoPhotonSeries, using ImagingExtractor to create an nwbfile" ) # adding images: images_dict = segext_obj.get_images_dict() if any([image is not None for image in images_dict.values()]): images_name = ("SegmentationImages" if plane_no_loop == 0 else f"SegmentationImages_Plane{plane_no_loop}") if images_name not in ophys.data_interfaces: images = Images(images_name) for img_name, img_no in images_dict.items(): if img_no is not None: images.add_image( GrayscaleImage(name=img_name, data=img_no.T)) ophys.add(images) # saving NWB file: if write: io.write(nwbfile) io.close() # test read with NWBHDF5IO(str(save_path), "r") as 
io: io.read()
def write_spike_waveforms_single_shank( nwbfile: NWBFile, session_path: str, shankn: int, spikes_nsamples: int, nchan_on_shank: int, stub_test: bool = False, compression: Optional[str] = "gzip", ): """Write spike waveforms to NWBFile. Parameters ---------- nwbfile: pynwb.NWBFile session_path: str shankn: int spikes_nsamples: int nchan_on_shank: int stub_test: bool, optional default: False compression: str (optional) default: 'gzip' """ session_name = os.path.split(session_path)[1] spk_file = os.path.join(session_path, session_name + ".spk.{}".format(shankn)) assert os.path.isfile( spk_file ), "No .spk.{} file found at the path location!" "Unable to retrieve spike waveforms.".format( shankn) group = nwbfile.electrode_groups["shank{}".format(shankn)] elec_idx = list( np.where(np.array(nwbfile.ec_electrodes["group"]) == group)[0]) table_region = nwbfile.create_electrode_table_region( elec_idx, group.name + " region") if stub_test: n_stub_spikes = 50 spks = np.fromfile( spk_file, dtype=np.int16, count=n_stub_spikes * spikes_nsamples * nchan_on_shank, ).reshape(n_stub_spikes, spikes_nsamples, nchan_on_shank) spk_times = read_spike_times(session_path, shankn)[:n_stub_spikes] else: spks = np.fromfile(spk_file, dtype=np.int16).reshape(-1, spikes_nsamples, nchan_on_shank) spk_times = read_spike_times(session_path, shankn) if compression: data = H5DataIO(spks, compression=compression) else: data = spks spike_event_series = SpikeEventSeries( name="SpikeWaveforms{}".format(shankn), data=data, timestamps=spk_times, conversion=1e-6, electrodes=table_region, ) check_module(nwbfile, "ecephys").add_data_interface(spike_event_series)
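# A minimal usage sketch (paths and shank parameters are assumptions, not from the original
# source): write gzip-compressed waveforms for shank 3, whose .spk.3 file holds 32-sample
# snippets from 8 channels.
write_spike_waveforms_single_shank(
    nwbfile=nwbfile,
    session_path="/data/YutaMouse41-150903",
    shankn=3,
    spikes_nsamples=32,
    nchan_on_shank=8,
    stub_test=False,
    compression="gzip",
)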
nwbfile4.add_acquisition(test_ts4)

####################
# In the above case we did not make it explicit how we want to handle the data from
# our TimeSeries; this means that :py:class:`~pynwb.NWBHDF5IO` will need to
# determine on write how to treat the dataset. We can make this explicit and customize this
# behavior on a per-dataset basis by wrapping our dataset using
# :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

from hdmf.backends.hdf5.h5_utils import H5DataIO

# Create another timeseries that links to the same data
test_ts5 = TimeSeries(name='test_timeseries5',
                      data=H5DataIO(data=timeseries_1_data,  # <-------
                                    link_data=True),         # <-------
                      unit='SIunit',
                      timestamps=timestamps)
nwbfile4.add_acquisition(test_ts5)

####################
# Step 4: Write the data
# ^^^^^^^^^^^^^^^^^^^^^^^
#
from pynwb import NWBHDF5IO

io4 = NWBHDF5IO(filename4, 'w')
io4.write(nwbfile4,
          link_data=True)  # <-------- Specify default behavior to link rather than copy data
io4.close()
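####################
# A quick sanity-check sketch (an addition here; the HDF5 path assumes the default layout pynwb
# uses for acquisition data): inspect the low-level link with h5py to confirm the dataset was
# linked rather than copied.
import h5py

with h5py.File(filename4, 'r') as f:
    link = f.get('/acquisition/test_timeseries5/data', getlink=True)
    print(type(link))  # expected to be an h5py link class (e.g. ExternalLink), not a copied dataset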
###############################################################################
# Creating an expandable table
# ----------------------------
# When using the default HDF5 backend, each column of these tables is an HDF5 Dataset,
# which by default is fixed in size. This means that once a file is written, it is not
# possible to add a new row. If you want to be able to save this file, load it, and add
# more rows to the table, you will need to set this up when you create the
# :py:class:`~hdmf.common.table.DynamicTable`. You do this by wrapping the data with
# :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` and the argument ``maxshape=(None, )``.

from hdmf.backends.hdf5.h5_utils import H5DataIO

col1 = VectorData(
    name='expandable_col1',
    description='column #1',
    data=H5DataIO(data=[1, 2], maxshape=(None, )),
)
col2 = VectorData(
    name='expandable_col2',
    description='column #2',
    data=H5DataIO(data=['a', 'b'], maxshape=(None, )),
)

# don't forget to wrap the row IDs too!
ids = ElementIdentifiers(
    name='id',
    data=H5DataIO(data=[0, 1], maxshape=(None, )),
)

expandable_table = DynamicTable(
    name='expandable_table',
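    # Completion sketch for the constructor above: the description text is an assumption,
    # while the wrapped columns and ids are the objects created just before.
    description='a table whose rows can still be added after the file is written',
    columns=[col1, col2],
    id=ids,
)

####################
# Because every column (and the ids) was wrapped with ``maxshape=(None, )``, the same
# ``add_row`` call shown here should also work on a table that has been written to disk,
# read back with the file opened in append mode, and then extended.
expandable_table.add_row(expandable_col1=3, expandable_col2='c')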
def run_conversion(self, nwbfile: NWBFile, metadata: dict, stub_test: bool = False, external_mode: bool = True, starting_times: Optional[list] = None, chunk_data: bool = True, module_name: Optional[str] = None, module_description: Optional[str] = None): """ Convert the movie data files to ImageSeries and write them in the NWBFile. Parameters ---------- nwbfile : NWBFile metadata : dict stub_test : bool, optional If True, truncates the write operation for fast testing. The default is False. external_mode : bool, optional ImageSeries in NWBFiles may contain either explicit movie data or file paths to external movie files. If True, this utilizes the more efficient method of merely encoding the file path linkage (recommended). For data sharing, the video files must be contained in the same folder as the NWBFile. If the intention of this NWBFile involves an upload to DANDI, the non-NWBFile types are not allowed so this flag would have to be set to False. The default is True. starting_times : list, optional List of start times for each movie. If unspecified, assumes that the movies in the file_paths list are in sequential order and are contiguous. chunk_data : bool, optional If True, uses a DataChunkIterator to read and write the movie, reducing overhead RAM usage at the cost of reduced conversion speed (compared to loading video entirely into RAM as an array). This will also force to True, even if manually set to False, whenever the video file size exceeds available system RAM by a factor of 70 (from compression experiments). Based on experiements for a ~30 FPS system of ~400 x ~600 color frames, the equivalent uncompressed RAM usage is around 2GB per minute of video. The default is True. module_name: str, optional Name of the processing module to add the ImageSeries object to. Default behavior is to add as acquisition. module_description: str, optional If the processing module specified by module_name does not exist, it will be created with this description. The default description is the same as used by the conversion_tools.get_module function. """ file_paths = self.source_data['file_paths'] if stub_test: count_max = 10 else: count_max = np.inf if starting_times is not None: assert isinstance(starting_times, list) and all([isinstance(x, float) for x in starting_times]) \ and len(starting_times) == len(file_paths), \ "Argument 'starting_times' must be a list of floats in one-to-one correspondence with 'file_paths'!" else: starting_times = [0.] for j, file in enumerate(file_paths): timestamps = starting_times[j] + get_movie_timestamps( movie_file=file) if len(starting_times) != len(file_paths): starting_times.append(timestamps[-1]) image_series_kwargs = dict(name=f"Video: {Path(file).stem}", description="Video recorded by camera.", unit="Frames") if check_regular_timestamps(ts=timestamps): fps = get_movie_fps(movie_file=file) image_series_kwargs.update(starting_time=starting_times[j], rate=fps) else: image_series_kwargs.update( timestamps=H5DataIO(timestamps, compression="gzip")) if external_mode: image_series_kwargs.update(format="external", external_file=[file]) else: uncompressed_estimate = Path(file).stat().st_size * 70 available_memory = psutil.virtual_memory().available if not chunk_data and uncompressed_estimate >= available_memory: warn( f"Not enough memory (estimated {round(uncompressed_estimate/1e9, 2)} GB) to load movie file as " f"array ({round(available_memory/1e9, 2)} GB available)! Forcing chunk_data to True." 
) chunk_data = True total_frames = len(timestamps) frame_shape = get_frame_shape(movie_file=file) maxshape = [total_frames] maxshape.extend(frame_shape) best_gzip_chunk = (1, frame_shape[0], frame_shape[1], 3) tqdm_pos, tqdm_mininterval = (0, 10) if chunk_data: def data_generator(file, count_max): cap = cv2.VideoCapture(str(file)) for _ in range(min(count_max, total_frames)): success, frame = cap.read() yield frame cap.release() mov = DataChunkIterator( data=tqdm( iterable=data_generator(file=file, count_max=count_max), desc=f"Copying movie data for {Path(file).name}", position=tqdm_pos, total=total_frames, mininterval=tqdm_mininterval), iter_axis=0, # nwb standard is time as zero axis maxshape=tuple(maxshape)) image_series_kwargs.update(data=H5DataIO( mov, compression="gzip", chunks=best_gzip_chunk)) else: cap = cv2.VideoCapture(str(file)) mov = [] with tqdm(desc=f"Reading movie data for {Path(file).name}", position=tqdm_pos, total=total_frames, mininterval=tqdm_mininterval) as pbar: for _ in range(min(count_max, total_frames)): success, frame = cap.read() mov.append(frame) pbar.update(1) cap.release() image_series_kwargs.update(data=H5DataIO( DataChunkIterator( tqdm(iterable=np.array(mov), desc= f"Writing movie data for {Path(file).name}", position=tqdm_pos, mininterval=tqdm_mininterval), iter_axis=0, # nwb standard is time as zero axis maxshape=tuple(maxshape)), compression="gzip", chunks=best_gzip_chunk)) if module_name is None: nwbfile.add_acquisition(ImageSeries(**image_series_kwargs)) else: get_module(nwbfile=nwbfile, name=module_name, description=module_description).add( ImageSeries(**image_series_kwargs))
chunk_shape = (10, 10)
num_values = num_chunks * np.prod(chunk_shape)

# Create our sparse matrix data.
data = SparseMatrixIterator(shape=(xsize, ysize),
                            num_chunks=num_chunks,
                            chunk_shape=chunk_shape)

#####################
# In order to also enable compression and other advanced HDF5 dataset I/O features we can then also
# wrap our data via :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
from hdmf.backends.hdf5.h5_utils import H5DataIO

matrix2 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data2 = H5DataIO(data=matrix2,
                 compression='gzip',
                 compression_opts=4)

######################
# We can now also customize the chunking, fillvalue and other settings
#
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Increase the chunk size and set a custom fill value
matrix3 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data3 = H5DataIO(data=matrix3,
                 chunks=(100, 100),
                 fillvalue=np.nan)

# Increase the chunk size and add compression
matrix4 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
def run_conversion(self, nwbfile: NWBFile, metadata: dict):
    mat_file_path = self.source_data["mat_file_path"]
    mat_file = loadmat(mat_file_path)
    trial_info = mat_file["SessionNP"]

    nwbfile.add_trial_column(
        name="reward_time",
        description="Time when subject began consuming reward.")
    nwbfile.add_trial_column(
        name="left_or_right",
        description="Whether the subject turned left or right on this trial.")
    l_r_dict = {1: "Right", 2: "Left"}
    for trial in trial_info:
        nwbfile.add_trial(start_time=trial[0],
                          stop_time=trial[1],
                          reward_time=trial[2],
                          left_or_right=l_r_dict[int(trial[3])])

    # Position
    pos_info = mat_file["whlrl"]
    pos_data = [pos_info[:, 0:1], pos_info[:, 2:3]]
    starting_time = 0.0
    rate = 20000 / 512  # from CRCNS info
    conversion = np.nan  # whl are arbitrary units
    pos_obj = Position(name="Position")
    for j in range(2):
        spatial_series_object = SpatialSeries(
            name=f"SpatialSeries{j+1}",
            description="(x,y) coordinates tracking subject movement through the maze.",
            data=H5DataIO(pos_data[j], compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        pos_obj.add_spatial_series(spatial_series_object)
    get_module(nwbfile=nwbfile,
               name="behavior",
               description="Contains processed behavioral data."
               ).add_data_interface(pos_obj)

    linearized_pos = mat_file["whlrld"][:, 6]
    lin_pos_obj = Position(name="LinearizedPosition")
    lin_spatial_series_object = SpatialSeries(
        name="LinearizedTimeSeries",
        description=(
            "Linearized position, with '1' defined as start position (the position at the time of last nose-poking "
            "in the trial), and d=2 being the end position (position at the time just before reward consumption). "
            "d=0 means subject is not performing working memory trials."),
        data=H5DataIO(linearized_pos, compression="gzip"),
        reference_frame="unknown",
        conversion=conversion,
        starting_time=starting_time,
        rate=rate,
        resolution=np.nan,
    )
    lin_pos_obj.add_spatial_series(lin_spatial_series_object)
    get_module(nwbfile=nwbfile,
               name="behavior").add_data_interface(lin_pos_obj)