Example #1
    def roundtripContainer(self):
        data_file = NWBFile(
            session_description='a test file',
            identifier='data_file',
            session_start_time=self.start_time
        )
        data_file.add_acquisition(self.container)

        with HDF5IO(self.data_filename, 'w', manager=get_manager()) as self.data_write_io:
            self.data_write_io.write(data_file)

        with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io:
            data_file_obt = self.data_read_io.read()

            with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
                link_file = NWBFile(
                    session_description='a test file',
                    identifier='link_file',
                    session_start_time=self.start_time
                )
                link_file.add_acquisition(TimeSeries(
                    name='test_mod_ts',
                    unit='V',
                    data=data_file_obt.get_acquisition('data_ts'),
                    timestamps=H5DataIO(
                        data=data_file_obt.get_acquisition('data_ts').timestamps,
                        link_data=True
                    )
                ))
                link_write_io.write(link_file)

        with HDF5IO(self.link_filename, 'r', manager=get_manager()) as self.link_file_reader:
            return self.getContainer(self.link_file_reader.read())
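The core idiom in this test is wrapping a dataset that already lives on disk in H5DataIO with link_data=True, so the second file stores an HDF5 external link instead of a copy of the values. Below is a minimal sketch of that pattern outside the test harness; the file names 'source.nwb' and 'links.nwb' and the acquisition name 'data_ts' are assumptions, not taken from the test.

from datetime import datetime
from pynwb import NWBFile, NWBHDF5IO, TimeSeries
from hdmf.backends.hdf5.h5_utils import H5DataIO

# keep the source file open while the link file is written, as in the test above
with NWBHDF5IO('source.nwb', 'r') as src_io:
    src_ts = src_io.read().get_acquisition('data_ts')

    link_nwb = NWBFile(session_description='file holding only links',
                       identifier='link_file',
                       session_start_time=datetime.now().astimezone())
    link_nwb.add_acquisition(TimeSeries(
        name='linked_ts',
        unit='V',
        data=H5DataIO(data=src_ts.data, link_data=True),              # external link, no copy
        timestamps=H5DataIO(data=src_ts.timestamps, link_data=True),  # same for timestamps
    ))
    with NWBHDF5IO('links.nwb', 'w', manager=src_io.manager) as link_io:
        link_io.write(link_nwb)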
Example #2
def write_lfp(
    nwbfile: NWBFile,
    data: ArrayLike,
    fs: float,
    electrode_inds: Optional[List[int]] = None,
    name: Optional[str] = "LFP",
    description: Optional[str] = "local field potential signal",
):
    """
    Add LFP from neuroscope to an "ecephys" processing module of an NWBFile.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    data: array-like
    fs: float
    electrode_inds: list(int), optional
    name: str, optional
    description: str, optional

    Returns
    -------
    LFP pynwb.ecephys.ElectricalSeries

    """
    if electrode_inds is None:
        if nwbfile.electrodes is not None and data.shape[1] <= len(
                nwbfile.electrodes.id.data[:]):
            electrode_inds = list(range(data.shape[1]))
        else:
            electrode_inds = list(range(len(nwbfile.electrodes.id.data[:])))

    table_region = nwbfile.create_electrode_table_region(
        electrode_inds, "electrode table reference")
    data = H5DataIO(
        DataChunkIterator(tqdm(data, desc="writing lfp data"),
                          buffer_size=int(fs * 3600)),
        compression="gzip",
    )
    lfp_electrical_series = ElectricalSeries(
        name=name,
        description=description,
        data=data,
        electrodes=table_region,
        conversion=1e-6,
        rate=fs,
        resolution=np.nan,
    )
    ecephys_mod = check_module(
        nwbfile,
        "ecephys",
        "intermediate data from extracellular electrophysiology recordings, e.g., LFP",
    )
    if "LFP" not in ecephys_mod.data_interfaces:
        ecephys_mod.add_data_interface(LFP(name="LFP"))
    ecephys_mod.data_interfaces["LFP"].add_electrical_series(
        lfp_electrical_series)

    return lfp_electrical_series
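A hedged usage sketch for the helper above; the sampling rate, array shape, and electrode indices are made up, and nwbfile is assumed to already have its electrode table populated (create_electrode_table_region requires it).

import numpy as np

# fake LFP: 10 s at 1250 Hz across 4 channels that are already in nwbfile.electrodes
lfp_data = np.random.randn(12500, 4).astype(np.float32)
lfp_series = write_lfp(
    nwbfile,
    data=lfp_data,
    fs=1250.0,
    electrode_inds=[0, 1, 2, 3],
    name="LFP",
    description="local field potential signal",
)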
Example #3
def createCompressedDataset(array):
    """
    Request compression for the given array and return it wrapped.
    """

    return H5DataIO(data=array,
                    compression=True,
                    chunks=True,
                    shuffle=True,
                    fletcher32=True)
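A short usage sketch: the wrapped array is handed to a pynwb container like any plain array, and the gzip, chunking, shuffle, and Fletcher-32 checksum settings take effect when the file is written. The TimeSeries name, unit, and rate below are illustrative, and nwbfile is assumed to already exist.

import numpy as np
from pynwb import TimeSeries

raw = np.random.randn(100_000).astype(np.float32)
wrapped = createCompressedDataset(raw)  # gzip + chunking + shuffle + Fletcher32 checksum
nwbfile.add_acquisition(
    TimeSeries(name="compressed_signal", data=wrapped, unit="V", rate=30_000.0))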
Example #4
    def run_conversion(self, nwbfile: NWBFile, metadata: dict):
        event_file = self.source_data['file_path']
        events_data = pd.read_csv(event_file, delimiter=";")
        event_timestamps = events_data['Time'].to_numpy() / 1E3
        event_labels = events_data['Tag'].to_numpy()
        unique_events = set(event_labels)
        events_map = {event: n for n, event in enumerate(unique_events)}
        event_data = [events_map[event] for event in event_labels]

        # Custom labeled events
        events = LabeledEvents(
            name='LabeledEvents',
            description='Events from the experiment.',
            timestamps=H5DataIO(event_timestamps, compression="gzip"),
            resolution=np.nan,
            data=H5DataIO(event_data, compression="gzip"),
            labels=list(unique_events)  # does not support compression
        )
        nwbfile.add_acquisition(events)
Example #5
def add_position_data(nwbfile,
                      session_path,
                      fs=1250. / 32.,
                      names=('x0', 'y0', 'x1', 'y1')):
    """Read raw position sensor data from .whl file

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    session_path: str
    fs: float
        sampling rate
    names: iterable
        names of column headings

    """
    session_name = os.path.split(session_path)[1]
    whl_path = os.path.join(session_path, session_name + '.whl')
    if not os.path.isfile(whl_path):
        print(whl_path + ' file not found!')
        return
    print('warning: time may not be aligned')
    df = pd.read_csv(whl_path, sep='\t', names=names)

    df.index = np.arange(len(df)) / fs
    df.index.name = 'tt (sec)'

    nwbfile.add_acquisition(
        SpatialSeries('position_sensor0',
                      H5DataIO(df[['x0', 'y0']].values, compression='gzip'),
                      'unknown',
                      description='raw sensor data from sensor 0',
                      timestamps=H5DataIO(df.index.values, compression='gzip'),
                      resolution=np.nan))

    nwbfile.add_acquisition(
        SpatialSeries('position_sensor1',
                      H5DataIO(df[['x1', 'y1']].values, compression='gzip'),
                      'unknown',
                      description='raw sensor data from sensor 1',
                      timestamps=H5DataIO(df.index.values, compression='gzip'),
                      resolution=np.nan))
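A hedged call sketch; the session directory is hypothetical and is expected to contain '<session_name>.whl' with tab-separated position columns.

add_position_data(nwbfile,
                  session_path='/data/SessionA',  # expects /data/SessionA/SessionA.whl
                  fs=1250. / 32.)
# adds 'position_sensor0' and 'position_sensor1' SpatialSeries with gzip-compressed
# data and timestamps to nwbfile.acquisition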
Example #6
def write_lfp(nwbfile,
              data,
              fs,
              name='LFP',
              description='local field potential signal',
              electrode_inds=None):
    """
    Add LFP from neuroscope to an "ecephys" processing module of an NWBFile.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    data: array-like
    fs: float
    name: str
    description: str
    electrode_inds: list(int)

    Returns
    -------
    LFP pynwb.ecephys.ElectricalSeries

    """

    if electrode_inds is None:
        electrode_inds = list(range(data.shape[1]))

    table_region = nwbfile.create_electrode_table_region(
        electrode_inds, 'electrode table reference')

    data = H5DataIO(DataChunkIterator(tqdm(data, desc='writing lfp data'),
                                      buffer_size=int(fs * 3600)),
                    compression='gzip')

    lfp_electrical_series = ElectricalSeries(name=name,
                                             description=description,
                                             data=data,
                                             electrodes=table_region,
                                             conversion=np.nan,
                                             rate=fs,
                                             resolution=np.nan)

    ecephys_mod = check_module(
        nwbfile, 'ecephys',
        'intermediate data from extracellular electrophysiology recordings, e.g., LFP'
    )

    if 'LFP' not in ecephys_mod.data_interfaces:
        ecephys_mod.add_data_interface(LFP(name='LFP'))

    ecephys_mod.data_interfaces['LFP'].add_electrical_series(
        lfp_electrical_series)

    return lfp_electrical_series
Example #7
def add_position_data(nwbfile: NWBFile,
                      session_path: str,
                      fs: float = 1250. / 32.,
                      names=('x0', 'y0', 'x1', 'y1')):
    """Read raw position sensor data from .whl file.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    session_path: str
    fs: float
        sampling rate
    names: iterable
        names of column headings

    """
    session_name = os.path.split(session_path)[1]
    whl_path = os.path.join(session_path, session_name + '.whl')
    if not os.path.isfile(whl_path):
        print(whl_path + ' file not found!')
        return
    df = pd.read_csv(whl_path, sep='\t', names=names)

    nwbfile.add_acquisition(
        SpatialSeries('position_sensor0',
                      H5DataIO(df[['x0', 'y0']].values, compression='gzip'),
                      'unknown',
                      description='raw sensor data from sensor 0',
                      starting_time=0.,
                      rate=fs,
                      resolution=np.nan))

    nwbfile.add_acquisition(
        SpatialSeries('position_sensor1',
                      H5DataIO(df[['x1', 'y1']].values, compression='gzip'),
                      'unknown',
                      description='raw sensor data from sensor 1',
                      starting_time=0.,
                      rate=fs,
                      resolution=np.nan))
Example #8
def append_spike_times(input_nwb_path: PathLike,
                       sweep_spike_times: Dict[int, List[float]],
                       output_nwb_path: Optional[PathLike] = None):
    """
        Appends spike times to an NWB v2 file.

        Parameters
        ----------

        input_nwb_path: location of input nwb file without spike times

        sweep_spike_times: Dict of sweep_num: spike times

        output_nwb_path: optional location to write new nwb file with
                         spike times, otherwise appends spike times to
                         input file

    """

    # Copy to new location
    if output_nwb_path and output_nwb_path != input_nwb_path:
        shutil.copy(input_nwb_path, output_nwb_path)
        nwb_path = output_nwb_path
    else:
        nwb_path = input_nwb_path

    nwb_io = pynwb.NWBHDF5IO(nwb_path, mode='a', load_namespaces=True)
    nwbfile = nwb_io.read()

    spikes_module = "spikes"
    # Add spikes only if not previously added
    if spikes_module not in nwbfile.processing.keys():
        spike_module = ProcessingModule(name=spikes_module,
                                        description='detected spikes')
        for sweep_num, spike_times in sweep_spike_times.items():
            wrapped_spike_times = H5DataIO(data=np.asarray(spike_times),
                                           compression=True)
            ts = TimeSeries(timestamps=wrapped_spike_times,
                            unit='seconds',
                            data=wrapped_spike_times,
                            name=f"Sweep_{sweep_num}")
            spike_module.add_data_interface(ts)

        nwbfile.add_processing_module(spike_module)

        nwb_io.write(nwbfile)
    else:
        raise ValueError("Cannot add spikes times to the nwb file: "
                         "spikes times already exist!")

    nwb_io.close()
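A hedged call sketch (paths and sweep numbers are hypothetical):

sweep_spike_times = {
    5: [0.0123, 0.0456, 0.0789],  # sweep number -> spike times in seconds
    6: [0.0211, 0.0678],
}
append_spike_times(
    input_nwb_path="session_without_spikes.nwb",
    sweep_spike_times=sweep_spike_times,
    output_nwb_path="session_with_spikes.nwb",  # omit to append to the input file in place
)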
Example #9
def add_LFP(nwbfile, expt, count=1, region='CA1'):
    eeg_local = [
        x for x in os.listdir(expt.LFPFilePath()) if x.endswith('.eeg')
    ][0]
    eeg_file = os.path.join(expt.LFPFilePath(), eeg_local)
    eeg_base = eeg_file.replace('.eeg', '')
    eeg_dict = lfph.loadEEG(eeg_base)

    lfp_xml_fpath = eeg_base + '.xml'
    channel_groups = get_channel_groups(lfp_xml_fpath)
    lfp_channels = channel_groups[0]
    lfp_fs = eeg_dict['sampeFreq']
    nchannels = eeg_dict['nChannels']

    lfp_signal = eeg_dict['EEG'][lfp_channels].T

    device_name = 'LFP_Device_{}'.format(count)
    device = nwbfile.create_device(device_name)
    electrode_group = nwbfile.create_electrode_group(name=device_name +
                                                     '_electrodes',
                                                     description=device_name,
                                                     device=device,
                                                     location=region)

    x, y, z = get_position(region)

    for channel in channel_groups[0]:
        nwbfile.add_electrode(
            float(x),
            float(y),
            float(z),  # position?
            imp=np.nan,
            location=region,
            filtering='See lab.misc.lfp_helpers.ConvertFromRHD',
            group=electrode_group,
            id=channel)

    lfp_table_region = nwbfile.create_electrode_table_region(
        list(range(len(lfp_channels))), 'lfp electrodes')

    # TODO add conversion field for moving to V
    # TODO figure out how to link lfp data (zipping seems kludgey)
    lfp_elec_series = ElectricalSeries(name='LFP',
                                       data=H5DataIO(lfp_signal,
                                                     compression='gzip'),
                                       electrodes=lfp_table_region,
                                       conversion=np.nan,
                                       rate=lfp_fs,
                                       resolution=np.nan)

    nwbfile.add_acquisition(LFP(electrical_series=lfp_elec_series))
Example #10
def peyrache_spatial_series(name: str,
                            description: str,
                            data: np.array,
                            conversion: float,
                            pos_sf: float = 1250 / 32):
    """Specific constructor for Peyrache style spatial series."""
    return SpatialSeries(
        name=name,
        description=description,
        data=H5DataIO(data, compression="gzip"),
        conversion=conversion,
        reference_frame="Unknown",
        starting_time=0.0,
        rate=pos_sf,
        resolution=np.nan,
    )
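An illustrative call with made-up position samples; the conversion factor assumes the raw values are in centimeters.

import numpy as np

xy = np.random.rand(1000, 2)  # arbitrary (x, y) samples
spatial_series = peyrache_spatial_series(
    name="position",
    description="(x, y) position of the subject",
    data=xy,
    conversion=0.01,      # centimeters -> meters (assumed)
    pos_sf=1250 / 32,
)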
Example #11
    def add_two_photon_series(imaging, nwbfile, metadata, buffer_size=10):
        """
        Auxiliary static method for nwbextractor.
        Adds a two-photon series from an imaging object as a TwoPhotonSeries to the nwbfile object.
        """
        metadata = dict_recursive_update(get_default_nwb_metadata(), metadata)
        metadata = update_dict(metadata,
                               NwbImagingExtractor.get_nwb_metadata(imaging))
        # Test whether the TwoPhotonSeries already exists in acquisition
        nwb_es_names = [ac for ac in nwbfile.acquisition]
        opts = metadata["Ophys"]["TwoPhotonSeries"][0]
        if opts["name"] not in nwb_es_names:
            # retrieve device
            device = nwbfile.devices[list(nwbfile.devices.keys())[0]]
            metadata["Ophys"]["ImagingPlane"][0]["optical_channel"] = [
                OpticalChannel(**i) for i in metadata["Ophys"]["ImagingPlane"]
                [0]["optical_channel"]
            ]
            metadata["Ophys"]["ImagingPlane"][0] = update_dict(
                metadata["Ophys"]["ImagingPlane"][0], {"device": device})

            imaging_plane = nwbfile.create_imaging_plane(
                **metadata["Ophys"]["ImagingPlane"][0])

            def data_generator(imaging):
                for i in range(imaging.get_num_frames()):
                    yield imaging.get_frames(frame_idxs=[i]).T

            data = H5DataIO(
                DataChunkIterator(data_generator(imaging),
                                  buffer_size=buffer_size),
                compression=True,
            )

            # using internal data. this data will be stored inside the NWB file
            two_p_series_kwargs = update_dict(
                metadata["Ophys"]["TwoPhotonSeries"][0],
                dict(data=data, imaging_plane=imaging_plane),
            )
            ophys_ts = TwoPhotonSeries(**two_p_series_kwargs)

            nwbfile.add_acquisition(ophys_ts)

        return nwbfile
Example #12
    def roundtripContainer(self):
        # create and write data file
        data_file = NWBFile(
            session_description='a test file',
            identifier='data_file',
            session_start_time=self.start_time
        )
        data_file.add_acquisition(self.container)

        with HDF5IO(self.data_filename, 'w', manager=get_manager()) as data_write_io:
            data_write_io.write(data_file)

        # read data file
        with HDF5IO(self.data_filename, 'r', manager=get_manager()) as self.data_read_io:
            data_file_obt = self.data_read_io.read()

            # write "link file" with timeseries.data that is an external link to the timeseries in "data file"
            # also link timeseries.timestamps.data to the timeseries.timestamps in "data file"
            with HDF5IO(self.link_filename, 'w', manager=get_manager()) as link_write_io:
                link_file = NWBFile(
                    session_description='a test file',
                    identifier='link_file',
                    session_start_time=self.start_time
                )
                self.link_container = TimeSeries(
                    name='test_mod_ts',
                    unit='V',
                    data=data_file_obt.get_acquisition('data_ts'),  # test direct link
                    timestamps=H5DataIO(
                        data=data_file_obt.get_acquisition('data_ts').timestamps,
                        link_data=True  # test with setting link data
                    )
                )
                link_file.add_acquisition(self.link_container)
                link_write_io.write(link_file)

        # note that self.link_container contains a link to a dataset that is now closed

        # read the link file
        self.link_read_io = HDF5IO(self.link_filename, 'r', manager=get_manager())
        self.read_nwbfile = self.link_read_io.read()
        return self.getContainer(self.read_nwbfile)
Example #13
    def run_conversion(self,
                       nwbfile: NWBFile,
                       metadata: dict = None,
                       stub_test: bool = False):
        conditions = intervals_from_traces(self.recording_extractor)
        mech_stim = TimeIntervals(
            name='MechanicalStimulus',
            description=
            "Activation times inferred from TTL commands for mechanical stimulus."
        )
        laser_stim = TimeIntervals(
            name='LaserStimulus',
            description=
            "Activation times inferred from TTL commands for cortical laser stimulus."
        )
        for j, table in enumerate([mech_stim, laser_stim]):
            for row in conditions[j]:
                table.add_row(
                    dict(start_time=float(row[0]), stop_time=float(row[1])))
        # TODO - these really should be IntervalSeries added to stimulus, rather than processing
        check_module(nwbfile, 'stimulus',
                     "Contains stimuli data.").add(mech_stim)
        check_module(nwbfile, 'stimulus',
                     "Contains stimuli data.").add(laser_stim)

        if stub_test or self.subset_channels is not None:
            recording = self.subset_recording(stub_test=stub_test)
        else:
            recording = self.recording_extractor

        # Pressure values
        nwbfile.add_stimulus(
            TimeSeries(
                name='MechanicalPressure',
                data=H5DataIO(recording.get_traces(0), compression="gzip"),
                unit=self.recording_extractor._channel_smrxinfo[0]['unit'],
                conversion=recording.get_channel_property(0, 'gain'),
                rate=recording.get_sampling_frequency(),
                description=
                "Pressure sensor attached to the mechanical stimulus used to repeatedly evoke spiking."
            ))
Example #14
def add_position_data(
        nwbfile: NWBFile,
        session_path: str,
        whl_file_path: OptionalPathType = None,
        starting_time: float = 0.0,
        fs: float = 1250.0 / 32.0,
        names=("x0", "y0", "x1", "y1"),
):
    """
    Read and write raw position sensor data from .whl file.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    session_path: str
    whl_file_path: str or Path, optional
        explicit path to the .whl file; defaults to <session_path>/<session_id>.whl
    starting_time: float, optional
        starting time of the series, in seconds
    fs: float
        sampling rate
    names: iterable
        names of column headings
    """
    session_id = Path(session_path).name
    if whl_file_path is None:
        whl_file_path = Path(session_path) / f"{session_id}.whl"
    assert Path(whl_file_path).is_file(), f".whl file ({whl_file_path}) not found!"

    df = pd.read_csv(whl_file_path, sep="\t", names=names)
    for x in [0, 1]:
        nwbfile.add_acquisition(
            SpatialSeries(
                name=f"PositionSensor{x}",
                description=f"Raw sensor data from sensor {x}.",
                data=H5DataIO(df[[f"x{x}", f"y{x}"]].values,
                              compression="gzip"),
                reference_frame="Unknown",
                conversion=np.nan,  # whl is in arbitrary units
                starting_time=starting_time,
                rate=fs,
                resolution=np.nan,
            ))
Example #15
    def run_conversion(self, nwbfile: NWBFile, metadata: dict):
        video_folder = Path(self.source_data['folder_path'])
        video_file_path_list = [
            str(x) for x in video_folder.iterdir() if x.suffix == ".mkv"
        ]

        video_timestamps = np.empty(0)
        for video_file_path in video_file_path_list:
            video_time_df = pd.read_csv(video_file_path.replace(
                ".mkv", "_timestamps.csv"),
                                        delimiter=";",
                                        skipinitialspace=True)
            video_timestamps = np.append(
                video_timestamps, video_time_df['timestamp'].to_numpy() / 1E3)

        # Video files referenced externally; timestamps compressed with H5DataIO
        videos = ImageSeries(name='Videos',
                             description="Videos recorded by TIS camera.",
                             format="external",
                             external_file=video_file_path_list,
                             timestamps=H5DataIO(video_timestamps,
                                                 compression="gzip"))
        nwbfile.add_acquisition(videos)
Example #16
 def create_acquisition(self):
     """
     Acquisition data such as audiospectrogram (raw behavioral data), nidq (raw ephys data), and raw camera data.
     These are independent of probe type.
     """
     for neurodata_type_name, neurodata_type_args_list in self.nwb_metadata[
             'Acquisition'].items():
         data_retrieved_args_list = self._get_data(neurodata_type_args_list)
         for neurodata_type_args in data_retrieved_args_list:
             if neurodata_type_name == 'ImageSeries':
                 for types, times in zip(neurodata_type_args['data'],
                                         neurodata_type_args['timestamps']):
                     customargs = dict(name='camera_raw',
                                       external_file=[str(types)],
                                       format='external',
                                       timestamps=times,
                                       unit='n.a.')
                     self.nwbfile.add_acquisition(ImageSeries(**customargs))
             elif neurodata_type_name == 'DecompositionSeries':
                 neurodata_type_args['bands'] = np.squeeze(
                     neurodata_type_args['bands'])
                 freqs = DynamicTable(
                     'bands',
                      'spectrogram frequencies',
                     id=np.arange(neurodata_type_args['bands'].shape[0]))
                 freqs.add_column('freq',
                                  'frequency value',
                                  data=neurodata_type_args['bands'])
                 neurodata_type_args.update(dict(bands=freqs))
                 temp = neurodata_type_args['data'][:, :, np.newaxis]
                 neurodata_type_args['data'] = np.moveaxis(
                     temp, [0, 1, 2], [0, 2, 1])
                 ts = neurodata_type_args.pop('timestamps')
                 starting_time = ts[0][0] if isinstance(
                     ts[0], np.ndarray) else ts[0]
                 neurodata_type_args.update(
                     dict(starting_time=np.float64(starting_time),
                          rate=1 / np.mean(np.diff(ts.squeeze())),
                          unit='sec'))
                 self.nwbfile.add_acquisition(
                     DecompositionSeries(**neurodata_type_args))
             elif neurodata_type_name == 'ElectricalSeries':
                 if not self.electrode_table_exist:
                     self.create_electrode_table_ecephys()
                 if neurodata_type_args['name'] in ['raw.lf', 'raw.ap']:
                     for probe_no in range(self.no_probes):
                         if neurodata_type_args['data'][probe_no].shape[
                                 1] > self._one_data.data_attrs_dump[
                                     'electrode_table_length'][probe_no]:
                             if 'channels.rawInd' in self._one_data.loaded_datasets:
                                 channel_idx = self._one_data.loaded_datasets[
                                     'channels.rawInd'][
                                         probe_no].data.astype('int')
                             else:
                                 warnings.warn(
                                     'could not find channels.rawInd')
                                 break
                         else:
                             channel_idx = slice(None)
                         self.nwbfile.add_acquisition(
                             ElectricalSeries(
                                 name=neurodata_type_args['name'] + '_' +
                                 self.nwb_metadata['Probes'][probe_no]
                                 ['name'],
                                 starting_time=np.abs(
                                     np.round(
                                         neurodata_type_args['timestamps']
                                         [probe_no][0, 1], 2)
                                 ),  # round starting times of the order of 1e-5
                                 rate=neurodata_type_args['data']
                                 [probe_no].fs,
                                 data=H5DataIO(
                                     DataChunkIterator(
                                         _iter_datasetview(
                                             neurodata_type_args['data']
                                             [probe_no],
                                             channel_ids=channel_idx),
                                         buffer_size=self.buffer_size),
                                     compression=True,
                                     shuffle=self.shuffle,
                                     compression_opts=self.complevel),
                                 electrodes=self.probe_dt_region[probe_no],
                                 channel_conversion=neurodata_type_args[
                                     'data']
                                 [probe_no].channel_conversion_sample2v[
                                     neurodata_type_args['data']
                                     [probe_no].type][channel_idx]))
                 elif neurodata_type_args['name'] in ['raw.nidq']:
                     self.nwbfile.add_acquisition(
                         ElectricalSeries(**neurodata_type_args))
Example #17
def add_imaging(nwbfile,
                expt,
                z_spacing=25.,
                device_name='2P Microscope',
                location='CA1',
                indicator='GCaMP6f',
                excitation_lambda=920.,
                data_root=None,
                stub=False):

    color_dict = {'Ch1': 'Red', 'Ch2': 'Green'}
    # Emissions for mCherry and GCaMP
    # TODO make this more flexible
    emission = {'Ch1': 640., 'Ch2': 530.}

    ch_names = ['Ch1', 'Ch2']

    optical_channels = []
    for ch_name in ch_names:

        optical_channel = OpticalChannel(name=ch_name,
                                         description=color_dict[ch_name],
                                         emission_lambda=emission[ch_name])

        optical_channels.append(optical_channel)

    h5_path = glob(os.path.join(data_root, '*.h5'))[0]

    pv_xml = os.path.join(data_root, os.path.basename(data_root) + '.xml')
    pv_version = get_prairieview_version(pv_xml)
    [y_um, x_um] = get_element_size_um(pv_xml, pv_version)[-2:]

    elem_size_um = [z_spacing, y_um, x_um]

    # TODO allow for flexibility in setting device, excitation, indicator, location
    # TODO nwb-schema issue #151 needs to be resolved so we can actually use imaging data size

    device = nwbfile.create_device(device_name)

    imaging_plane = nwbfile.create_imaging_plane(
        name='Imaging Data',
        optical_channel=optical_channels,
        description='imaging data for both channels',
        device=device,
        excitation_lambda=excitation_lambda,
        imaging_rate=1 / expt.frame_period(),
        indicator=indicator,
        location=location,
        conversion=1.0,  # Should actually be elem_size_um
        manifold=np.ones((2, 2, 2, 3)),
        reference_frame='reference_frame',
        unit='um')

    f = h5py.File(h5_path, 'r')
    imaging_data = f['imaging']
    channel_names = f['imaging'].attrs['channel_names']

    for c, channel_name in enumerate(channel_names):
        if not stub:
            data_in = H5DataIO(DataChunkIterator(tqdm(
                (np.swapaxes(data[..., c], 0, 2) for data in imaging_data),
                total=imaging_data.shape[0]),
                                                 buffer_size=5000),
                               compression='gzip')

        else:
            data_in = np.ones((10, 10, 10))  # use for dev testing for speed

        # TODO parse env file to add power and pmt gain?
        image_series = TwoPhotonSeries(name='2p_Series_' + channel_name,
                                       dimension=expt.frame_shape()[:-1],
                                       data=data_in,
                                       imaging_plane=imaging_plane,
                                       rate=1 / expt.frame_period(),
                                       starting_time=0.,
                                       description=channel_name)

        nwbfile.add_acquisition(image_series)
Example #18
def yuta2nwb(session_path='/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
             subject_xls=None, include_spike_waveforms=True, stub=True):

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    if subject_xls is None:
        subject_xls = os.path.join(subject_path, 'YM' + mouse_number + ' exp_sheet.xlsx')
    else:
        if not subject_xls[-4:] == 'xlsx':
            subject_xls = os.path.join(subject_xls, 'YM' + mouse_number + ' exp_sheet.xlsx')

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery', 'virus injection', 'mouseID']:
        names = df.iloc[:, 0]
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    if isinstance(subject_data['DOB'], datetime):
        age = session_start_time - subject_data['DOB']
    else:
        age = None

    subject = Subject(subject_id=subject_id, age=str(age),
                      genotype=subject_data['genotype'],
                      species='mouse')

    nwbfile = NWBFile(session_description='mouse in open exploration and theta maze',
                      identifier=identifier,
                      session_start_time=session_start_time.astimezone(),
                      file_create_date=datetime.now().astimezone(),
                      experimenter='Yuta Senzai',
                      session_id=session_id,
                      institution='NYU',
                      lab='Buzsaki',
                      subject=subject,
                      related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path, session_start_time, session_id, b=b)
    print(lfp_channel)
    custom_column = [{'name': 'theta_reference',
                      'description': 'this electrode was used to calculate LFP canonical bands',
                      'data': all_shank_channels == lfp_channel}]
    ns.write_electrode_table(nwbfile, session_path, custom_columns=custom_column, max_shanks=max_shanks)

    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_data = ns.read_lfp(session_path, stub=stub)

    lfp_data = all_channels_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp',
                          description='lfp signal for all shank electrodes')

    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(name=name, description='environmental electrode recorded inline with neural data',
                        data=all_channels_data[channel], rate=lfp_fs, unit='V', conversion=np.nan, resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(), lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        for shankn in np.arange(1, 9, dtype=int):
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    decomp_series = DecompositionSeries(name='LFPDecompositionSeries',
                                        description='Theta and Gamma phase for reference LFP',
                                        data=data, rate=lfp_fs,
                                        source_timeseries=lfp_ts,
                                        metric='phase', unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(nwbfile, 'ecephys', 'contains processed extracellular electrophysiology data').add_data_interface(decomp_series)

    [nwbfile.add_stimulus(x) for x in ns.get_events(session_path)]

    # create epochs corresponding to experiments/environments for the mouse

    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name'])))
                         for task_type in task_types)

    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            print('loading position for ' + label + '...', end='', flush=True)

            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]

                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown', conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, 'behavior', 'contains processed behavioral data').add_data_interface(pos_obj)
            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    custom_unit_columns = [
        {
            'name': 'cell_type',
            'description': 'name of cell type',
            'data': celltype_names},
        {
            'name': 'global_id',
            'description': 'global id for cell for entire experiment',
            'data': df_unit_features['unitID'].values},
        {
            'name': 'max_electrode',
            'description': 'electrode that has the maximum amplitude of the waveform',
            'data': get_max_electrodes(nwbfile, session_path),
            'table': nwbfile.electrodes
        }]

    ns.add_units(nwbfile, session_path, custom_unit_columns, max_shanks=max_shanks)

    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        features = trialdatainfo[:7]
        features[:2] = 'start_time', 'stop_time',
        [nwbfile.add_trial_column(x, 'description') for x in features[4:] + ['condition']]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1], condition=cond,
                              error_run=trial_data[4], stim_run=trial_data[5], both_visit=trial_data[6])
    """
    mono_syn_fpath = os.path.join(session_path, session_id+'-MonoSynConvClick.mat')

    matin = loadmat(mono_syn_fpath)
    exc = matin['FinalExcMonoSynID']
    inh = matin['FinalInhMonoSynID']

    #exc_obj = CatCellInfo(name='excitatory_connections',
    #                      indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    #module_cellular.add_container(exc_obj)
    #inh_obj = CatCellInfo(name='inhibitory_connections',
    #                      indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    #module_cellular.add_container(inh_obj)
    """

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])]

        check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table)

    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    print('writing NWB file...', end='', flush=True)
    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
Example #19
if isinstance(lfp_file, str):

    # this needs to be rewritten to
    # 1) pull the number of channels (here hard-coded as N = 80) from the XML
    # 2) load in chunks so you don't overwhelm the RAM

    all_channels = np.fromfile(lfp_file, dtype=np.int16).reshape(-1, 80)
    all_channels_lfp = all_channels[:, all_shank_channels]
    print('done.')

    if WRITE_ALL_LFPS:
        print('making ElectricalSeries objects for LFP...', end='', flush=True)
        lfp = nwbfile.add_acquisition(
            ElectricalSeries('lfp',
                             'lfp signal for all shank electrodes',
                             H5DataIO(all_channels_lfp, compression='gzip'),
                             all_table_region,
                             conversion=np.nan,
                             starting_time=0.0,
                             rate=lfp_fs,
                             resolution=np.nan))
        all_ts.append(lfp)
        print('done.')

module_behavior = nwbfile.create_processing_module(
    name='behavior', description='contains behavioral data')

out_fname = fname + '.nwb'
print('writing NWB file...', end='', flush=True)
with NWBHDF5IO(out_fname, mode='w') as io:
    io.write(nwbfile, cache_spec=False)
Example #20
    def convert_data(
        self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False, include_spike_waveforms: bool = False
    ):
        session_path = self.input_args["folder_path"]
        # TODO: check/enforce format?
        task_types = metadata_dict.get("task_types", [])

        subject_path, session_id = os.path.split(session_path)
        fpath_base = os.path.split(subject_path)[0]

        [nwbfile.add_stimulus(x) for x in get_events(session_path)]

        exist_pos_data = any(
            os.path.isfile(os.path.join(session_path, "{}__{}.mat".format(session_id, task_type["name"])))
            for task_type in task_types
        )

        if exist_pos_data:
            nwbfile.add_epoch_column("label", "name of epoch")

        for task_type in task_types:
            label = task_type["name"]

            file = os.path.join(session_path, session_id + "__" + label + ".mat")
            if os.path.isfile(file):
                pos_obj = Position(name=label + "_position")

                matin = loadmat(file)
                tt = matin["twhl_norm"][:, 0]
                exp_times = find_discontinuities(tt)

                if "conversion" in task_type:
                    conversion = task_type["conversion"]
                else:
                    conversion = np.nan

                for pos_type in ("twhl_norm", "twhl_linearized"):
                    if pos_type in matin:
                        pos_data_norm = matin[pos_type][:, 1:]

                        spatial_series_object = SpatialSeries(
                            name=label + "_{}_spatial_series".format(pos_type),
                            data=H5DataIO(pos_data_norm, compression="gzip"),
                            reference_frame="unknown",
                            conversion=conversion,
                            resolution=np.nan,
                            timestamps=H5DataIO(tt, compression="gzip"),
                        )
                        pos_obj.add_spatial_series(spatial_series_object)

                check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj)
                for i, window in enumerate(exp_times):
                    nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=label + "_" + str(i))

        trialdata_path = os.path.join(session_path, session_id + "__EightMazeRun.mat")
        if os.path.isfile(trialdata_path):
            trials_data = loadmat(trialdata_path)["EightMazeRun"]

            trialdatainfo_path = os.path.join(fpath_base, "EightMazeRunInfo.mat")
            trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]]

            features = trialdatainfo[:7]
            features[:2] = (
                "start_time",
                "stop_time",
            )
            [nwbfile.add_trial_column(x, "description") for x in features[4:] + ["condition"]]

            for trial_data in trials_data:
                if trial_data[3]:
                    cond = "run_left"
                else:
                    cond = "run_right"
                nwbfile.add_trial(
                    start_time=trial_data[0],
                    stop_time=trial_data[1],
                    condition=cond,
                    error_run=trial_data[4],
                    stim_run=trial_data[5],
                    both_visit=trial_data[6],
                )

        sleep_state_fpath = os.path.join(session_path, "{}.SleepState.states.mat".format(session_id))
        # label renaming specific to Watson
        state_label_names = {"WAKEstate": "Awake", "NREMstate": "Non-REM", "REMstate": "REM"}
        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states", description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append({"start_time": row[0], "stop_time": row[1], "label": state_label_names[name]})
            [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]

            check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table)
Example #21
data = np.arange(100, 200, 10)
timestamps = np.arange(10)
test_ts = TimeSeries(name='test_regular_timeseries',
                     data=data,
                     unit='SIunit',
                     timestamps=timestamps)
nwbfile.add_acquisition(test_ts)

####################
# Now let's say we want to compress the recorded data values. We simply need to wrap our data with H5DataIO;
# everything else remains the same.

from hdmf.backends.hdf5.h5_utils import H5DataIO

wrapped_data = H5DataIO(data=data, compression=True)  # <----
test_ts = TimeSeries(
    name='test_compressed_timeseries',
    data=wrapped_data,  # <----
    unit='SIunit',
    timestamps=timestamps)
nwbfile.add_acquisition(test_ts)

####################
# This simple approach gives us access to a broad range of advanced I/O features, such as chunking and
# compression. For a complete list of all available settings see :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

####################
# Chunking
# --------
#
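# The snippet ends at the "Chunking" heading. A hedged sketch of what this section typically
# demonstrates (the chunk shape and series name below are illustrative, not taken from the
# tutorial): chunked storage is requested the same way, by wrapping the array with H5DataIO.

data = np.arange(10000).reshape(100, 100)
wrapped_chunked = H5DataIO(data=data,
                           chunks=(10, 10),   # store the dataset in 10 x 10 HDF5 chunks
                           compression=True)  # gzip-compress each chunk
test_ts = TimeSeries(name='test_chunked_timeseries',
                     data=wrapped_chunked,
                     unit='SIunit',
                     starting_time=0.0,
                     rate=1.0)
nwbfile.add_acquisition(test_ts)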
Example #22
chunk_shape = (10, 10)
num_values = num_chunks * np.prod(chunk_shape)

# Create our sparse matrix data.
data = SparseMatrixIterator(shape=(xsize, ysize),
                            num_chunks=num_chunks,
                            chunk_shape=chunk_shape)

#####################
# To also enable compression and other advanced HDF5 dataset I/O features, we can wrap our data
# via :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
from hdmf.backends.hdf5.h5_utils import H5DataIO
matrix2 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data2 = H5DataIO(data=matrix2, compression='gzip', compression_opts=4)

######################
# We can now also customize the chunking, fillvalue, and other settings
#
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Increase the chunk size and set a fill value
matrix3 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data3 = H5DataIO(data=matrix3, chunks=(100, 100), fillvalue=np.nan)

# Increase the chunk size and add compression
matrix4 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
Example #23
    def convert_data(
        self,
        nwbfile: NWBFile,
        metadata_dict: dict,
        stub_test: bool = False,
        include_spike_waveforms: bool = False,
    ):
        """Convert the behavioral portion of a particular session of the GrosmarkAD dataset."""
        session_path = self.input_args["folder_path"]
        subject_path, session_id = os.path.split(session_path)

        # Stimuli
        [nwbfile.add_stimulus(x) for x in get_events(session_path)]

        # States
        sleep_state_fpath = os.path.join(session_path,
                                         f"{session_id}.SleepState.states.mat")
        # label renaming specific to Watson
        state_label_names = dict(WAKEstate="Awake",
                                 NREMstate="Non-REM",
                                 REMstate="REM")
        if os.path.isfile(sleep_state_fpath):
            matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

            table = TimeIntervals(name="states",
                                  description="Sleep states of animal.")
            table.add_column(name="label", description="Sleep state.")

            data = []
            for name in matin.dtype.names:
                for row in matin[name][0][0]:
                    data.append(
                        dict(
                            start_time=row[0],
                            stop_time=row[1],
                            label=state_label_names[name],
                        ))
            [
                table.add_row(**row)
                for row in sorted(data, key=lambda x: x["start_time"])
            ]
            check_module(nwbfile, "behavior",
                         "contains behavioral data").add_data_interface(table)

        # Position
        pos_filepath = Path(
            session_path) / f"{session_id}.position.behavior.mat"
        pos_mat = loadmat(str(pos_filepath.absolute()))
        starting_time = float(
            pos_mat["position"]["timestamps"][0][0]
            [0])  # confirmed to be a regularly sampled series
        rate = float(
            pos_mat["position"]["timestamps"][0][0][1]) - starting_time
        if pos_mat["position"]["units"][0][0][0] == "m":
            conversion = 1.0
        else:
            warnings.warn(
                f"Spatial units ({pos_mat['position']['units'][0][0][0]}) not listed in meters; "
                "setting conversion to nan.")
            conversion = np.nan
        pos_data = [[x[0], y[0]] for x, y in zip(
            pos_mat["position"]["position"][0][0]["x"][0][0],
            pos_mat["position"]["position"][0][0]["y"][0][0],
        )]
        linearized_data = [[
            lin[0]
        ] for lin in pos_mat["position"]["position"][0][0]["lin"][0][0]]

        label = pos_mat["position"]["behaviorinfo"][0][0]["MazeType"][0][0][
            0].replace(" ", "")
        pos_obj = Position(name=f"{label}Position")
        spatial_series_object = SpatialSeries(
            name=f"{label}SpatialSeries",
            description=
            "(x,y) coordinates tracking subject movement through the maze.",
            data=H5DataIO(pos_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        pos_obj.add_spatial_series(spatial_series_object)
        check_module(
            nwbfile, "behavior",
            "contains processed behavioral data").add_data_interface(pos_obj)

        lin_pos_obj = Position(name=f"{label}LinearizedPosition")
        lin_spatial_series_object = SpatialSeries(
            name=f"{label}LinearizedTimeSeries",
            description=
            "Linearized position, defined as starting at the edge of reward area, "
            "and increasing clockwise, terminating at the opposing edge of the reward area.",
            data=H5DataIO(linearized_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        lin_pos_obj.add_spatial_series(lin_spatial_series_object)
        check_module(nwbfile, "behavior",
                     "contains processed behavioral data").add_data_interface(
                         lin_pos_obj)

        # Epochs
        epoch_names = list(pos_mat["position"]["Epochs"][0][0].dtype.names)
        epoch_windows = [[float(start), float(stop)]
                         for x in pos_mat["position"]["Epochs"][0][0][0][0]
                         for start, stop in x]
        nwbfile.add_epoch_column("label", "name of epoch")
        for j, epoch_name in enumerate(epoch_names):
            nwbfile.add_epoch(
                start_time=epoch_windows[j][0],
                stop_time=epoch_windows[j][1],
                label=epoch_name,
            )
Example #24
    def write_segmentation(
        segext_obj: SegmentationExtractor,
        save_path: PathType = None,
        plane_num=0,
        metadata: dict = None,
        overwrite: bool = True,
        buffer_size: int = 10,
        nwbfile=None,
    ):
        assert (
            save_path is None or nwbfile is None
        ), "Either pass a save_path location, or nwbfile object, but not both!"

        # parse metadata correctly:
        if isinstance(segext_obj, MultiSegmentationExtractor):
            segext_objs = segext_obj.segmentations
            if metadata is not None:
                assert isinstance(metadata, list), (
                    "For MultiSegmentationExtractor enter 'metadata' as a list of "
                    "SegmentationExtractor metadata")
                assert len(metadata) == len(segext_objs), (
                    "The 'metadata' argument should be a list with the same "
                    "number of elements as the segmentations in the "
                    "MultiSegmentationExtractor")
        else:
            segext_objs = [segext_obj]
            if metadata is not None and not isinstance(metadata, list):
                metadata = [metadata]
        metadata_base_list = [
            NwbSegmentationExtractor.get_nwb_metadata(sgobj)
            for sgobj in segext_objs
        ]
        print(f"writing nwb for {segext_obj.extractor_name}\n")
        # updating base metadata with new:
        for num, data in enumerate(metadata_base_list):
            metadata_input = metadata[num] if metadata else {}
            metadata_base_list[num] = dict_recursive_update(
                metadata_base_list[num], metadata_input)
        metadata_base_common = metadata_base_list[0]

        # build/retrieve nwbfile:
        if nwbfile is not None:
            assert isinstance(
                nwbfile, NWBFile), "'nwbfile' should be of type pynwb.NWBFile"
            write = False
        else:
            write = True
            save_path = Path(save_path)
            assert save_path.suffix == ".nwb"
            if save_path.is_file() and not overwrite:
                nwbfile_exist = True
                file_mode = "r+"
            else:
                if save_path.is_file():
                    os.remove(save_path)
                if not save_path.parent.is_dir():
                    save_path.parent.mkdir(parents=True)
                nwbfile_exist = False
                file_mode = "w"
            io = NWBHDF5IO(str(save_path), file_mode)
            if nwbfile_exist:
                nwbfile = io.read()
            else:
                nwbfile = NWBFile(**metadata_base_common["NWBFile"])

        # Subject:
        if metadata_base_common.get("Subject") and nwbfile.subject is None:
            nwbfile.subject = Subject(**metadata_base_common["Subject"])

        # Processing Module:
        if "ophys" not in nwbfile.processing:
            ophys = nwbfile.create_processing_module(
                "ophys", "contains optical physiology processed data")
        else:
            ophys = nwbfile.get_processing_module("ophys")

        for plane_no_loop, (segext_obj, metadata) in enumerate(
                zip(segext_objs, metadata_base_list)):
            # Device:
            if metadata["Ophys"]["Device"][0]["name"] not in nwbfile.devices:
                nwbfile.create_device(**metadata["Ophys"]["Device"][0])

            # ImageSegmentation:
            image_segmentation_name = (
                "ImageSegmentation" if plane_no_loop == 0 else
                f"ImageSegmentation_Plane{plane_no_loop}")
            if image_segmentation_name not in ophys.data_interfaces:
                image_segmentation = ImageSegmentation(
                    name=image_segmentation_name)
                ophys.add(image_segmentation)
            else:
                image_segmentation = ophys.data_interfaces.get(
                    image_segmentation_name)

            # OpticalChannel:
            optical_channels = [
                OpticalChannel(**i) for i in metadata["Ophys"]["ImagingPlane"]
                [0]["optical_channel"]
            ]

            # ImagingPlane:
            image_plane_name = ("ImagingPlane" if plane_no_loop == 0 else
                                f"ImagePlane_{plane_no_loop}")
            if image_plane_name not in nwbfile.imaging_planes.keys():
                input_kwargs = dict(
                    name=image_plane_name,
                    device=nwbfile.get_device(
                        metadata_base_common["Ophys"]["Device"][0]["name"]),
                )
                metadata["Ophys"]["ImagingPlane"][0][
                    "optical_channel"] = optical_channels
                input_kwargs.update(**metadata["Ophys"]["ImagingPlane"][0])
                if "imaging_rate" in input_kwargs:
                    input_kwargs["imaging_rate"] = float(
                        input_kwargs["imaging_rate"])
                imaging_plane = nwbfile.create_imaging_plane(**input_kwargs)
            else:
                imaging_plane = nwbfile.imaging_planes[image_plane_name]

            # PlaneSegmentation:
            input_kwargs = dict(
                description="output from segmenting imaging plane",
                imaging_plane=imaging_plane,
            )
            ps_metadata = metadata["Ophys"]["ImageSegmentation"][
                "plane_segmentations"][0]
            if ps_metadata[
                    "name"] not in image_segmentation.plane_segmentations:
                ps_exist = False
            else:
                ps = image_segmentation.get_plane_segmentation(
                    ps_metadata["name"])
                ps_exist = True

            roi_ids = segext_obj.get_roi_ids()
            accepted_list = segext_obj.get_accepted_list()
            accepted_list = [] if accepted_list is None else accepted_list
            rejected_list = segext_obj.get_rejected_list()
            rejected_list = [] if rejected_list is None else rejected_list
            accepted_ids = [1 if k in accepted_list else 0 for k in roi_ids]
            rejected_ids = [1 if k in rejected_list else 0 for k in roi_ids]
            roi_locations = np.array(segext_obj.get_roi_locations()).T

            def image_mask_iterator():
                for roi_id in segext_obj.get_roi_ids():
                    img_msks = segext_obj.get_roi_image_masks(
                        roi_ids=[roi_id]).T.squeeze()
                    yield img_msks

            if not ps_exist:
                input_kwargs.update(
                    **ps_metadata,
                    columns=[
                        VectorData(
                            data=H5DataIO(
                                DataChunkIterator(image_mask_iterator(),
                                                  buffer_size=buffer_size),
                                compression=True,
                                compression_opts=9,
                            ),
                            name="image_mask",
                            description="image masks",
                        ),
                        VectorData(
                            data=roi_locations,
                            name="RoiCentroid",
                            description="x,y location of the centroid of the ROI in image_mask",
                        ),
                        VectorData(
                            data=accepted_ids,
                            name="Accepted",
                            description="1 if ROI was accepted or 0 if rejected as a cell during segmentation operation",
                        ),
                        VectorData(
                            data=rejected_ids,
                            name="Rejected",
                            description="1 if ROI was rejected or 0 if accepted as a cell during segmentation operation",
                        ),
                    ],
                    id=roi_ids,
                )

                ps = image_segmentation.create_plane_segmentation(
                    **input_kwargs)

            # Fluorescence Traces:
            if "Flourescence" not in ophys.data_interfaces:
                fluorescence = Fluorescence()
                ophys.add(fluorescence)
            else:
                fluorescence = ophys.data_interfaces["Fluorescence"]
            roi_response_dict = segext_obj.get_traces_dict()
            roi_table_region = ps.create_roi_table_region(
                description=f"region for Imaging plane{plane_no_loop}",
                region=list(range(segext_obj.get_num_rois())),
            )
            rate = (np.float("NaN")
                    if segext_obj.get_sampling_frequency() is None else
                    segext_obj.get_sampling_frequency())
            for i, j in roi_response_dict.items():
                data = getattr(segext_obj, f"_roi_response_{i}")
                if data is not None:
                    data = np.asarray(data)
                    trace_name = "RoiResponseSeries" if i == "raw" else i.capitalize(
                    )
                    trace_name = (trace_name if plane_no_loop == 0 else
                                  trace_name + f"_Plane{plane_no_loop}")
                    input_kwargs = dict(
                        name=trace_name,
                        data=data.T,
                        rois=roi_table_region,
                        rate=rate,
                        unit="n.a.",
                    )
                    if trace_name not in fluorescence.roi_response_series:
                        fluorescence.create_roi_response_series(**input_kwargs)

            # create Two Photon Series:
            if "TwoPhotonSeries" not in nwbfile.acquisition:
                warn(
                    "could not find TwoPhotonSeries, using ImagingExtractor to create an nwbfile"
                )

            # adding images:
            images_dict = segext_obj.get_images_dict()
            if any([image is not None for image in images_dict.values()]):
                images_name = ("SegmentationImages" if plane_no_loop == 0 else
                               f"SegmentationImages_Plane{plane_no_loop}")
                if images_name not in ophys.data_interfaces:
                    images = Images(images_name)
                    for img_name, img_no in images_dict.items():
                        if img_no is not None:
                            images.add_image(
                                GrayscaleImage(name=img_name, data=img_no.T))
                    ophys.add(images)

        # saving NWB file (once, after all planes have been added):
        if write:
            io.write(nwbfile)
            io.close()
            # test read
            with NWBHDF5IO(str(save_path), "r") as io:
                io.read()
Example #25
def write_spike_waveforms_single_shank(
    nwbfile: NWBFile,
    session_path: str,
    shankn: int,
    spikes_nsamples: int,
    nchan_on_shank: int,
    stub_test: bool = False,
    compression: Optional[str] = "gzip",
):
    """Write spike waveforms to NWBFile.

    Parameters
    ----------
    nwbfile: pynwb.NWBFile
    session_path: str
    shankn: int
    spikes_nsamples: int
    nchan_on_shank: int
    stub_test: bool, optional
        default: False
    compression: str, optional
        default: 'gzip'
    """
    session_name = os.path.split(session_path)[1]
    spk_file = os.path.join(session_path,
                            session_name + ".spk.{}".format(shankn))

    assert os.path.isfile(spk_file), (
        "No .spk.{} file found at the path location! "
        "Unable to retrieve spike waveforms.".format(shankn))

    group = nwbfile.electrode_groups["shank{}".format(shankn)]
    elec_idx = list(
        np.where(np.array(nwbfile.electrodes["group"][:]) == group)[0])
    table_region = nwbfile.create_electrode_table_region(
        elec_idx, group.name + " region")

    if stub_test:
        n_stub_spikes = 50
        spks = np.fromfile(
            spk_file,
            dtype=np.int16,
            count=n_stub_spikes * spikes_nsamples * nchan_on_shank,
        ).reshape(n_stub_spikes, spikes_nsamples, nchan_on_shank)
        spk_times = read_spike_times(session_path, shankn)[:n_stub_spikes]
    else:
        spks = np.fromfile(spk_file,
                           dtype=np.int16).reshape(-1, spikes_nsamples,
                                                   nchan_on_shank)
        spk_times = read_spike_times(session_path, shankn)

    if compression:
        data = H5DataIO(spks, compression=compression)
    else:
        data = spks

    spike_event_series = SpikeEventSeries(
        name="SpikeWaveforms{}".format(shankn),
        data=data,
        timestamps=spk_times,
        conversion=1e-6,
        electrodes=table_region,
    )
    check_module(nwbfile, "ecephys").add_data_interface(spike_event_series)
Example #26
nwbfile4.add_acquisition(test_ts4)

####################
# In the above case we did not make it explicit how we want to handle the data from
# our TimeSeries, so :py:class:`~pynwb.NWBHDF5IO` will need to determine on write how
# to treat the dataset. We can make this explicit and customize this behavior on a
# per-dataset basis by wrapping our dataset with
# :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`

from hdmf.backends.hdf5.h5_utils import H5DataIO

# Create another timeseries that links to the same data
test_ts5 = TimeSeries(
    name='test_timeseries5',
    data=H5DataIO(
        data=timeseries_1_data,  # <-------
        link_data=True),  # <-------
    unit='SIunit',
    timestamps=timestamps)
nwbfile4.add_acquisition(test_ts5)

####################
# Step 4: Write the data
# ^^^^^^^^^^^^^^^^^^^^^^^
#
from pynwb import NWBHDF5IO

io4 = NWBHDF5IO(filename4, 'w')
io4.write(nwbfile4, link_data=True)  # <-------- Specify default behavior to link rather than copy data
io4.close()
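####################
# A minimal read-back sketch, assuming the write above succeeded: accessing
# ``test_timeseries5`` goes through the HDF5 link to the original dataset rather than a
# fresh copy of the data.

with NWBHDF5IO(filename4, 'r') as io_check:
    nwbfile4_in = io_check.read()
    ts5_in = nwbfile4_in.acquisition['test_timeseries5']
    print(ts5_in.data[:5])  # reads through the link into the original dataset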
Example #27
###############################################################################
# Creating an expandable table
# ----------------------------
# When using the default HDF5 backend, each column of these tables is an HDF5 Dataset,
# which by default is fixed in size. This means that once a file is written, it is not
# possible to add a new row. If you want to be able to save this file, load it, and add
# more rows to the table, you will need to set this up when you create the
# :py:class:`~hdmf.common.table.DynamicTable`. You do this by wrapping the data with
# :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO` and the argument ``maxshape=(None, )``.

from hdmf.backends.hdf5.h5_utils import H5DataIO

col1 = VectorData(
    name='expandable_col1',
    description='column #1',
    data=H5DataIO(data=[1, 2], maxshape=(None, )),
)
col2 = VectorData(
    name='expandable_col2',
    description='column #2',
    data=H5DataIO(data=['a', 'b'], maxshape=(None, )),
)

# don't forget to wrap the row IDs too!
ids = ElementIdentifiers(
    name='id',
    data=H5DataIO(data=[0, 1], maxshape=(None, )),
)

expandable_table = DynamicTable(
    name='expandable_table',
    description='an example table that can be expanded after it is written to disk',
    columns=[col1, col2],
    id=ids,
)
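####################
# A minimal round-trip sketch of the expandability described above, assuming an
# ``expandable_table.h5`` output file and the ``hdmf.common`` type manager: write the
# table, reopen the file in append mode, and add a row. Because every column (and ``id``)
# was wrapped with ``maxshape=(None, )``, the underlying HDF5 datasets can be resized in place.

from hdmf.common import get_manager
from hdmf.backends.hdf5 import HDF5IO

with HDF5IO('expandable_table.h5', manager=get_manager(), mode='w') as io:
    io.write(expandable_table)

with HDF5IO('expandable_table.h5', manager=get_manager(), mode='a') as io:
    table_in = io.read()
    # add_row resizes the expandable HDF5 datasets directly, so the new row is
    # persisted when the file is closed
    table_in.add_row(expandable_col1=3, expandable_col2='c')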
Example #28
    def run_conversion(self,
                       nwbfile: NWBFile,
                       metadata: dict,
                       stub_test: bool = False,
                       external_mode: bool = True,
                       starting_times: Optional[list] = None,
                       chunk_data: bool = True,
                       module_name: Optional[str] = None,
                       module_description: Optional[str] = None):
        """
        Convert the movie data files to ImageSeries and write them in the NWBFile.

        Parameters
        ----------
        nwbfile : NWBFile
        metadata : dict
        stub_test : bool, optional
            If True, truncates the write operation for fast testing. The default is False.
        external_mode : bool, optional
            ImageSeries in NWBFiles may contain either explicit movie data or file paths to external movie files. If
            True, this utilizes the more efficient method of merely encoding the file path linkage (recommended). For
            data sharing, the video files must be contained in the same folder as the NWBFile. If this NWBFile is
            intended for upload to DANDI, non-NWB file types are not allowed, so this flag would have to be set to
            False. The default is True.
        starting_times : list, optional
            List of start times for each movie. If unspecified, assumes that the movies in the file_paths list are in
            sequential order and are contiguous.
        chunk_data : bool, optional
            If True, uses a DataChunkIterator to read and write the movie, reducing overhead RAM usage at the cost of
            reduced conversion speed (compared to loading the video entirely into RAM as an array). This will also be
            forced to True, even if manually set to False, whenever the video file size exceeds available system RAM
            by a factor of 70 (from compression experiments). Based on experiments with a ~30 FPS system of ~400 x ~600
            color frames, the equivalent uncompressed RAM usage is around 2 GB per minute of video. The default is True.
        module_name: str, optional
            Name of the processing module to add the ImageSeries object to. Default behavior is to add as acquisition.
        module_description: str, optional
            If the processing module specified by module_name does not exist, it will be created with this description.
            The default description is the same as used by the conversion_tools.get_module function.
        """
        file_paths = self.source_data['file_paths']

        if stub_test:
            count_max = 10
        else:
            count_max = np.inf
        if starting_times is not None:
            assert isinstance(starting_times, list) and all([isinstance(x, float) for x in starting_times]) \
                and len(starting_times) == len(file_paths), \
                "Argument 'starting_times' must be a list of floats in one-to-one correspondence with 'file_paths'!"
        else:
            starting_times = [0.]

        for j, file in enumerate(file_paths):
            timestamps = starting_times[j] + get_movie_timestamps(
                movie_file=file)

            if len(starting_times) != len(file_paths):
                starting_times.append(timestamps[-1])

            image_series_kwargs = dict(name=f"Video: {Path(file).stem}",
                                       description="Video recorded by camera.",
                                       unit="Frames")
            if check_regular_timestamps(ts=timestamps):
                fps = get_movie_fps(movie_file=file)
                image_series_kwargs.update(starting_time=starting_times[j],
                                           rate=fps)
            else:
                image_series_kwargs.update(
                    timestamps=H5DataIO(timestamps, compression="gzip"))

            if external_mode:
                image_series_kwargs.update(format="external",
                                           external_file=[file])
            else:
                uncompressed_estimate = Path(file).stat().st_size * 70
                available_memory = psutil.virtual_memory().available
                if not chunk_data and uncompressed_estimate >= available_memory:
                    warn(
                        f"Not enough memory (estimated {round(uncompressed_estimate/1e9, 2)} GB) to load movie file as "
                        f"array ({round(available_memory/1e9, 2)} GB available)! Forcing chunk_data to True."
                    )
                    chunk_data = True

                total_frames = len(timestamps)
                frame_shape = get_frame_shape(movie_file=file)
                maxshape = [total_frames]
                maxshape.extend(frame_shape)
                best_gzip_chunk = (1, frame_shape[0], frame_shape[1], 3)
                tqdm_pos, tqdm_mininterval = (0, 10)
                if chunk_data:

                    def data_generator(file, count_max):
                        cap = cv2.VideoCapture(str(file))
                        for _ in range(min(count_max, total_frames)):
                            success, frame = cap.read()
                            yield frame
                        cap.release()

                    mov = DataChunkIterator(
                        data=tqdm(
                            iterable=data_generator(file=file,
                                                    count_max=count_max),
                            desc=f"Copying movie data for {Path(file).name}",
                            position=tqdm_pos,
                            total=total_frames,
                            mininterval=tqdm_mininterval),
                        iter_axis=0,  # nwb standard is time as zero axis
                        maxshape=tuple(maxshape))
                    image_series_kwargs.update(data=H5DataIO(
                        mov, compression="gzip", chunks=best_gzip_chunk))
                else:
                    cap = cv2.VideoCapture(str(file))
                    mov = []
                    with tqdm(desc=f"Reading movie data for {Path(file).name}",
                              position=tqdm_pos,
                              total=total_frames,
                              mininterval=tqdm_mininterval) as pbar:
                        for _ in range(min(count_max, total_frames)):
                            success, frame = cap.read()
                            mov.append(frame)
                            pbar.update(1)
                    cap.release()
                    image_series_kwargs.update(data=H5DataIO(
                        DataChunkIterator(
                            tqdm(iterable=np.array(mov),
                                 desc=
                                 f"Writing movie data for {Path(file).name}",
                                 position=tqdm_pos,
                                 mininterval=tqdm_mininterval),
                            iter_axis=0,  # nwb standard is time as zero axis
                            maxshape=tuple(maxshape)),
                        compression="gzip",
                        chunks=best_gzip_chunk))
            if module_name is None:
                nwbfile.add_acquisition(ImageSeries(**image_series_kwargs))
            else:
                get_module(nwbfile=nwbfile,
                           name=module_name,
                           description=module_description).add(
                               ImageSeries(**image_series_kwargs))
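A hedged usage sketch of the options documented above; the interface class name, metadata contents, and file path are illustrative assumptions, not taken from this listing.

# Hypothetical invocation of a movie interface exposing the run_conversion method above.
interface = MovieInterface(file_paths=["/data/session1_cam0.avi"])  # hypothetical class and path
interface.run_conversion(
    nwbfile=nwbfile,
    metadata=dict(),
    stub_test=True,          # only convert the first 10 frames while prototyping
    external_mode=False,     # embed the frames instead of linking to the external .avi file
    chunk_data=True,         # stream frames through a DataChunkIterator to bound RAM usage
    module_name="behavior",
    module_description="videos of subject behavior",
)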
Example #29
chunk_shape = (10, 10)
num_values = num_chunks * np.prod(chunk_shape)

# Create our sparse matrix data.
data = SparseMatrixIterator(shape=(xsize, ysize),
                            num_chunks=num_chunks,
                            chunk_shape=chunk_shape)

#####################
# In order to also enable compression and other advanced HDF5 dataset I/O features we can then also
# wrap our data via :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
from hdmf.backends.hdf5.h5_utils import H5DataIO
matrix2 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data2 = H5DataIO(data=matrix2, compression='gzip', compression_opts=4)

######################
# We can now also customize the chunking, fillvalue, and other settings
#
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Increase the chunk size and set a fill value
matrix3 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data3 = H5DataIO(data=matrix3, chunks=(100, 100), fillvalue=np.nan)

# Increase the chunk size and add compression
matrix4 = SparseMatrixIterator(shape=(xsize, ysize),
                               num_chunks=num_chunks,
                               chunk_shape=chunk_shape)
data4 = H5DataIO(data=matrix4,
                 compression='gzip',
                 compression_opts=4,
                 chunks=(100, 100),
                 fillvalue=np.nan)
Example #30
    def run_conversion(self, nwbfile: NWBFile, metadata: dict):
        mat_file_path = self.source_data["mat_file_path"]
        mat_file = loadmat(mat_file_path)
        trial_info = mat_file["SessionNP"]

        nwbfile.add_trial_column(
            name="reward_time",
            description="Time when subject began consuming reward.")
        nwbfile.add_trial_column(
            name="left_or_right",
            description="Whether the subject chose the left or right side on this trial.")
        l_r_dict = {1: "Right", 2: "Left"}
        for trial in trial_info:
            nwbfile.add_trial(start_time=trial[0],
                              stop_time=trial[1],
                              reward_time=trial[2],
                              left_or_right=l_r_dict[int(trial[3])])

        # Position
        pos_info = mat_file["whlrl"]
        pos_data = [pos_info[:, 0:1], pos_info[:, 2:3]]
        starting_time = 0.0
        rate = 20000 / 512  # from CRCNS info
        conversion = np.nan  # whl are arbitrary units
        pos_obj = Position(name="Position")
        for j in range(2):
            spatial_series_object = SpatialSeries(
                name=f"SpatialSeries{j+1}",
                description="(x,y) coordinates tracking subject movement through the maze.",
                data=H5DataIO(pos_data[j], compression="gzip"),
                reference_frame="unknown",
                conversion=conversion,
                starting_time=starting_time,
                rate=rate,
                resolution=np.nan,
            )
            pos_obj.add_spatial_series(spatial_series_object)
        get_module(nwbfile=nwbfile,
                   name="behavior",
                   description="Contains processed behavioral data."
                   ).add_data_interface(pos_obj)

        linearized_pos = mat_file["whlrld"][:, 6]
        lin_pos_obj = Position(name="LinearizedPosition")
        lin_spatial_series_object = SpatialSeries(
            name="LinearizedTimeSeries",
            description=(
                "Linearized position, with '1' defined as the start position (the position at the time of last "
                "nose-poking in the trial), and d=2 being the end position (the position at the time just before "
                "reward consumption). d=0 means the subject is not performing working memory trials."),
            data=H5DataIO(linearized_pos, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            starting_time=starting_time,
            rate=rate,
            resolution=np.nan,
        )
        lin_pos_obj.add_spatial_series(lin_spatial_series_object)
        get_module(nwbfile=nwbfile,
                   name="behavior").add_data_interface(lin_pos_obj)