def yuta2nwb(session_path='/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
             subject_xls=None, include_spike_waveforms=True, stub=True):

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    if subject_xls is None:
        subject_xls = os.path.join(subject_path, 'YM' + mouse_number + ' exp_sheet.xlsx')
    elif not subject_xls[-4:] == 'xlsx':
        subject_xls = os.path.join(subject_xls, 'YM' + mouse_number + ' exp_sheet.xlsx')

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery', 'virus injection', 'mouseID']:
        names = df.iloc[:, 0]
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    if isinstance(subject_data['DOB'], datetime):
        age = session_start_time - subject_data['DOB']
    else:
        age = None

    subject = Subject(subject_id=subject_id, age=str(age),
                      genotype=subject_data['genotype'], species='mouse')

    nwbfile = NWBFile(session_description='mouse in open exploration and theta maze',
                      identifier=identifier,
                      session_start_time=session_start_time.astimezone(),
                      file_create_date=datetime.now().astimezone(),
                      experimenter='Yuta Senzai',
                      session_id=session_id,
                      institution='NYU',
                      lab='Buzsaki',
                      subject=subject,
                      related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path, session_start_time, session_id, b=b)
    print(lfp_channel)

    custom_column = [{'name': 'theta_reference',
                      'description': 'this electrode was used to calculate LFP canonical bands',
                      'data': all_shank_channels == lfp_channel}]
    ns.write_electrode_table(nwbfile, session_path, custom_columns=custom_column, max_shanks=max_shanks)

    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_data = ns.read_lfp(session_path, stub=stub)
    lfp_data = all_channels_data[:, all_shank_channels]

    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp',
                          description='lfp signal for all shank electrodes')

    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(name=name,
                        description='environmental electrode recorded inline with neural data',
                        data=all_channels_data[:, channel],
                        rate=lfp_fs, unit='V', conversion=np.nan, resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(),
                             lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        for shankn in np.arange(1, 9, dtype=int):
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    decomp_series = DecompositionSeries(name='LFPDecompositionSeries',
                                        description='Theta and Gamma phase for reference LFP',
                                        data=data, rate=lfp_fs,
                                        source_timeseries=lfp_ts,
                                        metric='phase', unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))
    check_module(nwbfile, 'ecephys',
                 'contains processed extracellular electrophysiology data').add_data_interface(decomp_series)

    [nwbfile.add_stimulus(x) for x in ns.get_events(session_path)]

    # create epochs corresponding to experiments/environments for the mouse
    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(
        os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name'])))
        for task_type in task_types)
    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            print('loading position for ' + label + '...', end='', flush=True)
            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]
                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown',
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, 'behavior',
                         'contains processed behavioral data').add_data_interface(pos_obj)

            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'
    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    custom_unit_columns = [
        {'name': 'cell_type',
         'description': 'name of cell type',
         'data': celltype_names},
        {'name': 'global_id',
         'description': 'global id for cell for entire experiment',
         'data': df_unit_features['unitID'].values},
        {'name': 'max_electrode',
         'description': 'electrode that has the maximum amplitude of the waveform',
         'data': get_max_electrodes(nwbfile, session_path),
         'table': nwbfile.electrodes}]

    ns.add_units(nwbfile, session_path, custom_unit_columns, max_shanks=max_shanks)

    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        features = trialdatainfo[:7]
        features[:2] = 'start_time', 'stop_time'
        [nwbfile.add_trial_column(x, 'description') for x in features[4:] + ['condition']]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1],
                              condition=cond, error_run=trial_data[4],
                              stim_run=trial_data[5], both_visit=trial_data[6])

    """
    mono_syn_fpath = os.path.join(session_path, session_id + '-MonoSynConvClick.mat')
    matin = loadmat(mono_syn_fpath)
    exc = matin['FinalExcMonoSynID']
    inh = matin['FinalInhMonoSynID']
    # exc_obj = CatCellInfo(name='excitatory_connections',
    #                       indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    # module_cellular.add_container(exc_obj)
    # inh_obj = CatCellInfo(name='inhibitory_connections',
    #                       indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    # module_cellular.add_container(inh_obj)
    """

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])]

        check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table)

    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    print('writing NWB file...', end='', flush=True)
    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
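# Example invocation (hypothetical paths; stub mode writes a small
# '<session_path>_stub.nwb' so the full pipeline can be validated quickly
# before running a complete conversion):
#
#     yuta2nwb(
#         session_path='/data/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
#         include_spike_waveforms=True,
#         stub=True,
#     )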
def run_conversion(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False):
    session_path = Path(self.source_data["folder_path"])
    session_id = session_path.stem

    # Stimuli
    [nwbfile.add_stimulus(x) for x in get_events(session_path)]

    # States
    sleep_state_fpath = session_path / f"{session_id}.SleepState.states.mat"
    # label renaming specific to Peyrache
    state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM")
    if sleep_state_fpath.is_file():
        matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

        table = TimeIntervals(name="states", description="Sleep states of animal.")
        table.add_column(name="label", description="Sleep state.")

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append(dict(start_time=row[0], stop_time=row[1], label=state_label_names[name]))
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
        check_module(nwbfile, "behavior", "Contains behavioral data.").add(table)

    # Position
    pos_names = ["RedLED", "BlueLED"]
    pos_idx_from = [0, 2]
    pos_idx_to = [2, 4]

    # Raw position
    whlfile_path = session_path / f"{session_id}.whl"
    whl_data = np.loadtxt(whlfile_path)
    for name, idx_from, idx_to in zip(pos_names, pos_idx_from, pos_idx_to):
        nwbfile.add_acquisition(
            peyrache_spatial_series(
                name=name,
                description="Raw sensor data. Values of -1 indicate that LED detection failed.",
                data=whl_data[:, idx_from:idx_to],
                conversion=np.nan,  # whl file is in arbitrary grid units
            )
        )

    # Processed position
    posfile_path = session_path / f"{session_id}.pos"
    if posfile_path.is_file():  # at least Mouse32-140820 was missing a .pos file
        try:
            pos_data = np.loadtxt(posfile_path)
            pos_obj = Position(name="SubjectPosition")
            for name, idx_from, idx_to in zip(pos_names, pos_idx_from, pos_idx_to):
                pos_obj.add_spatial_series(
                    peyrache_spatial_series(
                        name=name,
                        description=(
                            "(x,y) coordinates tracking subject movement through the maze. "
                            "Values of -1 indicate that LED detection failed."
                        ),
                        data=pos_data[:, idx_from:idx_to],
                        conversion=1e-2,  # from cm to m
                    )
                )
            check_module(nwbfile, "behavior", "Contains behavioral data.").add(pos_obj)
        except ValueError:  # data issue present in at least Mouse17-170201
            warn(f"Skipping .pos file for session {session_id}!")

    # Epochs - only available for sessions with raw data
    epoch_file = session_path / "raw" / f"{session_id}-raw-info" / f"{session_id}-behaviors.txt"
    if epoch_file.is_file():
        epoch_data = pd.read_csv(epoch_file, header=1)[f"{session_id}:"]
        epoch_dat_inds = []
        epoch_names = []
        for epochs in epoch_data:
            inds, name = epochs.split(": ")
            epoch_dat_inds.append(inds.split(" "))
            epoch_names.append(name)

        epoch_windows = [0]
        for epoch in epoch_dat_inds:
            exp_end_times = []
            for dat_ind in epoch:
                recording_file = session_path / "raw" / f"{session_id}{dat_ind}.dat"
                info_extractor = NeuroscopeRecordingExtractor(recording_file)
                dat_end_time = info_extractor.get_num_frames() / info_extractor.get_sampling_frequency()  # seconds
                exp_end_times.extend([dat_end_time])
            epoch_windows.extend([epoch_windows[-1] + sum(exp_end_times)] * 2)
        epoch_windows = np.array(epoch_windows[:-1]).reshape(-1, 2)

        for j, epoch_name in enumerate(epoch_names):
            nwbfile.add_epoch(start_time=epoch_windows[j][0],
                              stop_time=epoch_windows[j][1],
                              tags=[epoch_name])
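# The epoch-window construction above uses a small trick: each cumulative end
# time is appended twice so that consecutive [start, stop] pairs share an edge,
# and the trailing reshape(-1, 2) turns the flat list into windows. A minimal,
# self-contained sketch with made-up durations:
#
#     import numpy as np
#     durations = [10.0, 5.0, 20.0]  # per-epoch durations in seconds
#     windows = [0]
#     for d in durations:
#         windows.extend([windows[-1] + d] * 2)
#     windows = np.array(windows[:-1]).reshape(-1, 2)
#     # -> [[ 0., 10.], [10., 15.], [15., 35.]]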
def convert_data(
    self,
    nwbfile: NWBFile,
    metadata_dict: dict,
    stub_test: bool = False,
    include_spike_waveforms: bool = False,
):
    """Convert the behavioral portion of a particular session of the GrosmarkAD dataset."""
    session_path = self.input_args["folder_path"]
    subject_path, session_id = os.path.split(session_path)

    # Stimuli
    [nwbfile.add_stimulus(x) for x in get_events(session_path)]

    # States
    sleep_state_fpath = os.path.join(session_path, f"{session_id}.SleepState.states.mat")
    # label renaming specific to Watson
    state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM")
    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

        table = TimeIntervals(name="states", description="Sleep states of animal.")
        table.add_column(name="label", description="Sleep state.")

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append(
                    dict(
                        start_time=row[0],
                        stop_time=row[1],
                        label=state_label_names[name],
                    )
                )
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
        check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table)

    # Position
    pos_filepath = Path(session_path) / f"{session_id}.position.behavior.mat"
    pos_mat = loadmat(str(pos_filepath.absolute()))
    starting_time = float(
        pos_mat["position"]["timestamps"][0][0][0]
    )  # confirmed to be a regularly sampled series
    rate = float(pos_mat["position"]["timestamps"][0][0][1]) - starting_time
    if pos_mat["position"]["units"][0][0][0] == "m":
        conversion = 1.0
    else:
        warnings.warn(
            f"Spatial units ({pos_mat['position']['units'][0][0][0]}) not listed in meters; "
            "setting conversion to nan.")
        conversion = np.nan
    pos_data = [
        [x[0], y[0]]
        for x, y in zip(
            pos_mat["position"]["position"][0][0]["x"][0][0],
            pos_mat["position"]["position"][0][0]["y"][0][0],
        )
    ]
    linearized_data = [[lin[0]] for lin in pos_mat["position"]["position"][0][0]["lin"][0][0]]

    label = pos_mat["position"]["behaviorinfo"][0][0]["MazeType"][0][0][0].replace(" ", "")
    pos_obj = Position(name=f"{label}Position")
    spatial_series_object = SpatialSeries(
        name=f"{label}SpatialSeries",
        description="(x,y) coordinates tracking subject movement through the maze.",
        data=H5DataIO(pos_data, compression="gzip"),
        reference_frame="unknown",
        conversion=conversion,
        starting_time=starting_time,
        rate=rate,
        resolution=np.nan,
    )
    pos_obj.add_spatial_series(spatial_series_object)
    check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj)

    lin_pos_obj = Position(name=f"{label}LinearizedPosition")
    lin_spatial_series_object = SpatialSeries(
        name=f"{label}LinearizedTimeSeries",
        description=(
            "Linearized position, defined as starting at the edge of reward area, "
            "and increasing clockwise, terminating at the opposing edge of the reward area."
        ),
        data=H5DataIO(linearized_data, compression="gzip"),
        reference_frame="unknown",
        conversion=conversion,
        starting_time=starting_time,
        rate=rate,
        resolution=np.nan,
    )
    lin_pos_obj.add_spatial_series(lin_spatial_series_object)
    check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(lin_pos_obj)

    # Epochs
    epoch_names = list(pos_mat["position"]["Epochs"][0][0].dtype.names)
    epoch_windows = [
        [float(start), float(stop)]
        for x in pos_mat["position"]["Epochs"][0][0][0][0]
        for start, stop in x
    ]
    nwbfile.add_epoch_column("label", "name of epoch")
    for j, epoch_name in enumerate(epoch_names):
        nwbfile.add_epoch(
            start_time=epoch_windows[j][0],
            stop_time=epoch_windows[j][1],
            label=epoch_name,
        )
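# scipy.io.loadmat wraps MATLAB structs in 1x1 object arrays, which is why
# the "[0][0]" indexing appears throughout these converters. A hypothetical
# round-trip illustrating the pattern:
#
#     from scipy.io import loadmat, savemat
#     savemat("demo.mat", {"SleepState": {"ints": {"REMstate": [[1.0, 2.0]]}}})
#     mat = loadmat("demo.mat")
#     ints = mat["SleepState"]["ints"][0][0]  # unwrap the 1x1 struct array
#     rem = ints["REMstate"][0][0]            # -> array([[1., 2.]])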
def convert_data(
    self,
    nwbfile: NWBFile,
    metadata_dict: dict,
    stub_test: bool = False,
    include_spike_waveforms: bool = False
):
    session_path = self.input_args["folder_path"]
    # TODO: check/enforce format?
    task_types = metadata_dict.get("task_types", [])

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]

    [nwbfile.add_stimulus(x) for x in get_events(session_path)]

    exist_pos_data = any(
        os.path.isfile(os.path.join(session_path, "{}__{}.mat".format(session_id, task_type["name"])))
        for task_type in task_types
    )
    if exist_pos_data:
        nwbfile.add_epoch_column("label", "name of epoch")

    for task_type in task_types:
        label = task_type["name"]

        file = os.path.join(session_path, session_id + "__" + label + ".mat")
        if os.path.isfile(file):
            pos_obj = Position(name=label + "_position")

            matin = loadmat(file)
            tt = matin["twhl_norm"][:, 0]
            exp_times = find_discontinuities(tt)

            if "conversion" in task_type:
                conversion = task_type["conversion"]
            else:
                conversion = np.nan

            for pos_type in ("twhl_norm", "twhl_linearized"):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]
                    spatial_series_object = SpatialSeries(
                        name=label + "_{}_spatial_series".format(pos_type),
                        data=H5DataIO(pos_data_norm, compression="gzip"),
                        reference_frame="unknown",
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression="gzip"),
                    )
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, "behavior", "contains processed behavioral data").add_data_interface(pos_obj)

            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1], label=label + "_" + str(i))

    trialdata_path = os.path.join(session_path, session_id + "__EightMazeRun.mat")
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)["EightMazeRun"]

        trialdatainfo_path = os.path.join(fpath_base, "EightMazeRunInfo.mat")
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]]

        features = trialdatainfo[:7]
        features[:2] = (
            "start_time",
            "stop_time",
        )
        [nwbfile.add_trial_column(x, "description") for x in features[4:] + ["condition"]]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = "run_left"
            else:
                cond = "run_right"
            nwbfile.add_trial(
                start_time=trial_data[0],
                stop_time=trial_data[1],
                condition=cond,
                error_run=trial_data[4],
                stim_run=trial_data[5],
                both_visit=trial_data[6],
            )

    sleep_state_fpath = os.path.join(session_path, "{}.SleepState.states.mat".format(session_id))
    # label renaming specific to Watson
    state_label_names = {"WAKEstate": "Awake", "NREMstate": "Non-REM", "REMstate": "REM"}
    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

        table = TimeIntervals(name="states", description="Sleep states of animal.")
        table.add_column(name="label", description="Sleep state.")

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({"start_time": row[0], "stop_time": row[1], "label": state_label_names[name]})
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]

        check_module(nwbfile, "behavior", "contains behavioral data").add_data_interface(table)
def run_conversion(
    self,
    nwbfile: NWBFile,
    metadata: dict,
    stub_test: bool = False,
):
    session_path = Path(self.source_data["folder_path"])
    session_id = session_path.name

    # Stimuli
    [
        nwbfile.add_stimulus(x)
        for x in get_events(
            session_path=session_path,
            suffixes=[".lrw.evt", ".puf.evt", ".rip.evt", ".rrw.evt"])
    ]

    # Epochs
    df = pd.read_csv(session_path / f"{session_id}.cat.evt", sep=" ",
                     names=("time", "begin_or_end", "of", "epoch_name"))
    epoch_starts = []
    for j in range(int(len(df) / 2)):
        epoch_starts.append(df["time"][2 * j])
        nwbfile.add_epoch(start_time=epoch_starts[j],
                          stop_time=df["time"][2 * j + 1],
                          tags=[df["epoch_name"][2 * j][18:]])

    # Trials
    trialdata_path = session_path / f"{session_id}-TrackRunTimes.mat"
    if trialdata_path.is_file():
        trials_data = loadmat(trialdata_path)["trackruntimes"]
        for trial_data in trials_data:
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1])

    # .whl position
    whl_files = []  # placeholder: no .whl files are collected yet, so this loop is currently a no-op
    for whl_file in whl_files:
        add_position_data(nwbfile=nwbfile,
                          session_path=session_path,
                          whl_file_path=whl_file,
                          starting_time=epoch_starts[j])  # note: j is the last index from the epochs loop above

    # States
    sleep_state_fpath = session_path / f"{session_id}.SleepState.states.mat"
    # label renaming
    state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM")
    if sleep_state_fpath.is_file():
        matin = loadmat(sleep_state_fpath)["SleepState"]["ints"][0][0]

        table = TimeIntervals(name="states", description="Sleep states of animal.")
        table.add_column(name="label", description="Sleep state.")

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append(dict(start_time=row[0], stop_time=row[1], label=state_label_names[name]))
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
        check_module(nwbfile, "behavior", "Contains behavioral data.").add(table)
def run_conversion(self, nwbfile: NWBFile, metadata: dict, stub_test: bool = False):
    session_path = Path(self.source_data["folder_path"])
    task_types = [
        dict(name="OpenFieldPosition_ExtraLarge"),
        dict(name="OpenFieldPosition_New_Curtain", conversion=0.46),
        dict(name="OpenFieldPosition_New", conversion=0.46),
        dict(name="OpenFieldPosition_Old_Curtain", conversion=0.46),
        dict(name="OpenFieldPosition_Old", conversion=0.46),
        dict(name="OpenFieldPosition_Oldlast", conversion=0.46),
        dict(name="EightMazePosition", conversion=0.65 / 2),
    ]
    subject_path = session_path.parent
    session_id = session_path.stem

    [nwbfile.add_stimulus(x) for x in get_events(session_path)]

    sleep_state_fpath = session_path / f"{session_id}--StatePeriod.mat"

    exist_pos_data = any(
        (session_path / f"{session_id}__{task_type['name']}.mat").is_file()
        for task_type in task_types
    )
    if exist_pos_data:
        nwbfile.add_epoch_column("label", "Name of epoch.")

    # Epoch intervals
    for task_type in task_types:
        label = task_type["name"]

        file = session_path / f"{session_id}__{label}.mat"
        if file.is_file():
            pos_obj = Position(name=f"{label}_position")

            matin = loadmat(file)
            tt = matin["twhl_norm"][:, 0]
            exp_times = find_discontinuities(tt)

            if "conversion" in task_type:
                conversion = task_type["conversion"]
            else:
                conversion = np.nan

            for pos_type in ("twhl_norm", "twhl_linearized"):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]
                    spatial_series_object = SpatialSeries(
                        name=f"{label}_{pos_type}_spatial_series",
                        data=H5DataIO(pos_data_norm, compression="gzip"),
                        reference_frame="unknown",
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression="gzip"),
                    )
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, "behavior",
                         "Contains processed behavioral data.").add_data_interface(pos_obj)

            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(
                    start_time=window[0],
                    stop_time=window[1],
                    label=f"{label}_{str(i)}",
                )

    # Trial intervals
    trialdata_path = session_path / f"{session_id}__EightMazeRun.mat"
    if trialdata_path.is_file():
        trials_data = loadmat(trialdata_path)["EightMazeRun"]

        trialdatainfo_path = subject_path / "EightMazeRunInfo.mat"
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)["EightMazeRunInfo"][0]]

        features = trialdatainfo[:7]
        features[:2] = (
            "start_time",
            "stop_time",
        )
        [nwbfile.add_trial_column(x, "description") for x in features[4:] + ["condition"]]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = "run_left"
            else:
                cond = "run_right"
            nwbfile.add_trial(
                start_time=trial_data[0],
                stop_time=trial_data[1],
                condition=cond,
                error_run=trial_data[4],
                stim_run=trial_data[5],
                both_visit=trial_data[6],
            )

    # Sleep states
    if sleep_state_fpath.is_file():
        matin = loadmat(sleep_state_fpath)["StatePeriod"]

        table = TimeIntervals(name="states", description="sleep states of animal")
        table.add_column(name="label", description="sleep state")

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append(dict(start_time=row[0], stop_time=row[1], label=name))
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]

        check_module(nwbfile, "behavior", "Contains behavioral data.").add_data_interface(table)
def run_conversion(self, nwbfile: NWBFile, metadata: dict):
    session_path = Path(self.source_data["folder_path"])
    session_id = session_path.stem

    # Load the file with behavioral data
    behavior_file_path = Path(session_path) / f"{session_id}.behavior.mat"
    behavior_mat = read_matlab_file(str(behavior_file_path))["behavior"]

    # Add trials
    events = behavior_mat["events"]
    trial_interval_list = events["trialIntervals"]

    data = []
    for start_time, stop_time in trial_interval_list:
        data.append(dict(start_time=float(start_time), stop_time=float(stop_time)))
    [nwbfile.add_trial(**row) for row in sorted(data, key=lambda x: x["start_time"])]

    trial_list = events["trials"]
    direction_list = [trial.get("direction", "") for trial in trial_list]
    trial_type_list = [trial.get("type", "") for trial in trial_list]

    if not all([direction == "" for direction in direction_list]):
        nwbfile.add_trial_column(name="direction",
                                 description="direction of the trial",
                                 data=direction_list)

    if not all([trial_type == "" for trial_type in trial_type_list]):
        nwbfile.add_trial_column(name="trial_type",
                                 description="type of trial",
                                 data=trial_type_list)

    # Position
    module_name = "behavior"
    module_description = "Contains behavioral data concerning position."
    processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description)

    timestamps = np.array(behavior_mat["timestamps"])[..., 0]

    position = behavior_mat["position"]
    pos_data = [[x, y, z] for (x, y, z) in zip(position["x"], position["y"], position["z"])]
    pos_data = np.array(pos_data)[..., 0]

    unit = behavior_mat.get("units", "")
    if unit in ("m", "meter", "meters"):
        conversion = 1.0
    else:
        warnings.warn(f"Spatial units {unit} not listed in meters; "
                      "setting conversion to nan.")
        conversion = np.nan

    description = behavior_mat.get("description", "generic_position_tracking").replace("/", "-")
    rotation_type = behavior_mat.get("rotationType", "non_specified")

    pos_obj = Position(name=f"{description}_task".replace(" ", "_"))
    spatial_series_object = SpatialSeries(
        name="position",
        description="(x,y,z) coordinates tracking subject movement.",
        data=H5DataIO(pos_data, compression="gzip"),
        reference_frame="unknown",
        unit=unit,
        conversion=conversion,
        timestamps=timestamps,
        resolution=np.nan,
    )
    pos_obj.add_spatial_series(spatial_series_object)

    # Add error if available
    errorPerMarker = behavior_mat.get("errorPerMarker", None)
    if errorPerMarker:
        error_data = np.array([error for error in errorPerMarker])[..., 0]
        spatial_series_object = SpatialSeries(
            name="error_per_marker",
            description="Estimated error for marker tracking from optitrack system.",
            data=H5DataIO(error_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            timestamps=timestamps,
            resolution=np.nan,
        )
        pos_obj.add_spatial_series(spatial_series_object)
    processing_module.add_data_interface(pos_obj)

    # Compass
    try:
        orientation = behavior_mat["orientation"]
        orientation_data = [
            [x, y, z, w]
            for (x, y, z, w) in zip(orientation["x"], orientation["y"],
                                    orientation["z"], orientation["w"])
        ]
        orientation_data = np.array(orientation_data)[..., 0]
        compass_obj = CompassDirection(name="allocentric_frame_tracking")
        spatial_series_object = SpatialSeries(
            name="orientation",
            description=f"(x, y, z, w) orientation coordinates, orientation type: {rotation_type}",
            data=H5DataIO(orientation_data, compression="gzip"),
            reference_frame="unknown",
            conversion=conversion,
            timestamps=timestamps,
            resolution=np.nan,
        )
        compass_obj.add_spatial_series(spatial_series_object)
        processing_module.add_data_interface(compass_obj)
    except KeyError:
        warnings.warn("Orientation data not found")

    # States
    module_name = "ecephys"
    module_description = "Contains behavioral data concerning classified states."
    processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description)

    # Sleep states
    sleep_file_path = session_path / f"{session_id}.SleepState.states.mat"
    if Path(sleep_file_path).exists():
        mat_file = read_matlab_file(sleep_file_path)

        state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA")
        sleep_state_dic = mat_file["SleepState"]["ints"]
        table = TimeIntervals(name="sleep_states", description="Sleep state of the animal.")
        table.add_column(name="label", description="Sleep state.")

        data = []
        for sleep_state in state_label_names:
            values = sleep_state_dic[sleep_state]
            if len(values) != 0 and isinstance(values[0], int):
                values = [values]
            for start_time, stop_time in values:
                data.append(
                    dict(
                        start_time=float(start_time),
                        stop_time=float(stop_time),
                        label=state_label_names[sleep_state],
                    )
                )
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
        processing_module.add(table)

    # Add epochs
    lfp_file_path = session_path / f"{session_path.name}.lfp"
    raw_file_path = session_path / f"{session_id}.dat"
    xml_file_path = session_path / f"{session_id}.xml"

    if raw_file_path.is_file():
        recorder = NeuroscopeRecordingExtractor(file_path=raw_file_path, xml_file_path=xml_file_path)
    else:
        recorder = NeuroscopeRecordingExtractor(file_path=lfp_file_path, xml_file_path=xml_file_path)

    num_frames = recorder.get_num_frames()
    sampling_frequency = recorder.get_sampling_frequency()
    end_of_the_session = num_frames / sampling_frequency

    session_start = 0.0
    start_trials_time = min([interval[0] for interval in trial_interval_list])
    end_trials_time = max([interval[1] for interval in trial_interval_list])

    nwbfile.add_epoch(start_time=session_start, stop_time=start_trials_time,
                      tags=["before trials"])
    nwbfile.add_epoch(start_time=start_trials_time, stop_time=end_trials_time,
                      tags=["during trials"])
    nwbfile.add_epoch(start_time=end_trials_time, stop_time=end_of_the_session,
                      tags=["after trials"])
def convert_data(self, nwbfile: NWBFile, metadata_dict: dict,
                 stub_test: bool = False, include_spike_waveforms: bool = False):
    session_path = self.input_args['folder_path']
    # TODO: check/enforce format?
    task_types = metadata_dict['task_types']

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]

    [nwbfile.add_stimulus(x) for x in get_events(session_path)]

    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(
        os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name'])))
        for task_type in task_types)
    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]
                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown',
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, 'behavior',
                         'contains processed behavioral data').add_data_interface(pos_obj)

            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                                  label=label + '_' + str(i))

    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        features = trialdatainfo[:7]
        features[:2] = 'start_time', 'stop_time'
        [nwbfile.add_trial_column(x, 'description') for x in features[4:] + ['condition']]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1],
                              condition=cond, error_run=trial_data[4],
                              stim_run=trial_data[5], both_visit=trial_data[6])

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])]

        check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table)
def run_conversion(self, nwbfile: NWBFile, metadata: dict):
    session_path = Path(self.source_data["folder_path"])
    session_id = session_path.stem

    module_name = "Neural states"
    module_description = "Contains behavioral data concerning classified states."
    processing_module = get_module(nwbfile=nwbfile, name=module_name, description=module_description)

    # Sleep states
    sleep_file_path = session_path / f"{session_id}.SleepState.states.mat"
    if Path(sleep_file_path).exists():
        mat_file = read_matlab_file(sleep_file_path)

        state_label_names = dict(WAKEstate="Awake", NREMstate="Non-REM", REMstate="REM", MAstate="MA")
        sleep_state_dic = mat_file["SleepState"]["ints"]
        table = TimeIntervals(name="Sleep states", description="Sleep state of the animal.")
        table.add_column(name="label", description="Sleep state.")

        data = []
        for sleep_state in state_label_names:
            values = sleep_state_dic[sleep_state]
            if len(values) != 0 and isinstance(values[0], int):
                values = [values]
            for start_time, stop_time in values:
                data.append(
                    dict(
                        start_time=float(start_time),
                        stop_time=float(stop_time),
                        label=state_label_names[sleep_state],
                    )
                )
        [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
        processing_module.add(table)

    # Up and down states
    behavioral_file_path = session_path / f"{session_id}.SlowWaves.events.mat"
    behavioral_file = read_matlab_file(behavioral_file_path)

    table = TimeIntervals(name="Up-Down states", description="Up and down states classified by LFP.")
    table.add_column(name="label", description="state.")

    data = []
    up_and_down_intervals_dic = behavioral_file["SlowWaves"]["ints"]
    for state, values in up_and_down_intervals_dic.items():
        for start_time, stop_time in values:
            data.append(dict(start_time=float(start_time), stop_time=float(stop_time), label=state))
    [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
    processing_module.add(table)

    # Laser diode and visual laser
    laser_details = dict(
        LaserDiode=dict(name="Laser diode", description="Laser pulses for optogenetics."),
        VisualLaser=dict(name="Visual laser", description="Laser pulses for subject stimulation."),
    )
    for laser_type, laser_detail in laser_details.items():
        laser_file_path = session_path / f"{session_id}_Pulses_{laser_type}.mat"
        if laser_file_path.exists():
            laser_file = read_matlab_file(laser_file_path)

            table = TimeIntervals(name=laser_detail["name"], description=laser_detail["description"])
            table.add_column(name="amplitude", description="Amplitude of the laser pulse.")

            data = []
            laser_pulses = laser_file["Pulses"]["periods"]
            amplitudes = laser_file["Pulses"]["amplitude"]
            for interval, amplitude in zip(laser_pulses, amplitudes):
                data.append(dict(start_time=float(interval[0]),
                                 stop_time=float(interval[1]),
                                 amplitude=amplitude))
            [table.add_row(**row) for row in sorted(data, key=lambda x: x["start_time"])]
            processing_module.add(table)
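# TimeIntervals inherits from hdmf's DynamicTable, so any of the tables built
# above can be inspected as a pandas DataFrame once added, e.g.:
#
#     df = nwbfile.processing["Neural states"]["Up-Down states"].to_dataframe()
#     print(df.head())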
def run_conversion(self, nwbfile: NWBFile, metadata: dict):
    """
    Primary conversion function for the custom Feldman lab behavioral interface.

    Uses the synch information in the nidq_synch_file to set trial times in NWBFile.
    """
    folder_path = Path(self.source_data["folder_path"])

    trial_numbers, stimulus_numbers, trial_times_from_nidq = get_trials_info(
        recording_nidq=SpikeGLXRecordingExtractor(file_path=self.source_data["nidq_synch_file"]),
        trial_ongoing_channel=self.source_data["trial_ongoing_channel"],
        event_channel=self.source_data["event_channel"])
    if trial_numbers[0] != 0:
        trial_numbers, stimulus_numbers, trial_times_from_nidq = clip_trials(
            trial_numbers=trial_numbers,
            stimulus_numbers=stimulus_numbers,
            trial_times=trial_times_from_nidq)

    header_segments = [x for x in folder_path.iterdir() if "header" in x.name]
    first_header = read_csv(header_segments[0], header=None, sep="\t", index_col=0).T
    nwbfile.trials = TimeIntervals(
        name="trials",
        description=str({x: y.values[0] for x, y in first_header.items()}).replace("'", "\""))

    exclude_columns = set(["TrNum", "Segment"])
    trial_csv_column_names = dict(
        StimNum="stimulus_number",
        StimLayout="stimulus_layout",
        StimOnsetTime="stimulus_onset_time",
        StimOrder="stimulus_order",
        Tone="tone",
        TrOutcome="trial_outcome",
        TrType="trial_type",
        RewardTime="reward_time",
        RWStartTime="reward_start_time",
        RWEndTime="reward_end_time",
        NLicks="number_of_licks",
        LickInWindow="licks_in_window",
        Laser="laser_is_on",
        CumVol="cumulative_volume",
        CumNRewards="cumulative_number_of_rewards",
        ISS0Time="ISS0Time",
        Arm0Time="Arm0Time")
    trial_csv_column_descriptions = dict(
        StimNum="The identifier value for stimulus type.",
        StimLayout=("The index of the stimulus layout. "
                    "1=Std, 2=Trains, 3=IL, 4=Trains+IL, 5=RFMap, 6=2WC, 7=MWS, 8=MWD"),
        StimOnsetTime="The time the stimulus was presented.",
        StimOrder="Index of stimulus ordering.",
        Tone="Index of the tone.",
        TrOutcome="The outcome index for each trial.",
        TrType="The type index for each trial.",
        RewardTime="Index of reward timing.",
        RWStartTime="Times when reward began.",
        RWEndTime="Times when reward ended.",
        NLicks="Number of licks.",
        LickInWindow="Number of licks in selected window.",
        Laser="Boolean indicating laser activity.",
        CumVol="Cumulative volume.",
        CumNRewards="Cumulative number of rewards.",
        ISS0Time="Data needed for spikes.mat roundtrip.",
        Arm0Time="Data needed for spikes.mat roundtrip.")
    stimulus_csv_column_names = dict(
        Time_ms="stimulus_times",
        Ampl="stimulus_amplitudes",
        Posn="stimulus_ordinalities")
    stimulus_column_description = dict(
        stimulus_elements="Type index of each stimulus element.",
        stimulus_times="Time of occurrence of each stimulus element.",
        stimulus_amplitudes="Amplitudes for each stimulus element. Unknown units.",
        stimulus_ordinalities="Ordinal position of the stimulus element in the train.",
        stimulus_rises="Rises for each stimulus element. Unknown units.",
        stimulus_gngs="GNGs for the stimulus element. Unknown units.",
        stimulus_shapes="Shape index of the stimulus element.",
        stimulus_durations="Duration of the stimulus element in seconds.",
        stimulus_probabilities="Probability that the stimulus was presented; 0 if deterministic.",
        stimulus_piezo_labels="Manually assigned labels to each stimulus element.")
    add_trial_columns(
        nwbfile=nwbfile,
        trial_csv_column_names=trial_csv_column_names,
        trial_csv_column_descriptions=trial_csv_column_descriptions,
        stimulus_column_names=stimulus_column_description.keys(),
        stimulus_column_description=stimulus_column_description,
        exclude_columns=exclude_columns)

    for header_segment in header_segments:
        header_data = read_csv(header_segment, header=None, sep="\t", index_col=0).T
        trial_data = read_csv(str(header_segment).replace("header", "trials"), header=0, sep="\t")
        if trial_data["TrStartTime"].iloc[-1] == 0 and trial_data["TrEndTime"].iloc[-1] == 0:
            trial_data.drop(trial_data.index[-1], inplace=True)
        stimulus_data = read_csv(str(header_segment).replace("header", "stimuli"), header=0, sep="\t")

        trial_segment_csv_start_times = np.array(trial_data.loc[:, "TrStartTime"])
        for csv_column in trial_data:
            if "Time" in csv_column and not np.all(np.array(trial_data.loc[:, csv_column]) == 0):
                trial_data.loc[:, csv_column] = (
                    (np.array(trial_data.loc[:, csv_column]) - trial_segment_csv_start_times) / 1e3
                    + trial_times_from_nidq[trial_data.loc[:, "TrNum"], 0])
        trial_data.loc[:, "Laser"] = trial_data.loc[:, "Laser"].astype(bool)

        last_trial = 0
        m = 0
        for j, (trial, offset) in enumerate(zip(stimulus_data.loc[:, "Trial"],
                                                stimulus_data.loc[:, "Time_ms"])):
            if trial == last_trial:
                m += 1
            else:
                last_trial = trial
                m = 1
            stimulus_data.loc[j, "Time_ms"] = trial_times_from_nidq[trial, 0] + offset / 1e3 * m

        add_trials(
            nwbfile=nwbfile,
            trial_times=trial_times_from_nidq,
            trial_data=trial_data,
            stimulus_data=stimulus_data,
            header_data=header_data,
            trial_csv_column_names=trial_csv_column_names,
            stimulus_csv_column_names=stimulus_csv_column_names,
            exclude_columns=exclude_columns)
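# Stimulus times in the CSVs are stored as millisecond offsets relative to the
# trial; the loop above rebases them onto the NIDQ-derived trial start times
# (in seconds), scaling the offset by the element's ordinal position m within
# its trial. A toy calculation with made-up numbers:
#
#     trial_starts = np.array([[0.0, 5.0], [5.0, 10.0]])  # [start, stop] per trial
#     trial, offset_ms, m = 1, 100.0, 3                   # third element of trial 1
#     t = trial_starts[trial, 0] + offset_ms / 1e3 * m    # -> 5.3 seconds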
def run_conversion(
        fpath_in='/Volumes/easystore5T/data/Brunton/subj_01_day_4.h5',
        fpath_out='/Volumes/easystore5T/data/Brunton/subj_01_day_4.nwb',
        events_path='C:/Users/micha/Desktop/Brunton Lab Data/event_times.csv',
        r2_path='C:/Users/micha/Desktop/Brunton Lab Data/full_model_r2.npy',
        coarse_events_path='C:/Users/micha/Desktop/Brunton Lab Data/coarse_labels/coarse_labels',
        reach_features_path='C:/Users/micha/Desktop/Brunton Lab Data/behavioral_features.csv',
        elec_loc_labels_path='elec_loc_labels.csv',
        special_chans=SPECIAL_CHANNELS,
        session_description='no description'
):
    print(f"Converting {fpath_in}...")
    fname = os.path.split(os.path.splitext(fpath_in)[0])[1]
    _, subject_id, _, session = fname.split('_')

    file = File(fpath_in, 'r')

    nwbfile = NWBFile(
        session_description=session_description,
        identifier=str(uuid.uuid4()),
        session_start_time=datetime.fromtimestamp(file['start_timestamp'][()]),
        subject=Subject(subject_id=subject_id, species="Homo sapiens"),
        session_id=session
    )

    # extract electrode groups
    file_elec_col_names = file['chan_info']['axis1'][:]
    elec_data = file['chan_info']['block0_values']

    re_exp = re.compile("([ a-zA-Z]+)([0-9]+)")
    channel_labels_dset = file['chan_info']['axis0']

    group_names, group_nums = [], []
    for i, bytes_ in enumerate(channel_labels_dset):
        if bytes_ not in special_chans:
            str_ = bytes_.decode()
            res = re_exp.match(str_).groups()
            group_names.append(res[0])
            group_nums.append(int(res[1]))

    is_elec = ~np.isin(channel_labels_dset, special_chans)

    dset = DatasetView(file['dataset']).lazy_transpose()

    # add special channels
    for kwargs in (
            dict(
                name='EOGL',
                description='Electrooculography for tracking saccades - left',
            ),
            dict(
                name='EOGR',
                description='Electrooculography for tracking saccades - right',
            ),
            dict(
                name='ECGL',
                description='Electrocardiography - left',
            ),
            dict(
                name='ECGR',
                description='Electrocardiography - right',
            )
    ):
        if kwargs['name'].encode() in channel_labels_dset:
            nwbfile.add_acquisition(
                TimeSeries(
                    rate=file['f_sample'][()],
                    conversion=np.nan,
                    unit='V',
                    data=dset[:, list(channel_labels_dset).index(kwargs['name'].encode())],
                    **kwargs
                )
            )

    # add electrode groups
    df = pd.read_csv(elec_loc_labels_path)
    df_subject = df[df['subject_ID'] == 'subj' + subject_id]
    electrode_group_descriptions = {row['label']: row['long_name'] for _, row in df_subject.iterrows()}

    groups_map = dict()
    for group_name, group_description in electrode_group_descriptions.items():
        device = nwbfile.create_device(name=group_name)
        groups_map[group_name] = nwbfile.create_electrode_group(
            name=group_name,
            description=group_description,
            device=device,
            location='unknown'
        )

    # add required cols to electrodes table
    for row, group_name in zip(elec_data[:].T, group_names):
        nwbfile.add_electrode(
            x=row[file_elec_col_names == b'X'][0],
            y=row[file_elec_col_names == b'Y'][0],
            z=row[file_elec_col_names == b'Z'][0],
            imp=np.nan,
            location='unknown',
            filtering='250 Hz lowpass',
            group=groups_map[group_name],
        )

    # load r2 values to input into custom cols in electrodes table
    r2 = np.load(r2_path)
    low_freq_r2 = np.ravel(r2[int(subject_id) - 1, :len(group_names), 0])
    high_freq_r2 = np.ravel(r2[int(subject_id) - 1, :len(group_names), 1])

    # add custom cols to electrodes table
    elecs_dset = file['chan_info']['block0_values']

    def get_data(label):
        return elecs_dset[file_elec_col_names == label, :].ravel()[is_elec]

    [nwbfile.add_electrode_column(**kwargs) for kwargs in (
        dict(
            name='standard_deviation',
            description="standard deviation of each electrode's data for the entire recording period",
            data=get_data(b'SD_channels')
        ),
        dict(
            name='kurtosis',
            description="kurtosis of each electrode's data for the entire recording period",
            data=get_data(b'Kurt_channels')
        ),
        dict(
            name='median_deviation',
            description="median absolute deviation estimator for standard deviation for each electrode",
            data=get_data(b'standardizeDenoms')
        ),
        dict(
            name='good',
            description='good electrodes',
            data=get_data(b'goodChanInds').astype(bool)
        ),
        dict(
            name='low_freq_R2',
            description='R^2 for low frequency band on each electrode',
            data=low_freq_r2
        ),
        dict(
            name='high_freq_R2',
            description='R^2 for high frequency band on each electrode',
            data=high_freq_r2
        )
    )]

    # confirm that electrodes table looks right
    # nwbfile.electrodes.to_dataframe()

    # add ElectricalSeries
    elecs_data = dset.lazy_slice[:, is_elec]
    n_bytes = np.dtype(elecs_data.dtype).itemsize
    nwbfile.add_acquisition(
        ElectricalSeries(
            name='ElectricalSeries',
            data=H5DataIO(
                data=DataChunkIterator(
                    data=elecs_data,
                    maxshape=elecs_data.shape,
                    buffer_size=int(5000 * 1e6) // (elecs_data.shape[1] * n_bytes)
                ),
                compression='gzip'
            ),
            rate=file['f_sample'][()],
            conversion=1e-6,  # data is in uV
            electrodes=nwbfile.create_electrode_table_region(
                region=list(range(len(nwbfile.electrodes))),
                description='all electrodes'
            )
        )
    )

    # add pose data
    pose_dset = file['pose_data']['block0_values']
    nwbfile.create_processing_module(
        name='behavior',
        description='pose data').add(
        Position(
            spatial_series=[
                SpatialSeries(
                    name=file['pose_data']['axis0'][x_ind][:-2].decode(),
                    data=H5DataIO(
                        data=pose_dset[:, [x_ind, y_ind]],
                        compression='gzip'
                    ),
                    reference_frame='unknown',
                    conversion=np.nan,
                    rate=30.
                )
                for x_ind, y_ind in zip(
                    range(0, pose_dset.shape[1], 2),
                    range(1, pose_dset.shape[1], 2))
            ]
        )
    )

    # add events
    events = pd.read_csv(events_path)
    mask = (events['Subject'] == int(subject_id)) & (events['Recording day'] == int(session))
    events = events[mask]
    timestamps = events['Event time'].values
    events = events.reset_index()

    events = Events(
        name='ReachEvents',
        description=events['Event type'][0],  # specifies which arm was used
        timestamps=timestamps,
        resolution=2e-3,  # resolution of the timestamps, i.e., smallest possible difference between timestamps
    )

    # add the Events type to the processing group of the NWB file
    nwbfile.processing['behavior'].add(events)

    # add coarse behavioral labels
    event_fp = f'sub{subject_id}_fullday_{session}'
    full_fp = coarse_events_path + '//' + event_fp + '.npy'
    coarse_events = np.load(full_fp, allow_pickle=True)

    label, data = np.unique(coarse_events, return_inverse=True)
    transition_idx = np.where(np.diff(data) != 0)

    start_t = nwbfile.processing["behavior"].data_interfaces["Position"]['L_Wrist'].starting_time
    rate = nwbfile.processing["behavior"].data_interfaces["Position"]['L_Wrist'].rate

    times = np.divide(transition_idx, rate) + start_t  # 30 Hz sampling rate
    max_time = (np.shape(coarse_events)[0] / rate) + start_t
    times = np.hstack([start_t, np.ravel(times), max_time])
    transition_labels = np.hstack([label[data[transition_idx]], label[data[-1]]])

    nwbfile.add_epoch_column(name='labels', description='Coarse behavioral labels')
    for start_time, stop_time, label in zip(times[:-1], times[1:], transition_labels):
        nwbfile.add_epoch(start_time=start_time, stop_time=stop_time, labels=label)

    # add additional reaching features
    reach_features = pd.read_csv(reach_features_path)
    mask = (reach_features['Subject'] == int(subject_id)) & (reach_features['Recording day'] == int(session))
    reach_features = reach_features[mask]

    reaches = TimeIntervals(name='reaches', description='Features of each reach')
    reaches.add_column(name='Reach_magnitude_px', description='Magnitude of reach in pixels')
    reaches.add_column(name='Reach_angle_degrees', description='Reach angle in degrees')
    reaches.add_column(name='Onset_speed_px_per_sec', description='Onset speed in pixels / second')
    reaches.add_column(name='Speech_ratio',
                       description='rough estimation of whether someone is likely to be speaking '
                                   'based on a power ratio of audio data; ranges from 0 (no '
                                   'speech) to 1 (high likelihood of speech)')
    reaches.add_column(name='Bimanual_ratio',
                       description='ratio of ipsilateral wrist reach magnitude to the sum of '
                                   'ipsilateral and contralateral wrist magnitudes; ranges from '
                                   '0 (unimanual/contralateral move only) to 1 (only ipsilateral '
                                   'arm moving); 0.5 indicates bimanual movement')
    reaches.add_column(name='Bimanual_overlap',
                       description='the amount of ipsilateral and contralateral wrist temporal '
                                   'overlap as a fraction of the entire contralateral movement '
                                   'duration')
    reaches.add_column(name='Bimanual_class',
                       description='binary feature that classifies each movement event as '
                                   'unimanual (0) or bimanual (1) based on how close in time an '
                                   'ipsilateral wrist movement started relative to each '
                                   'contralateral wrist movement event')

    for row in reach_features.iterrows():
        row_data = row[1]
        start_time = row_data['Time of day (sec)']
        stop_time = start_time + row_data['Reach duration (sec)']
        reaches.add_row(
            start_time=start_time,
            stop_time=stop_time,
            Reach_magnitude_px=row_data['Reach magnitude (px)'],
            Reach_angle_degrees=row_data['Reach angle (degrees)'],
            Onset_speed_px_per_sec=row_data['Onset speed (px/sec)'],
            Speech_ratio=row_data['Speech ratio'],
            Bimanual_ratio=row_data['Bimanual ratio'],
            Bimanual_overlap=row_data['Bimanual overlap (sec)'],
            Bimanual_class=row_data['Bimanual class']
        )
    nwbfile.add_time_intervals(reaches)

    with NWBHDF5IO(fpath_out, 'w') as io:
        io.write(nwbfile)
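# Example invocation (hypothetical paths; SPECIAL_CHANNELS is assumed to be a
# module-level list of byte-string channel labels to treat as non-neural):
#
#     run_conversion(
#         fpath_in='/data/Brunton/subj_01_day_4.h5',
#         fpath_out='/data/Brunton/subj_01_day_4.nwb',
#         events_path='/data/Brunton/event_times.csv',
#         r2_path='/data/Brunton/full_model_r2.npy',
#         coarse_events_path='/data/Brunton/coarse_labels/coarse_labels',
#         reach_features_path='/data/Brunton/behavioral_features.csv',
#         elec_loc_labels_path='elec_loc_labels.csv',
#     )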
def yuta2nwb(
        session_path='D:/BuzsakiData/SenzaiY/YutaMouse41/YutaMouse41-150903',
        # '/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
        subject_xls=None,
        include_spike_waveforms=True,
        stub=True,
        cache_spec=True):

    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    if subject_xls is None:
        subject_xls = os.path.join(subject_path, 'YM' + mouse_number + ' exp_sheet.xlsx')
    elif not subject_xls[-4:] == 'xlsx':
        subject_xls = os.path.join(subject_xls, 'YM' + mouse_number + ' exp_sheet.xlsx')

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery', 'virus injection', 'mouseID']:
        names = df.iloc[:, 0]
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    if isinstance(subject_data['DOB'], datetime):
        age = session_start_time - subject_data['DOB']
    else:
        age = None

    subject = Subject(subject_id=subject_id, age=str(age),
                      genotype=subject_data['genotype'], species='mouse')

    nwbfile = NWBFile(
        session_description='mouse in open exploration and theta maze',
        identifier=identifier,
        session_start_time=session_start_time.astimezone(),
        file_create_date=datetime.now().astimezone(),
        experimenter='Yuta Senzai',
        session_id=session_id,
        institution='NYU',
        lab='Buzsaki',
        subject=subject,
        related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    nshanks = len(shank_channels)
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path, session_start_time, session_id, b=b)

    custom_column = [{
        'name': 'theta_reference',
        'description': 'this electrode was used to calculate LFP canonical bands',
        'data': all_shank_channels == lfp_channel
    }]
    ns.write_electrode_table(nwbfile, session_path,
                             custom_columns=custom_column, max_shanks=max_shanks)

    print('reading raw electrode data...', end='', flush=True)
    if stub:
        # example recording extractor for fast testing
        xml_filepath = os.path.join(session_path, session_id + '.xml')
        xml_root = et.parse(xml_filepath).getroot()
        acq_sampling_frequency = float(xml_root.find('acquisitionSystem').find('samplingRate').text)
        num_channels = 4
        num_frames = 10000
        X = np.random.normal(0, 1, (num_channels, num_frames))
        geom = np.random.normal(0, 1, (num_channels, 2))
        X = (X * 100).astype(int)
        sre = se.NumpyRecordingExtractor(timeseries=X,
                                         sampling_frequency=acq_sampling_frequency,
                                         geom=geom)
    else:
        nre = se.NeuroscopeRecordingExtractor('{}/{}.dat'.format(session_path, session_id))
        sre = se.SubRecordingExtractor(nre, channel_ids=all_shank_channels)

    print('writing raw electrode data...', end='', flush=True)
    se.NwbRecordingExtractor.add_electrical_series(sre, nwbfile)
    print('done.')

    print('reading spiking units...', end='', flush=True)
    if stub:
        spike_times = [200, 300, 400]
        num_frames = 10000
        allshanks = []
        for k in range(nshanks):
            SX = se.NumpySortingExtractor()
            for j in range(len(spike_times)):
                SX.add_unit(unit_id=j + 1,
                            times=np.sort(np.random.uniform(0, num_frames, spike_times[j])))
            allshanks.append(SX)
        se_allshanks = se.MultiSortingExtractor(allshanks)
        se_allshanks.set_sampling_frequency(acq_sampling_frequency)
    else:
        se_allshanks = se.NeuroscopeMultiSortingExtractor(session_path, keep_mua_units=False)

    electrode_group = []
    for shankn in np.arange(1, nshanks + 1, dtype=int):
        for id in se_allshanks.sortings[shankn - 1].get_unit_ids():
            electrode_group.append(nwbfile.electrode_groups['shank' + str(shankn)])

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    # Add custom column data into the SortingExtractor so it can be written by the converter.
    # Note there is currently a hidden assumption that the way in which the NeuroscopeSortingExtractor
    # merges the cluster IDs matches one-to-one with the get_UnitFeatureCell_features extraction.
    property_descriptions = {
        'cell_type': 'name of cell type',
        'global_id': 'global id for cell for entire experiment',
        'shank_id': '0-indexed id of cluster of shank',
        'electrode_group': 'the electrode group that each spike unit came from'
    }
    property_values = {
        'cell_type': celltype_names,
        'global_id': df_unit_features['unitID'].values,
        # - 2 b/c the get_UnitFeatureCell_features removes 0 and 1 IDs from each shank
        'shank_id': [x - 2 for x in df_unit_features['unitIDshank'].values],
        'electrode_group': electrode_group
    }
    for unit_id in se_allshanks.get_unit_ids():
        for property_name in property_descriptions.keys():
            se_allshanks.set_unit_property(unit_id, property_name,
                                           property_values[property_name][unit_id])

    se.NwbSortingExtractor.write_sorting(se_allshanks, nwbfile=nwbfile,
                                         property_descriptions=property_descriptions)
    print('done.')

    # Read and write LFPs
    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_lfp_data = ns.read_lfp(session_path, stub=stub)
    lfp_data = all_channels_lfp_data[:, all_shank_channels]

    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp',
                          description='lfp signal for all shank electrodes')

    # Read and add special environmental electrodes
    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(name=name,
                        description='environmental electrode recorded inline with neural data',
                        data=all_channels_lfp_data[:, channel],
                        rate=lfp_fs,
                        unit='V',
                        # conversion=np.nan,
                        resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(),
                             lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        nshanks = min((max_shanks, len(ns.get_shank_channels(session_path))))
        for shankn in np.arange(nshanks, dtype=int) + 1:
            # get spike activity from the .spk file on a per-shank and per-sample basis
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    # Get the LFP Decomposition Series
    decomp_series = DecompositionSeries(name='LFPDecompositionSeries',
                                        description='Theta and Gamma phase for reference LFP',
                                        data=data, rate=lfp_fs,
                                        source_timeseries=lfp_ts,
                                        metric='phase', unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))
    check_module(nwbfile, 'ecephys',
                 'contains processed extracellular electrophysiology data').add_data_interface(decomp_series)

    [nwbfile.add_stimulus(x) for x in ns.get_events(session_path)]

    # create epochs corresponding to experiments/environments for the mouse
    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(
        os.path.isfile(os.path.join(session_path, '{}__{}.mat'.format(session_id, task_type['name'])))
        for task_type in task_types)
    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        file = os.path.join(session_path, session_id + '__' + label + '.mat')
        if os.path.isfile(file):
            print('loading position for ' + label + '...', end='', flush=True)
            pos_obj = Position(name=label + '_position')

            matin = loadmat(file)
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]
                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown',
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(nwbfile, 'behavior',
                         'contains processed behavioral data').add_data_interface(pos_obj)

            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'
    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        features = trialdatainfo[:7]
        features[:2] = 'start_time', 'stop_time'
        [nwbfile.add_trial_column(x, 'description') for x in features[4:] + ['condition']]

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1],
                              condition=cond, error_run=trial_data[4],
                              stim_run=trial_data[5], both_visit=trial_data[6])

    """
    mono_syn_fpath = os.path.join(session_path, session_id + '-MonoSynConvClick.mat')
    matin = loadmat(mono_syn_fpath)
    exc = matin['FinalExcMonoSynID']
    inh = matin['FinalInhMonoSynID']
    # exc_obj = CatCellInfo(name='excitatory_connections',
    #                       indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    # module_cellular.add_container(exc_obj)
    # inh_obj = CatCellInfo(name='inhibitory_connections',
    #                       indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    # module_cellular.add_container(inh_obj)
    """

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        [table.add_row(**row) for row in sorted(data, key=lambda x: x['start_time'])]

        check_module(nwbfile, 'behavior', 'contains behavioral data').add_data_interface(table)

    print('writing NWB file...', end='', flush=True)
    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile, cache_spec=cache_spec)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
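# Beyond the simple read-back test above, pynwb ships a validator that can be
# run against the finished file; a hedged sketch:
#
#     from pynwb import NWBHDF5IO, validate
#     with NWBHDF5IO(out_fname, mode='r') as io:
#         errors = validate(io=io)
#     if errors:
#         print(errors)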