def test_init_delayed_bands(self):
    """Check a DecompositionSeries whose bands are added after construction.

    The series is built without any band information; three bands are then
    appended with ``add_band`` and both the constructor arguments and the
    resulting band table columns are verified.
    """
    band_labels = ('alpha', 'beta', 'gamma')

    source_ts = TimeSeries(
        name='dummy timeseries',
        description='desc',
        data=np.ones((3, 3)),
        unit='Volts',
        timestamps=np.ones((3, )),
    )
    series = DecompositionSeries(
        name='LFPSpectralAnalysis',
        description='my description',
        data=np.ones((3, 3, 3)),
        timestamps=np.ones((3, )),
        source_timeseries=source_ts,
        metric='amplitude',
    )

    # Bands are registered one at a time, only after the series exists.
    for label in band_labels:
        series.add_band(
            band_name=label,
            band_limits=(1., 1.),
            band_mean=1.,
            band_stdev=1.,
        )

    # Constructor arguments must be exposed unchanged.
    self.assertEqual(series.name, 'LFPSpectralAnalysis')
    self.assertEqual(series.description, 'my description')
    np.testing.assert_equal(series.data, np.ones((3, 3, 3)))
    np.testing.assert_equal(series.timestamps, np.ones((3, )))

    # One row per added band, in insertion order.
    self.assertEqual(series.bands['band_name'].data, list(band_labels))
    np.testing.assert_equal(series.bands['band_limits'].data, np.ones((3, 2)))

    self.assertEqual(series.source_timeseries, source_ts)
    self.assertEqual(series.metric, 'amplitude')
def yuta2nwb(session_path='/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
             subject_xls=None, include_spike_waveforms=True, stub=True):
    """Convert one YutaMouse recording session into an NWB file.

    The function reads the subject's experiment sheet, LFP, events, position
    files, unit features, maze trials and sleep states found under
    ``session_path``, assembles them into an ``NWBFile``, writes it next to
    the session directory and finally re-opens the written file as a read
    check.

    Parameters
    ----------
    session_path : str
        Path of the session directory. Its last component is the session id
        (``<subject>-<date>`` or ``<subject>b<date>``); its parent is the
        subject directory.
    subject_xls : str, optional
        Path to the subject's experiment sheet. If it does not end in
        ``xlsx`` it is treated as a directory containing
        ``YM<mouse number> exp_sheet.xlsx``. If None, the sheet is looked up
        in the subject directory.
    include_spike_waveforms : bool, optional
        If True, spike waveforms are written for shanks 1 through 8.
    stub : bool, optional
        Forwarded to the LFP / waveform readers and used to pick the output
        name (``<session_path>_stub.nwb`` vs ``<session_path>.nwb``).

    Raises
    ------
    Exception
        If a unit has ``fineCellType == 1`` but a region other than 3 or 4.
    """
    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]

    # Session ids are either '<subject>-<date>' or '<subject>b<date>'; the
    # flag is passed on to get_reference_elec.
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    sheet_name = 'YM' + mouse_number + ' exp_sheet.xlsx'
    if subject_xls is None:
        subject_xls = os.path.join(subject_path, sheet_name)
    elif not subject_xls[-4:] == 'xlsx':
        subject_xls = os.path.join(subject_xls, sheet_name)

    session_start_time = dateparse(date_text, yearfirst=True)

    # The sheet stores field names in the first column and values in the
    # second; only rows that are actually present are collected.
    df = pd.read_excel(subject_xls)
    names = df.iloc[:, 0]
    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery',
                'virus injection', 'mouseID']:
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    # Rows are optional above, so they must be optional here too: a sheet
    # without DOB/genotype should not abort the conversion with a KeyError.
    date_of_birth = subject_data.get('DOB')
    if isinstance(date_of_birth, datetime):
        age = session_start_time - date_of_birth
    else:
        age = None

    subject = Subject(subject_id=subject_id,
                      # an unknown age is left unset instead of the string 'None'
                      age=str(age) if age is not None else None,
                      genotype=subject_data.get('genotype'),
                      species='mouse')

    nwbfile = NWBFile(session_description='mouse in open exploration and theta maze',
                      identifier=identifier,
                      session_start_time=session_start_time.astimezone(),
                      file_create_date=datetime.now().astimezone(),
                      experimenter='Yuta Senzai',
                      session_id=session_id,
                      institution='NYU',
                      lab='Buzsaki',
                      subject=subject,
                      related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path,
                                     session_start_time, session_id, b=b)
    print(lfp_channel)
    custom_column = [{'name': 'theta_reference',
                      'description': 'this electrode was used to calculate LFP canonical bands',
                      'data': all_shank_channels == lfp_channel}]
    ns.write_electrode_table(nwbfile, session_path, custom_columns=custom_column,
                             max_shanks=max_shanks)

    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_data = ns.read_lfp(session_path, stub=stub)
    # Channels are indexed along axis 1 of the LFP array.
    lfp_data = all_channels_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    # lfp_data[:int(len(lfp_data)/4)]
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs, name='lfp',
                          description='lfp signal for all shank electrodes')

    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(name=name,
                        description='environmental electrode recorded inline with neural data',
                        # select the channel column (axis 1), matching the
                        # shank-channel selection above; indexing axis 0 would
                        # pick a single time sample instead of a trace
                        data=all_channels_data[:, channel],
                        rate=lfp_fs, unit='V', conversion=np.nan, resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(lfp_data[:, all_shank_channels == lfp_channel].ravel(),
                             lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        # NOTE(review): the shank count is hard-coded to 8 here even though
        # shank_channels may hold fewer entries - confirm against the data.
        for shankn in np.arange(1, 9, dtype=int):
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    decomp_series = DecompositionSeries(name='LFPDecompositionSeries',
                                        description='Theta and Gamma phase for reference LFP',
                                        data=data, rate=lfp_fs,
                                        source_timeseries=lfp_ts,
                                        metric='phase', unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(nwbfile, 'ecephys',
                 'contains processed extracellular electrophysiology data'
                 ).add_data_interface(decomp_series)

    for event_series in ns.get_events(session_path):
        nwbfile.add_stimulus(event_series)

    # create epochs corresponding to experiments/environments for the mouse
    sleep_state_fpath = os.path.join(session_path, '{}--StatePeriod.mat'.format(session_id))

    # The epoch 'label' column is only declared when at least one task
    # position file exists, i.e. when epochs will actually be added.
    exist_pos_data = any(
        os.path.isfile(os.path.join(session_path,
                                    '{}__{}.mat'.format(session_id, task_type['name'])))
        for task_type in task_types)
    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        pos_fpath = os.path.join(session_path, session_id + '__' + label + '.mat')
        if not os.path.isfile(pos_fpath):
            continue

        print('loading position for ' + label + '...', end='', flush=True)

        pos_obj = Position(name=label + '_position')

        matin = loadmat(pos_fpath)
        # first column holds the timestamps, remaining columns the position
        tt = matin['twhl_norm'][:, 0]
        exp_times = find_discontinuities(tt)

        conversion = task_type.get('conversion', np.nan)

        for pos_type in ('twhl_norm', 'twhl_linearized'):
            if pos_type in matin:
                pos_data_norm = matin[pos_type][:, 1:]

                spatial_series_object = SpatialSeries(
                    name=label + '_{}_spatial_series'.format(pos_type),
                    data=H5DataIO(pos_data_norm, compression='gzip'),
                    reference_frame='unknown', conversion=conversion,
                    resolution=np.nan,
                    timestamps=H5DataIO(tt, compression='gzip'))
                pos_obj.add_spatial_series(spatial_series_object)

        check_module(nwbfile, 'behavior',
                     'contains processed behavioral data').add_data_interface(pos_obj)
        for i, window in enumerate(exp_times):
            nwbfile.add_epoch(start_time=window[0], stop_time=window[1],
                              label=label + '_' + str(i))
        print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'
    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id, session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            # type 1 is resolved by region: CA3 -> pyramidal, DG -> granule
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    custom_unit_columns = [
        {
            'name': 'cell_type',
            'description': 'name of cell type',
            'data': celltype_names},
        {
            'name': 'global_id',
            'description': 'global id for cell for entire experiment',
            'data': df_unit_features['unitID'].values},
        {
            'name': 'max_electrode',
            'description': 'electrode that has the maximum amplitude of the waveform',
            'data': get_max_electrodes(nwbfile, session_path),
            'table': nwbfile.electrodes
        }]

    ns.add_units(nwbfile, session_path, custom_unit_columns, max_shanks=max_shanks)

    trialdata_path = os.path.join(session_path, session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]]

        features = trialdatainfo[:7]
        features[:2] = ('start_time', 'stop_time')
        for column_name in features[4:] + ['condition']:
            nwbfile.add_trial_column(column_name, 'description')

        for trial_data in trials_data:
            cond = 'run_left' if trial_data[3] else 'run_right'
            nwbfile.add_trial(start_time=trial_data[0], stop_time=trial_data[1],
                              condition=cond, error_run=trial_data[4],
                              stim_run=trial_data[5], both_visit=trial_data[6])

    # NOTE: the original code carried a disabled (string-quoted) draft for
    # exporting mono-synaptic connections from
    # '<session_id>-MonoSynConvClick.mat'; it never ran and is not
    # implemented here.

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states', description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        # one struct field per state; each holds rows of (start, stop)
        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({'start_time': row[0], 'stop_time': row[1], 'label': name})
        # rows are inserted in chronological order of their start time
        for row in sorted(data, key=lambda x: x['start_time']):
            table.add_row(**row)

        check_module(nwbfile, 'behavior',
                     'contains behavioral data').add_data_interface(table)

    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    print('writing NWB file...', end='', flush=True)
    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')
def convert_data(self, nwbfile: NWBFile, metadata_dict: dict, stub_test: bool = False):
    """Write LFP, special-electrode traces, LFP phase bands and spike waveforms.

    Parameters
    ----------
    nwbfile : NWBFile
        File object that receives all data.
    metadata_dict : dict
        Must provide ``all_shank_channels``, ``special_electrodes``,
        ``lfp_channel``, ``lfp_sampling_rate``, ``spikes_nsamples``,
        ``shank_channels``, ``lfp`` (``name``/``description``) and, when
        ``lfp_channel`` is not None, ``lfp_decomposition``
        (``name``/``description``).
    stub_test : bool, optional
        Forwarded to ``read_lfp`` and ``write_spike_waveforms``.
    """
    folder = self.input_args['folder_path']
    # TODO: check/enforce format?
    shank_channel_ids = metadata_dict['all_shank_channels']
    special_electrodes = metadata_dict['special_electrodes']
    reference_channel = metadata_dict['lfp_channel']
    sampling_rate = metadata_dict['lfp_sampling_rate']
    n_spike_samples = metadata_dict['spikes_nsamples']
    channels_per_shank = metadata_dict['shank_channels']

    # Result is not used below; the call is kept as in the original code.
    _subject_path, _session_id = os.path.split(folder)

    _, full_lfp = read_lfp(folder, stub=stub_test)
    shank_lfp = full_lfp[:, shank_channel_ids]
    lfp_series = write_lfp(
        nwbfile,
        shank_lfp,
        sampling_rate,
        name=metadata_dict['lfp']['name'],
        description=metadata_dict['lfp']['description'],
        electrode_inds=None,
    )

    # TODO: error checking on format?
    for electrode in special_electrodes:
        nwbfile.add_acquisition(
            TimeSeries(
                name=electrode['name'],
                description=electrode['description'],
                data=full_lfp[:, electrode['channel']],
                rate=sampling_rate,
                unit='V',
                resolution=np.nan,
            )
        )

    # TODO: discuss/consider more robust checking well prior to this
    # when missing experimental sheets for a subject, the lfp_channel cannot be determined(?)
    # which causes uninformative downstream errors at this step because lfp_channel is None
    # (get_reference_electrode does throw a warning, though)
    if reference_channel is not None:

        def _phase_column(passband):
            # Band-pass the reference trace, then keep only the Hilbert phase.
            filtered = filter_lfp(
                shank_lfp[:, shank_channel_ids == reference_channel].ravel(),
                sampling_rate,
                passband=passband,
            )
            phase, _ = hilbert_lfp(filtered)
            return phase[:, np.newaxis]

        phase_stack = np.dstack([_phase_column(band) for band in ('theta', 'gamma')])

        # TODO: should units or metrics be metadata?
        phase_series = DecompositionSeries(
            name=metadata_dict['lfp_decomposition']['name'],
            description=metadata_dict['lfp_decomposition']['description'],
            data=phase_stack,
            rate=sampling_rate,
            source_timeseries=lfp_series,
            metric='phase',
            unit='radians',
        )
        # TODO: the band limits should be extracted from parse_passband in band_analysis?
        phase_series.add_band(band_name='theta', band_limits=(4, 10))
        phase_series.add_band(band_name='gamma', band_limits=(30, 80))

        ecephys_module = check_module(
            nwbfile, 'ecephys', 'contains processed extracellular electrophysiology data'
        )
        ecephys_module.add_data_interface(phase_series)

    write_spike_waveforms(
        nwbfile,
        folder,
        spikes_nsamples=n_spike_samples,
        shank_channels=channels_per_shank,
        stub_test=stub_test,
    )
def run_conversion(self, nwbfile: NWBFile, metadata: dict, stub_test: bool = False):
    """Run the parent conversion, then add special electrodes and LFP phase bands.

    After delegating to ``super().run_conversion``, the session XML next to
    ``self.source_data["file_path"]`` is parsed for channel layout and LFP
    rate. Special (environmental) channels that exist in the recording are
    written as acquisition ``TimeSeries``. If a reference LFP channel can be
    determined, its theta and gamma phases are stored as a
    ``DecompositionSeries`` in the ``ecephys`` processing module.

    Parameters
    ----------
    nwbfile : NWBFile
        File object that receives the data.
    metadata : dict
        ``metadata["NWBFile"]["session_start_time"]`` is read here.
    stub_test : bool, optional
        Forwarded to the parent conversion and to ``read_lfp``.
    """
    super().run_conversion(nwbfile=nwbfile, metadata=metadata, stub_test=stub_test)

    session_dir = Path(self.source_data["file_path"]).parent
    session_id = session_dir.name
    subject_dir = session_dir.parent

    xml_root = et.parse(str(session_dir / f"{session_id}.xml")).getroot()
    channel_count = int(xml_root.find("acquisitionSystem").find("nChannels").text)
    lfp_rate = float(xml_root.find("fieldPotentials").find("lfpSamplingRate").text)
    channel_groups = xml_root.find("spikeDetection").find("channelGroups").findall("group")
    per_shank = []
    for group in channel_groups:
        per_shank.append([int(node.text) for node in group.find("channels")])
    flat_shank_channels = np.concatenate(per_shank)  # Flattened

    # Special electrodes
    known_special_channels = dict(
        ch_wait=79,
        ch_arm=78,
        ch_solL=76,
        ch_solR=77,
        ch_dig1=65,
        ch_dig2=68,
        ch_entL=72,
        ch_entR=71,
        ch_SsolL=73,
        ch_SsolR=70,
    )
    # Only channels that exist in this recording are kept.
    present_special_channels = {
        label: index for label, index in known_special_channels.items() if index < channel_count
    }

    _, full_lfp = read_lfp(session_dir, stub=stub_test)
    for label, index in present_special_channels.items():
        nwbfile.add_acquisition(
            TimeSeries(
                name=label,
                description="Environmental electrode recorded inline with neural data.",
                data=full_lfp[:, index],
                rate=lfp_rate,
                unit="V",
                resolution=np.nan,
            )
        )

    # DecompositionSeries
    mouse_number = session_id[-9:-7]
    subject_xls = str(subject_dir / f"DGProject/YM{mouse_number} exp_sheet.xlsx")
    hilus_csv_path = str(subject_dir / "DGProject/early_session_hilus_chans.csv")
    session_start = metadata["NWBFile"]["session_start_time"]
    is_b_session = "-" not in session_id
    reference_channel = get_reference_elec(
        subject_xls, hilus_csv_path, session_start, session_id, b=is_b_session
    )
    if reference_channel is None:
        # Without a reference electrode there is nothing to decompose.
        return

    shank_lfp = full_lfp[:, flat_shank_channels]

    def _phase_column(passband):
        # Band-pass the reference trace, then keep only the Hilbert phase.
        filtered = filter_lfp(
            shank_lfp[:, flat_shank_channels == reference_channel].ravel(),
            lfp_rate,
            passband=passband,
        )
        phase, _ = hilbert_lfp(filtered)
        return phase[:, np.newaxis]

    phase_stack = np.dstack([_phase_column(band) for band in ("theta", "gamma")])

    ecephys_mod = check_module(
        nwbfile,
        "ecephys",
        "Intermediate data from extracellular electrophysiology recordings, e.g., LFP.",
    )
    source_lfp = ecephys_mod.data_interfaces["LFP"]["LFP"]
    phase_series = DecompositionSeries(
        name="LFPDecompositionSeries",
        description="Theta and Gamma phase for reference LFP",
        data=phase_stack,
        rate=lfp_rate,
        source_timeseries=source_lfp,
        metric="phase",
        unit="radians",
    )
    # TODO: the band limits should be extracted from parse_passband in band_analysis?
    phase_series.add_band(band_name="theta", band_limits=(4, 10))
    phase_series.add_band(band_name="gamma", band_limits=(30, 80))

    target_module = check_module(
        nwbfile,
        "ecephys",
        "Contains processed extracellular electrophysiology data.",
    )
    target_module.add(phase_series)
def chang2nwb(blockpath, outpath=None, session_start_time=None,
              session_description=None, identifier=None, anin4=False,
              ecog_format='auto', external_subject=True, include_pitch=False,
              include_intensity=False, speakers=True, mic=False, mini=False,
              hilb=False, verbose=False, imaging_path=None,
              parse_transcript=False, include_cortical_surfaces=True,
              include_electrodes=True, include_ekg=True,
              subject_image_list=None, rest_period=None, load_warped=False,
              **kwargs):
    """Convert one recording block into an NWB file and read it back as a check.

    Parameters
    ----------
    blockpath: str
        Path of the block directory; its last component is the block name.
    outpath: None | str
        if None, output = [blockpath]/[blockname].nwb
    session_start_time: datetime.datetime
        default: datetime(1900, 1, 1)
    session_description: str
        default: blockname
    identifier: str
        default: blockname
    anin4: False | str
        Whether or not to convert ANIN4. ANIN4 is used as an extra channel for
        things like button presses, and is usually unused. If a string is
        supplied, that is used as the name of the timeseries.
    ecog_format: str ({'auto'}, 'htk', 'mat', 'raw')
        Source of the ECoG data. Default: 'auto'
    external_subject: bool (optional)
        True: (default) cortical mesh is saved in an external file and a link
            is provided to that file. This is useful if you have multiple
            sessions for a single subject.
        False: cortical mesh is saved normally
    include_pitch: bool (optional)
        add pitch data. Default: False
    include_intensity: bool (optional)
        add intensity data. Default: False
    speakers: bool (optional)
        add the speaker (audio stimulus) channels. Default: True
    mic: bool (optional)
        add the room microphone recording. Default: False
    mini: bool (optional)
        only save data stub (first 2000 samples). Used for testing
    hilb: bool
        include Hilbert Transform data. Default: False
    verbose: bool (optional)
    imaging_path: str (optional)
        None: use IMAGING_PATH
        'local': use [basepath]/imaging
        else: use supplied string
    parse_transcript: False | str (optional)
        One of 'CV', 'singing', 'emphasis', 'MOCHA'; selects how trials are
        built from the transcript. Default: False (no trials)
    include_cortical_surfaces: bool (optional)
    include_electrodes: bool (optional)
    include_ekg: bool (optional)
    subject_image_list: list (optional)
        List of paths of images to include
    rest_period: None | array-like
        (start, stop) of a rest epoch to add
    load_warped: bool (optional)
        forwarded to add_electrodes
    kwargs: dict
        passed to pynwb.NWBFile

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``ecog_format`` is not one of the recognized values.
    """

    behav_module = None

    basepath, blockname = os.path.split(blockpath)
    subject_id = get_subject_id(blockname)
    if identifier is None:
        identifier = blockname

    if session_description is None:
        session_description = blockname

    if outpath is None:
        outpath = blockpath + '.nwb'
    out_base_path = os.path.split(outpath)[0]

    if session_start_time is None:
        session_start_time = datetime(1900, 1, 1).astimezone(timezone('UTC'))

    if imaging_path is None:
        subj_imaging_path = path.join(IMAGING_PATH, subject_id)
    elif imaging_path == 'local':
        subj_imaging_path = path.join(basepath, 'imaging')
    else:
        subj_imaging_path = os.path.join(imaging_path, subject_id)

    # file paths
    bad_time_file = path.join(blockpath, 'Artifacts', 'badTimeSegments.mat')
    ecog_path = path.join(blockpath, 'RawHTK')
    ecog400_path = path.join(blockpath, 'ecog400', 'ecog.mat')
    elec_metadata_file = path.join(subj_imaging_path, 'elecs', 'TDT_elecs_all.mat')
    mesh_path = path.join(subj_imaging_path, 'Meshes')
    pial_files = glob.glob(path.join(mesh_path, '*pial.mat'))

    # Create the NWB file object
    nwbfile = NWBFile(session_description,
                      identifier,
                      session_start_time,
                      datetime.now().astimezone(),
                      session_id=identifier,
                      institution='University of California, San Francisco',
                      lab='Chang Lab',
                      **kwargs)

    nwbfile.add_electrode_column('bad', 'electrode identified as too noisy')

    bad_elecs_inds = get_bad_elecs(blockpath)

    if include_electrodes:
        add_electrodes(nwbfile, elec_metadata_file, bad_elecs_inds,
                       load_warped=load_warped)
    else:
        # Without electrode metadata, a placeholder 256-channel grid is created.
        device = nwbfile.create_device('256Grid')
        electrode_group = nwbfile.create_electrode_group(
            name='256Grid electrodes',
            description='auto_group',
            location='location',
            device=device)

        for elec_counter in range(256):
            bad = elec_counter in bad_elecs_inds
            nwbfile.add_electrode(id=elec_counter + 1,
                                  x=np.nan, y=np.nan, z=np.nan,
                                  imp=np.nan,
                                  location=' ',
                                  filtering='none',
                                  group=electrode_group,
                                  bad=bad)
    ecog_elecs = list(range(len(nwbfile.electrodes)))
    ecog_elecs_region = nwbfile.create_electrode_table_region(
        ecog_elecs, 'ECoG electrodes on brain')

    # Read electrophysiology data from HTK files and add them to NWB file
    if ecog_format == 'auto':
        # NOTE(review): verbose is not forwarded here (always False) - confirm intent.
        ecog_rate, data, ecog_path = auto_ecog(blockpath, ecog_elecs, verbose=False)
    elif ecog_format == 'htk':
        if verbose:
            print('reading htk acquisition...', flush=True)
        ecog_rate, data = readhtks(ecog_path, ecog_elecs)
        data = data.squeeze()
        if verbose:
            print('done', flush=True)
    elif ecog_format == 'mat':
        with File(ecog400_path, 'r') as f:
            data = f['ecogDS']['data'][:, ecog_elecs]
            ecog_rate = f['ecogDS']['sampFreq'][:].ravel()[0]
        ecog_path = ecog400_path
    elif ecog_format == 'raw':
        ecog_path = os.path.join(tdt_data_path, subject_id, blockname, 'raw.mat')
        ecog_rate, data = load_wavs(ecog_path)
    else:
        raise ValueError('unrecognized argument: ecog_format')

    ts_desc = "all Wav data"

    if mini:
        data = data[:2000]

    ecog_ts = ElectricalSeries(name='ElectricalSeries',
                               data=H5DataIO(data, compression='gzip'),
                               electrodes=ecog_elecs_region,
                               rate=ecog_rate,
                               description=ts_desc,
                               conversion=0.001)
    nwbfile.add_acquisition(ecog_ts)

    if include_ekg:
        ekg_elecs = find_ekg_elecs(elec_metadata_file)
        if len(ekg_elecs):
            add_ekg(nwbfile, ecog_path, ekg_elecs)

    if mic:
        # Add microphone recording from room
        fs, data = get_analog(blockpath, 1)
        nwbfile.add_acquisition(
            TimeSeries('microphone', data, 'audio unit', rate=fs,
                       description="audio recording from microphone in room"))
    if speakers:
        fs, data = get_analog(blockpath, 2)
        # Add audio stimulus 1
        nwbfile.add_stimulus(
            TimeSeries('speaker 1', data, 'NA', rate=fs,
                       description="audio stimulus 1"))

        # Add audio stimulus 2
        fs, data = get_analog(blockpath, 3)
        if fs is not None:
            nwbfile.add_stimulus(
                TimeSeries('speaker 2', data, 'NA', rate=fs,
                           description='the second stimulus source'))

    if anin4:
        fs, data = get_analog(blockpath, 4)
        nwbfile.add_acquisition(
            TimeSeries(anin4, data, 'aux unit', rate=fs,
                       description="aux analog recording"))

    # Add bad time segments
    if os.path.exists(bad_time_file) and os.stat(bad_time_file).st_size:
        bad_time = sio.loadmat(bad_time_file)['badTimeSegments']
        for row in bad_time:
            nwbfile.add_invalid_time_interval(start_time=row[0],
                                              stop_time=row[1],
                                              tags=('ECoG artifact', ),
                                              timeseries=ecog_ts)

    if rest_period is not None:
        nwbfile.add_epoch_column(name='label', description='label')
        nwbfile.add_epoch(start_time=rest_period[0],
                          stop_time=rest_period[1],
                          label='rest_period')

    # Both handles below must stay open until the NWB file has been written
    # (the Hilbert data is streamed during the write and the subject is read
    # from the external file). The try/finally guarantees they are closed
    # even when an intermediate step raises.
    hilb_file = None
    subj_read_io = None
    try:
        if hilb:
            block_hilb_path = os.path.join(hilb_dir, subject_id, blockname,
                                           blockname + '_AA.h5')
            hilb_file = File(block_hilb_path, 'r')

            data = transpose_iter(hilb_file['X'])  # transposes data during iterative write
            filter_center = hilb_file['filter_center'][:]
            filter_sigma = hilb_file['filter_sigma'][:]

            data = H5DataIO(
                DataChunkIterator(tqdm(data, desc='writing hilbert data'),
                                  buffer_size=400 * 20),
                compression='gzip')

            decomp_series = DecompositionSeries(
                name='LFPDecompositionSeries',
                description='Gaussian band Hilbert transform',
                data=data,
                rate=400.,
                source_timeseries=ecog_ts,
                metric='amplitude')

            for band_mean, band_stdev in zip(filter_center, filter_sigma):
                decomp_series.add_band(band_mean=band_mean, band_stdev=band_stdev)

            hilb_mod = nwbfile.create_processing_module(
                name='ecephys', description='holds hilbert analysis results')
            hilb_mod.add_container(decomp_series)

        if include_cortical_surfaces:
            subject = ECoGSubject(subject_id=subject_id)
            subject.cortical_surfaces = create_cortical_surfaces(
                pial_files, subject_id)
        else:
            subject = Subject(subject_id=subject_id, species='Homo sapiens')

        if subject_image_list is not None:
            subject = add_images_to_subject(subject, subject_image_list)

        if external_subject:
            subj_fpath = path.join(out_base_path, subject_id + '.nwb')
            if not os.path.isfile(subj_fpath):
                # First session for this subject: create the shared subject file.
                subj_nwbfile = NWBFile(session_description=subject_id,
                                       identifier=subject_id,
                                       subject=subject,
                                       session_start_time=datetime(
                                           1900, 1, 1).astimezone(timezone('UTC')))
                with NWBHDF5IO(subj_fpath, manager=manager, mode='w') as subj_io:
                    subj_io.write(subj_nwbfile)
            subj_read_io = NWBHDF5IO(subj_fpath, manager=manager, mode='r')
            subj_nwbfile = subj_read_io.read()
            subject = subj_nwbfile.subject

        nwbfile.subject = subject

        if parse_transcript:
            if parse_transcript == 'CV':
                parseout = parse(blockpath, blockname)
                df = make_df(parseout, 0, subject_id, align_pos=1)
                nwbfile.add_trial_column('cv_transition_time',
                                         'time of CV transition in seconds')
                nwbfile.add_trial_column(
                    'speak',
                    'if True, subject is speaking. If False, subject is listening')
                nwbfile.add_trial_column('condition', 'syllable spoken')
                for _, row in df.iterrows():
                    nwbfile.add_trial(start_time=row['start'],
                                      stop_time=row['stop'],
                                      cv_transition_time=row['align'],
                                      speak=row['mode'] == 'speak',
                                      condition=row['label'])
            elif parse_transcript == 'singing':
                parseout = parse(blockpath, blockname)
                df = make_df(parseout, 0, subject_id, align_pos=0)
                if not len(df):
                    df = pd.DataFrame(parseout)
                    df['mode'] = 'speak'

                df = df.loc[df['label'].astype('bool'), :]  # handle empty labels
                nwbfile.add_trial_column(
                    'speak',
                    'if True, subject is speaking. If False, subject is listening')
                nwbfile.add_trial_column('condition', 'syllable spoken')
                for _, row in df.iterrows():
                    nwbfile.add_trial(start_time=row['start'],
                                      stop_time=row['stop'],
                                      speak=row['mode'] == 'speak',
                                      condition=row['label'])
            elif parse_transcript == 'emphasis':
                parseout = parse(blockpath, blockname)
                # Best-effort: fall back to the raw parse when make_df fails.
                # Narrowed from a bare except so that KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                try:
                    df = make_df(parseout, 0, subject_id, align_pos=0)
                except Exception:
                    df = pd.DataFrame(parseout)
                if not len(df):
                    df = pd.DataFrame(parseout)
                df = df.loc[df['label'].astype('bool'), :]  # handle empty labels
                nwbfile.add_trial_column('condition', 'word emphasized')
                nwbfile.add_trial_column(
                    'speak',
                    'if True, subject is speaking. If False, subject is listening')
                for _, row in df.iterrows():
                    nwbfile.add_trial(start_time=row['start'],
                                      stop_time=row['stop'],
                                      speak=True,
                                      condition=row['label'])
            elif parse_transcript == 'MOCHA':
                nwbfile = create_transcription(nwbfile, transcript_path, blockname)

        # behavior
        if include_pitch:
            if behav_module is None:
                behav_module = nwbfile.create_processing_module(
                    'behavior', 'processing about behavior')
            if os.path.isfile(
                    os.path.join(blockpath, 'pitch_' + blockname + '.mat')):
                fs, data = load_pitch(blockpath)
                pitch_ts = TimeSeries(
                    data=data,
                    rate=fs,
                    unit='Hz',
                    name='pitch',
                    description='Pitch as extracted from Praat. NaNs mark unvoiced regions.')
                behav_module.add_container(
                    BehavioralTimeSeries(name='pitch', time_series=pitch_ts))
            else:
                print('No pitch file for ' + blockname)

        if include_intensity:
            if behav_module is None:
                behav_module = nwbfile.create_processing_module(
                    'behavior', 'processing about behavior')
            if os.path.isfile(
                    os.path.join(blockpath, 'intensity_' + blockname + '.mat')):
                # NOTE(review): intensity is loaded through load_pitch, exactly
                # as in the original code - this looks like a copy/paste slip;
                # confirm whether a dedicated intensity loader should be used.
                fs, data = load_pitch(blockpath)
                intensity_ts = TimeSeries(
                    data=data,
                    rate=fs,
                    unit='dB',
                    name='intensity',
                    description='Intensity of speech in dB extracted from Praat.')
                behav_module.add_container(
                    BehavioralTimeSeries(name='intensity',
                                         time_series=intensity_ts))
            else:
                print('No intensity file for ' + blockname)

        # Export the NWB file
        with NWBHDF5IO(outpath, manager=manager, mode='w') as io:
            io.write(nwbfile)
    finally:
        if subj_read_io is not None:
            subj_read_io.close()
        if hilb_file is not None:
            hilb_file.close()

    # read check
    with NWBHDF5IO(outpath, manager=manager, mode='r') as io:
        io.read()
def convert_data(self, nwbfile: NWBFile, metadata: dict, stub_test: bool = False):
    """Write LFP, special-electrode traces, per-reference phase bands and waveforms.

    Parameters
    ----------
    nwbfile : NWBFile
        File object that receives all data.
    metadata : dict
        Must provide ``all_shank_channels``, ``lfp_channels`` (mapping of
        reference name to channel), ``lfp_sampling_rate``,
        ``spikes_nsamples``, ``shank_channels``, ``n_total_channels``,
        ``lfp`` (``name``/``description``) and ``lfp_decomposition`` keyed by
        reference name. ``special_electrodes`` is optional.
    stub_test : bool, optional
        Forwarded to ``read_lfp`` and ``write_spike_waveforms``.
    """
    folder = self.input_args["folder_path"]
    # TODO: check/enforce format?
    shank_channel_ids = metadata["all_shank_channels"]
    special_electrodes = metadata.get("special_electrodes", [])
    reference_channels = metadata["lfp_channels"]
    sampling_rate = metadata["lfp_sampling_rate"]
    n_spike_samples = metadata["spikes_nsamples"]
    channels_per_shank = metadata["shank_channels"]
    channel_count = metadata["n_total_channels"]

    # Result is not used below; the call is kept as in the original code.
    _subject_path, _session_id = os.path.split(folder)

    _, full_lfp = read_lfp(folder, stub=stub_test, n_channels=channel_count)

    # Fall back to every channel when the shank indices cannot be applied.
    try:
        shank_lfp = full_lfp[:, shank_channel_ids]
    except IndexError:
        shank_lfp = full_lfp

    lfp_series = write_lfp(
        nwbfile,
        shank_lfp,
        sampling_rate,
        name=metadata["lfp"]["name"],
        description=metadata["lfp"]["description"],
        electrode_inds=None,
    )

    # TODO: error checking on format?
    for electrode in special_electrodes:
        nwbfile.add_acquisition(
            TimeSeries(
                name=electrode["name"],
                description=electrode["description"],
                data=full_lfp[:, electrode["channel"]],
                rate=sampling_rate,
                unit="V",
                resolution=np.nan,
            )
        )

    for ref_name, reference_channel in reference_channels.items():
        # An IndexError anywhere in this step skips the reference with a message.
        try:
            phase_columns = []
            for passband in ("theta", "gamma"):
                reference_trace = shank_lfp[:, shank_channel_ids == reference_channel].ravel()
                filtered = filter_lfp(reference_trace, sampling_rate, passband=passband)
                phase = hilbert_lfp(filtered)[0]
                phase_columns.append(phase[:, np.newaxis])
            phase_stack = np.dstack(phase_columns)

            # TODO: should units or metrics be metadata?
            phase_series = DecompositionSeries(
                name=metadata["lfp_decomposition"][ref_name]["name"],
                description=metadata["lfp_decomposition"][ref_name]["description"],
                data=phase_stack,
                rate=sampling_rate,
                source_timeseries=lfp_series,
                metric="phase",
                unit="radians",
            )
            # TODO: the band limits should be extracted from parse_passband in band_analysis?
            phase_series.add_band(band_name="theta", band_limits=(4, 10))
            phase_series.add_band(band_name="gamma", band_limits=(30, 80))

            ecephys_module = check_module(
                nwbfile, "ecephys", "contains processed extracellular electrophysiology data"
            )
            ecephys_module.add_data_interface(phase_series)
        except IndexError:
            print("Unable to index lfp data for decomposition series - skipping")

    write_spike_waveforms(
        nwbfile,
        folder,
        spikes_nsamples=n_spike_samples,
        shank_channels=channels_per_shank,
        stub_test=stub_test,
    )
def yuta2nwb(
        session_path='D:/BuzsakiData/SenzaiY/YutaMouse41/YutaMouse41-150903',
        # '/Users/bendichter/Desktop/Buzsaki/SenzaiBuzsaki2017/YutaMouse41/YutaMouse41-150903',
        subject_xls=None,
        include_spike_waveforms=True,
        stub=True,
        cache_spec=True):
    """Convert one recording session folder into an NWB file.

    The output is written next to the session folder as ``<session_path>.nwb``
    (``<session_path>_stub.nwb`` when ``stub`` is true) and is re-opened once
    for reading as a smoke test.

    Relies on names that are not defined in this function and are expected at
    module level: ``max_shanks``, ``special_electrode_dict``, ``task_types``
    and ``celltype_dict``.

    Parameters
    ----------
    session_path : str
        Session folder. Its last path component is used as the session id and
        NWB identifier; characters 9-10 of it are taken as the mouse number and
        the part after ``'-'`` (or ``'b'``) is parsed as the session date.
    subject_xls : str, optional
        Experiment sheet. If ``None`` it is looked up in the parent folder of
        the session as ``'YM<mouse_number> exp_sheet.xlsx'``; if the value does
        not end in ``'xlsx'`` it is treated as the folder containing that file.
    include_spike_waveforms : bool, optional
        If true, spike waveforms are written for each shank.
    stub : bool, optional
        If true, the raw recording and the spike trains are replaced by small
        random data, and ``stub`` is forwarded to the LFP and waveform helpers.
    cache_spec : bool, optional
        Forwarded to ``NWBHDF5IO.write``.

    Raises
    ------
    Exception
        If a unit has ``fineCellType == 1`` but a region other than 3 or 4.
    """
    subject_path, session_id = os.path.split(session_path)
    fpath_base = os.path.split(subject_path)[0]
    identifier = session_id
    mouse_number = session_id[9:11]
    if '-' in session_id:
        subject_id, date_text = session_id.split('-')
        b = False
    else:
        subject_id, date_text = session_id.split('b')
        b = True

    sheet_name = 'YM' + mouse_number + ' exp_sheet.xlsx'
    if subject_xls is None:
        subject_xls = os.path.join(subject_path, sheet_name)
    elif not subject_xls.endswith('xlsx'):
        # A folder was given instead of the sheet itself.
        subject_xls = os.path.join(subject_xls, sheet_name)

    session_start_time = dateparse(date_text, yearfirst=True)

    df = pd.read_excel(subject_xls)

    # First column holds the field names, second column the values.
    names = df.iloc[:, 0]
    subject_data = {}
    for key in ['genotype', 'DOB', 'implantation', 'Probe', 'Surgery',
                'virus injection', 'mouseID']:
        if key in names.values:
            subject_data[key] = df.iloc[np.argmax(names == key), 1]

    # Rows may be absent from the sheet, so never index subject_data directly.
    date_of_birth = subject_data.get('DOB')
    if isinstance(date_of_birth, datetime):
        age = str(session_start_time - date_of_birth)
    else:
        # Unknown age is stored as "not set" rather than the literal 'None'.
        age = None

    subject = Subject(subject_id=subject_id,
                      age=age,
                      genotype=subject_data.get('genotype'),
                      species='mouse')

    nwbfile = NWBFile(
        session_description='mouse in open exploration and theta maze',
        identifier=identifier,
        session_start_time=session_start_time.astimezone(),
        file_create_date=datetime.now().astimezone(),
        experimenter='Yuta Senzai',
        session_id=session_id,
        institution='NYU',
        lab='Buzsaki',
        subject=subject,
        related_publications='DOI:10.1016/j.neuron.2016.12.011')

    print('reading and writing raw position data...', end='', flush=True)
    ns.add_position_data(nwbfile, session_path)

    shank_channels = ns.get_shank_channels(session_path)[:8]
    nshanks = len(shank_channels)
    all_shank_channels = np.concatenate(shank_channels)

    print('setting up electrodes...', end='', flush=True)
    hilus_csv_path = os.path.join(fpath_base, 'early_session_hilus_chans.csv')
    lfp_channel = get_reference_elec(subject_xls, hilus_csv_path,
                                     session_start_time, session_id, b=b)

    custom_column = [{
        'name': 'theta_reference',
        'description': 'this electrode was used to calculate LFP canonical bands',
        'data': all_shank_channels == lfp_channel
    }]
    ns.write_electrode_table(nwbfile, session_path,
                             custom_columns=custom_column,
                             max_shanks=max_shanks)

    print('reading raw electrode data...', end='', flush=True)
    if stub:
        # example recording extractor for fast testing
        xml_filepath = os.path.join(session_path, session_id + '.xml')
        xml_root = et.parse(xml_filepath).getroot()
        acq_sampling_frequency = float(
            xml_root.find('acquisitionSystem').find('samplingRate').text)
        num_channels = 4
        num_frames = 10000
        X = np.random.normal(0, 1, (num_channels, num_frames))
        geom = np.random.normal(0, 1, (num_channels, 2))
        X = (X * 100).astype(int)
        sre = se.NumpyRecordingExtractor(
            timeseries=X,
            sampling_frequency=acq_sampling_frequency,
            geom=geom)
    else:
        nre = se.NeuroscopeRecordingExtractor(
            '{}/{}.dat'.format(session_path, session_id))
        sre = se.SubRecordingExtractor(nre, channel_ids=all_shank_channels)

    print('writing raw electrode data...', end='', flush=True)
    se.NwbRecordingExtractor.add_electrical_series(sre, nwbfile)
    print('done.')

    print('reading spiking units...', end='', flush=True)
    if stub:
        # Number of random spikes drawn for unit 1, 2 and 3 of every shank.
        spike_counts = [200, 300, 400]
        num_frames = 10000
        allshanks = []
        for _ in range(nshanks):
            SX = se.NumpySortingExtractor()
            for unit_id, n_spikes in enumerate(spike_counts, start=1):
                SX.add_unit(unit_id=unit_id,
                            times=np.sort(
                                np.random.uniform(0, num_frames, n_spikes)))
            allshanks.append(SX)
        se_allshanks = se.MultiSortingExtractor(allshanks)
        se_allshanks.set_sampling_frequency(acq_sampling_frequency)
    else:
        se_allshanks = se.NeuroscopeMultiSortingExtractor(session_path,
                                                          keep_mua_units=False)

    # One electrode-group entry per unit, in shank order.
    electrode_group = []
    for shankn in range(1, nshanks + 1):
        for _ in se_allshanks.sortings[shankn - 1].get_unit_ids():
            electrode_group.append(
                nwbfile.electrode_groups['shank' + str(shankn)])

    df_unit_features = get_UnitFeatureCell_features(fpath_base, session_id,
                                                    session_path)

    celltype_names = []
    for celltype_id, region_id in zip(df_unit_features['fineCellType'].values,
                                      df_unit_features['region'].values):
        if celltype_id == 1:
            # regions: 3: 'CA3', 4: 'DG'
            if region_id == 3:
                celltype_names.append('pyramidal cell')
            elif region_id == 4:
                celltype_names.append('granule cell')
            else:
                raise Exception('unknown type')
        elif not np.isfinite(celltype_id):
            celltype_names.append('missing')
        else:
            celltype_names.append(celltype_dict[celltype_id])

    # Add custom column data into the SortingExtractor so it can be written by the converter
    # Note there is currently a hidden assumption that the way in which the NeuroscopeSortingExtractor
    # merges the cluster IDs matches one-to-one with the get_UnitFeatureCell_features extraction
    property_descriptions = {
        'cell_type': 'name of cell type',
        'global_id': 'global id for cell for entire experiment',
        'shank_id': '0-indexed id of cluster of shank',
        'electrode_group': 'the electrode group that each spike unit came from'
    }
    property_values = {
        'cell_type': celltype_names,
        'global_id': df_unit_features['unitID'].values,
        # - 2 b/c the get_UnitFeatureCell_features removes 0 and 1 IDs from each shank
        'shank_id': [x - 2 for x in df_unit_features['unitIDshank'].values],
        'electrode_group': electrode_group
    }
    for unit_id in se_allshanks.get_unit_ids():
        for property_name in property_descriptions:
            se_allshanks.set_unit_property(
                unit_id, property_name,
                property_values[property_name][unit_id])

    se.NwbSortingExtractor.write_sorting(
        se_allshanks,
        nwbfile=nwbfile,
        property_descriptions=property_descriptions)
    print('done.')

    # Read and write LFP's
    print('reading LFPs...', end='', flush=True)
    lfp_fs, all_channels_lfp_data = ns.read_lfp(session_path, stub=stub)
    lfp_data = all_channels_lfp_data[:, all_shank_channels]
    print('writing LFPs...', flush=True)
    lfp_ts = ns.write_lfp(nwbfile, lfp_data, lfp_fs,
                          name='lfp',
                          description='lfp signal for all shank electrodes')

    # Read and add special environmental electrodes
    for name, channel in special_electrode_dict.items():
        ts = TimeSeries(
            name=name,
            description='environmental electrode recorded inline with neural data',
            data=all_channels_lfp_data[:, channel],
            rate=lfp_fs,
            unit='V',
            resolution=np.nan)
        nwbfile.add_acquisition(ts)

    # compute filtered LFP
    print('filtering LFP...', end='', flush=True)
    # The reference trace is the same for every passband: extract it once.
    reference_lfp = lfp_data[:, all_shank_channels == lfp_channel].ravel()
    all_lfp_phases = []
    for passband in ('theta', 'gamma'):
        lfp_fft = filter_lfp(reference_lfp, lfp_fs, passband=passband)
        lfp_phase, _ = hilbert_lfp(lfp_fft)
        all_lfp_phases.append(lfp_phase[:, np.newaxis])
    data = np.dstack(all_lfp_phases)
    print('done.', flush=True)

    if include_spike_waveforms:
        print('writing waveforms...', end='', flush=True)
        nshanks = min((max_shanks, len(ns.get_shank_channels(session_path))))
        for shankn in np.arange(nshanks, dtype=int) + 1:
            # Get spike activity from .spk file on a per-shank and per-sample basis
            ns.write_spike_waveforms(nwbfile, session_path, shankn, stub=stub)
        print('done.', flush=True)

    # Get the LFP Decomposition Series
    decomp_series = DecompositionSeries(
        name='LFPDecompositionSeries',
        description='Theta and Gamma phase for reference LFP',
        data=data,
        rate=lfp_fs,
        source_timeseries=lfp_ts,
        metric='phase',
        unit='radians')
    decomp_series.add_band(band_name='theta', band_limits=(4, 10))
    decomp_series.add_band(band_name='gamma', band_limits=(30, 80))

    check_module(
        nwbfile, 'ecephys',
        'contains processed extracellular electrophysiology data'
    ).add_data_interface(decomp_series)

    for stimulus in ns.get_events(session_path):
        nwbfile.add_stimulus(stimulus)

    # create epochs corresponding to experiments/environments for the mouse
    sleep_state_fpath = os.path.join(
        session_path, '{}--StatePeriod.mat'.format(session_id))

    exist_pos_data = any(
        os.path.isfile(
            os.path.join(session_path,
                         '{}__{}.mat'.format(session_id, task_type['name'])))
        for task_type in task_types)

    if exist_pos_data:
        nwbfile.add_epoch_column('label', 'name of epoch')

    for task_type in task_types:
        label = task_type['name']

        position_fpath = os.path.join(session_path,
                                      session_id + '__' + label + '.mat')
        if os.path.isfile(position_fpath):
            print('loading position for ' + label + '...', end='', flush=True)

            pos_obj = Position(name=label + '_position')

            matin = loadmat(position_fpath)
            # Column 0 is used as timestamps, the remaining columns as position data.
            tt = matin['twhl_norm'][:, 0]
            exp_times = find_discontinuities(tt)

            if 'conversion' in task_type:
                conversion = task_type['conversion']
            else:
                conversion = np.nan

            for pos_type in ('twhl_norm', 'twhl_linearized'):
                if pos_type in matin:
                    pos_data_norm = matin[pos_type][:, 1:]

                    spatial_series_object = SpatialSeries(
                        name=label + '_{}_spatial_series'.format(pos_type),
                        data=H5DataIO(pos_data_norm, compression='gzip'),
                        reference_frame='unknown',
                        conversion=conversion,
                        resolution=np.nan,
                        timestamps=H5DataIO(tt, compression='gzip'))
                    pos_obj.add_spatial_series(spatial_series_object)

            check_module(
                nwbfile, 'behavior',
                'contains processed behavioral data').add_data_interface(pos_obj)
            for i, window in enumerate(exp_times):
                nwbfile.add_epoch(start_time=window[0],
                                  stop_time=window[1],
                                  label=label + '_' + str(i))
            print('done.')

    # there are occasional mismatches between the matlab struct and the neuroscope files
    # regions: 3: 'CA3', 4: 'DG'

    trialdata_path = os.path.join(session_path,
                                  session_id + '__EightMazeRun.mat')
    if os.path.isfile(trialdata_path):
        trials_data = loadmat(trialdata_path)['EightMazeRun']

        trialdatainfo_path = os.path.join(fpath_base, 'EightMazeRunInfo.mat')
        trialdatainfo = [
            x[0] for x in loadmat(trialdatainfo_path)['EightMazeRunInfo'][0]
        ]

        # Entries 4-6 of the info file name the extra trial columns; entries 0
        # and 1 map onto the built-in start_time / stop_time columns.
        for column_name in trialdatainfo[4:7] + ['condition']:
            nwbfile.add_trial_column(column_name, 'description')

        for trial_data in trials_data:
            if trial_data[3]:
                cond = 'run_left'
            else:
                cond = 'run_right'
            nwbfile.add_trial(start_time=trial_data[0],
                              stop_time=trial_data[1],
                              condition=cond,
                              error_run=trial_data[4],
                              stim_run=trial_data[5],
                              both_visit=trial_data[6])

    # NOTE: export of monosynaptic connections is disabled; the draft below is
    # kept for reference only and is never executed.
    # mono_syn_fpath = os.path.join(session_path, session_id+'-MonoSynConvClick.mat')
    # matin = loadmat(mono_syn_fpath)
    # exc = matin['FinalExcMonoSynID']
    # inh = matin['FinalInhMonoSynID']
    # exc_obj = CatCellInfo(name='excitatory_connections',
    #                       indices_values=[], cell_index=exc[:, 0] - 1, indices=exc[:, 1] - 1)
    # module_cellular.add_container(exc_obj)
    # inh_obj = CatCellInfo(name='inhibitory_connections',
    #                       indices_values=[], cell_index=inh[:, 0] - 1, indices=inh[:, 1] - 1)
    # module_cellular.add_container(inh_obj)

    if os.path.isfile(sleep_state_fpath):
        matin = loadmat(sleep_state_fpath)['StatePeriod']

        table = TimeIntervals(name='states',
                              description='sleep states of animal')
        table.add_column(name='label', description='sleep state')

        data = []
        for name in matin.dtype.names:
            for row in matin[name][0][0]:
                data.append({
                    'start_time': row[0],
                    'stop_time': row[1],
                    'label': name
                })
        # Rows are inserted in order of start time.
        for row in sorted(data, key=lambda x: x['start_time']):
            table.add_row(**row)

        check_module(nwbfile, 'behavior',
                     'contains behavioral data').add_data_interface(table)

    print('writing NWB file...', end='', flush=True)
    if stub:
        out_fname = session_path + '_stub.nwb'
    else:
        out_fname = session_path + '.nwb'

    with NWBHDF5IO(out_fname, mode='w') as io:
        io.write(nwbfile, cache_spec=cache_spec)
    print('done.')

    print('testing read...', end='', flush=True)
    # test read
    with NWBHDF5IO(out_fname, mode='r') as io:
        io.read()
    print('done.')