def main(): """ # Usage: $ python plot_ephys_nwb_file.py NWB_FILE_NAME """ nwb_file = sys.argv[1] print("plotting file: %s" % nwb_file) stimulus_ontology_file = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE ont = StimulusOntology(ju.read(stimulus_ontology_file)) data_set = create_data_set(nwb_file=nwb_file, validate_stim=False, ontology=ont) vclamp_sweep_table = data_set.sweep_table[ data_set.sweep_table["clamp_mode"] == "VoltageClamp"] plot_data_set(data_set, vclamp_sweep_table, nwb_file) iclamp_sweep_table = data_set.sweep_table[ data_set.sweep_table["clamp_mode"] == "CurrentClamp"] plot_data_set(data_set, iclamp_sweep_table, nwb_file) plt.show()
def main(): """ Plot sweeps of a given ephys nwb file # Usage: $ python plot_ephys_nwb_file.py NWB_FILE_NAME """ nwb_file = sys.argv[1] print("plotting file: %s" % nwb_file) stimulus_ontology_file = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE ont = StimulusOntology(ju.read(stimulus_ontology_file)) data_set = create_ephys_data_set(nwb_file=nwb_file) vclamp_sweep_table = data_set.filtered_sweep_table( clamp_mode=data_set.VOLTAGE_CLAMP) plot_data_set(data_set, vclamp_sweep_table, nwb_file) data_set = create_ephys_data_set(nwb_file=nwb_file) iclamp_sweep_table = data_set.filtered_sweep_table( clamp_mode=data_set.CURRENT_CLAMP) plot_data_set(data_set, iclamp_sweep_table, nwb_file) plt.show()
def test_get_stimulus_code_ext(NWB_file):

    default_ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    dataset = AibsDataSet(nwb_file=NWB_file, ontology=default_ontology)

    assert dataset.get_stimulus_code_ext("EXTPSMOKET180424", 0) == \
        "EXTPSMOKET180424[0]"
def test_get_clamp_mode(NWB_file):

    default_ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    dataset = AibsDataSet(nwb_file=NWB_file, ontology=default_ontology)

    assert dataset.get_clamp_mode(0) == dataset.VOLTAGE_CLAMP
def run_qc(stimulus_ontology_file, cell_features, sweep_features, qc_criteria):
    """
    Parameters
    ----------
    stimulus_ontology_file : str
        ontology file name
    cell_features : dict
        cell features
    sweep_features : list of dicts
        sweep features
    qc_criteria : dict
        qc criteria

    Returns
    -------
    dict
        containing state of the cell and sweeps
    """

    lu.log_pretty_header("Perform QC checks", level=1)

    if not stimulus_ontology_file:
        stimulus_ontology_file = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE
        logging.info(
            f"Ontology is not provided, using default "
            f"{StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE}"
        )
    ont = StimulusOntology(ju.read(stimulus_ontology_file))

    cell_state, sweep_states = qcp.qc_experiment(
        ont, cell_features, sweep_features, qc_criteria)

    qc_summary(sweep_features, sweep_states, cell_features, cell_state)

    return dict(cell_state=cell_state, sweep_states=sweep_states)
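# Usage sketch for run_qc, assuming features were produced upstream by
# run_sweep_extraction (see below); "qc_criteria.json" is a hypothetical
# file name. Passing None for the ontology file falls back to the package
# default ontology.
#
#   qc_criteria = ju.read("qc_criteria.json")  # hypothetical path
#   qc_results = run_qc(None, cell_features, sweep_features, qc_criteria)
#   print(qc_results["cell_state"])
#   print(len(qc_results["sweep_states"]), "sweep states")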
def mies_nwb_data(tmp_nwb_path):

    nwbfile = nwbfile_to_test()
    with pynwb.NWBHDF5IO(path=tmp_nwb_path, mode="w") as writer:
        writer.write(nwbfile)

    ontology = StimulusOntology(
        [[('name', 'expected name'), ('code', 'STIMULUS_CODE')],
         [('name', 'test name'), ('code', 'extpexpend')]
         ])

    class Notebook(LabNotebookReader):

        def get_value(self, key, sweep_num, default):
            return {
                ("Scale Factor", 4): 200.0,
                ("Set Sweep Count", 4): "1"
            }.get((key, sweep_num), default)

    fake_notebook = Notebook()

    return MIESNWBData(nwb_file=tmp_nwb_path,
                       notebook=fake_notebook,
                       ontology=ontology)
def run_feature_collection(ids=None, project="T301", include_failed_sweeps=True,
                           include_failed_cells=False, output_file="",
                           run_parallel=True, data_source="lims", **kwargs):
    if ids is not None:
        specimen_ids = ids
    else:
        specimen_ids = lq.project_specimen_ids(
            project, passed_only=not include_failed_cells)

    logging.info("Number of specimens to process: {:d}".format(
        len(specimen_ids)))

    ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))

    get_data_partial = partial(data_for_specimen_id,
                               passed_only=not include_failed_sweeps,
                               data_source=data_source,
                               ontology=ontology)

    if run_parallel:
        pool = Pool()
        results = pool.map(get_data_partial, specimen_ids)
    else:
        results = map(get_data_partial, specimen_ids)

    df = pd.DataFrame([r for r in results if len(r) > 0])
    logging.info("shape {}".format(df.shape))
    df.set_index("specimen_id").to_csv(output_file)
def load_stimulus_ontology_from_json(self, path: str):
    """ Attempts to read a stimulus ontology file from a JSON. If successful
    (and other required data are already set), attempts to run the pre-fx
    pipeline.

    Parameters
    ----------
    path : load ontology from here
    """
    try:
        with open(path, "r") as ontology_file:
            ontology_data = json.load(ontology_file)
        ontology = StimulusOntology(ontology_data)
        self.ontology_file = path

        if self.nwb_path is not None and self.qc_criteria is not None:
            self.run_extraction_and_auto_qc(
                self.nwb_path, ontology, self.qc_criteria, commit=True
            )
        else:
            self.stimulus_ontology = ontology

    except Exception as err:
        exception_message(
            "StimulusOntology load failed",
            f"failed to load stimulus ontology file from {path}",
            err
        )
def run_feature_extraction(input_nwb_file, stimulus_ontology_file,
                           output_nwb_file, qc_fig_dir, sweep_info, cell_info):

    lu.log_pretty_header("Extract ephys features", level=1)

    sp.drop_failed_sweeps(sweep_info)
    if len(sweep_info) == 0:
        raise er.FeatureError(
            "There are no QC-passed sweeps available to analyze")

    if not stimulus_ontology_file:
        stimulus_ontology_file = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE
        logging.info(
            f"Ontology is not provided, using default "
            f"{StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE}"
        )
    ont = StimulusOntology(ju.read(stimulus_ontology_file))

    data_set = create_data_set(sweep_info=sweep_info,
                               nwb_file=input_nwb_file,
                               ontology=ont,
                               api_sweeps=False)

    try:
        (cell_features, sweep_features,
         cell_record, sweep_records) = dsft.extract_data_set_features(data_set)

        if cell_info:
            cell_record.update(cell_info)

        cell_state = {"failed_fx": False, "fail_fx_message": None}

        feature_data = {
            'cell_features': cell_features,
            'sweep_features': sweep_features,
            'cell_record': cell_record,
            'sweep_records': sweep_records,
            'cell_state': cell_state
        }

    except (er.FeatureError, IndexError) as e:
        cell_state = {"failed_fx": True, "fail_fx_message": str(e)}
        logging.warning(e)
        feature_data = {'cell_state': cell_state}

    if not cell_state["failed_fx"]:
        sweep_spike_times = collect_spike_times(sweep_features)
        embed_spike_times(input_nwb_file, output_nwb_file, sweep_spike_times)

    if qc_fig_dir is None:
        logging.info("qc_fig_dir is not provided, will not save figures")
    else:
        plotqc.display_features(qc_fig_dir, data_set, feature_data)

    # On Windows int64 keys of sweep numbers cannot be converted to str by
    # json.dump when serializing, so convert them here. Guarded because
    # 'sweep_features' is absent when feature extraction failed.
    if "sweep_features" in feature_data:
        feature_data["sweep_features"] = {
            str(k): v for k, v in feature_data["sweep_features"].items()
        }

    return feature_data
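# Why the str(k) conversion above is needed: json.dump stringifies plain int
# dict keys automatically, but numpy integer keys such as np.int64 (which
# pandas sweep numbers can carry) are rejected with a TypeError. A minimal,
# self-contained demonstration:

import json
import numpy as np

print(json.dumps({1: "ok"}))           # plain int keys are stringified: {"1": "ok"}
try:
    json.dumps({np.int64(1): "boom"})  # numpy scalar keys are not supported
except TypeError as err:
    print("TypeError:", err)

# Converting keys up front, as run_feature_extraction does, avoids the error:
print(json.dumps({str(np.int64(1)): "ok"}))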
def ontology():
    return StimulusOntology([[('name', 'long square'), ('code', 'LS')],
                             [('name', 'noise', 'noise 1'), ('code', 'C1NS1')],
                             [('name', 'noise', 'noise 2'), ('code', 'C1NS2')]])
def drop_failed_sweeps(
        dataset: EphysDataSet,
        stimulus_ontology: Optional[StimulusOntology] = None,
        qc_criteria: Optional[Dict] = None
) -> List[Dict]:
    """A convenience which extracts and QCs sweeps in preparation for dataset
    feature extraction. This function:
    1. extracts sweep qc features
    2. removes sweeps tagged with failure messages
    3. sets sweep states based on qc results

    Parameters
    ----------
    dataset : dataset from which to draw sweeps

    Returns
    -------
    sweep_features : a list of dictionaries, each describing a sweep
    """
    if stimulus_ontology is None:
        stimulus_ontology = StimulusOntology.default()
    if qc_criteria is None:
        qc_criteria = qcp.load_default_qc_criteria()

    sweep_features = sweep_qc_features(dataset)
    sweep_props.drop_tagged_sweeps(sweep_features)
    sweep_props.remove_sweep_feature("tags", sweep_features)
    sweep_states = qcp.qc_sweeps(
        stimulus_ontology, sweep_features, qc_criteria
    )
    sweep_props.assign_sweep_states(sweep_states, sweep_features)
    dataset.sweep_info = sweep_features

    # return the surviving sweeps, as the signature and docstring promise
    return sweep_features
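# Usage sketch: drop_failed_sweeps bundles the manual QC steps shown in the
# script near the end of this section (sweep_qc_features ->
# drop_tagged_sweeps -> qc_sweeps -> assign_sweep_states).
# "example.nwb" is a hypothetical file name.
#
#   data_set = create_ephys_data_set(nwb_file="example.nwb")
#   sweep_features = drop_failed_sweeps(data_set)
#   # data_set.sweep_info now holds only the sweeps that survived QC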
def get_empty_dataset():
    default_ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    dataset = EphysDataSet(default_ontology)
    dataset.build_sweep_table(sweep_info=[])
    return dataset
def get_dataset():
    d = get_sweep_table_dict()
    df = pd.DataFrame(d)
    default_ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    dataset = EphysDataSet(default_ontology)
    dataset.sweep_table = df
    return dataset
def nwb_data(tmp_nwb_path):
    nwbfile = nwbfile_to_test()
    with pynwb.NWBHDF5IO(path=tmp_nwb_path, mode="w") as writer:
        writer.write(nwbfile)

    ontology = StimulusOntology(
        [[('name', 'expected name'), ('code', 'STIMULUS_CODE')],
         [('name', 'test name'), ('code', 'extpexpend')]])

    return EphysNWBData(nwb_file=tmp_nwb_path, ontology=ontology)
def ontology():
    return StimulusOntology(
        [[('name', 'ramp stimulus'), ('code', 'RAMP1')],
         [('name', 'extpinbath stimulus'), ('code', 'extpinbath')],
         [('name', 'extpbreakn stimulus'), ('code', 'extpbreakn')],
         [('name', 'Long square stimulus'), ('code', 'Long square')],
         [('name', 'Short square stimulus'), ('code', 'Short square')],
         [('name', 'Rheobase stimulus'), ('code', 'Rheobase')],
         [('name', 'Ramp stimulus'), ('code', 'Ramp')],
         [('name', 'Capacitance stimulus'), ('code', 'Capacitance')],
         [('name', 'Chirp stimulus'), ('code', 'Chirp')],
         [('name', 'extpexpend stimulus'), ('code', 'extpexpend')]
         ])
def run_sweep_extraction(input_nwb_file, input_h5_file, stimulus_ontology_file,
                         input_manual_values=None):
    """
    Parameters
    ----------
    input_nwb_file : str
        nwb file name
    input_h5_file : str
        h5 file name
    stimulus_ontology_file : str
        ontology file name
    input_manual_values : dict
        manual values, keyed by MANUAL_KEYS

    Returns
    -------
    dict
        cell features, cell tags, and sweep features
    """
    lu.log_pretty_header("Extract QC features", level=1)

    if input_manual_values is None:
        input_manual_values = {}

    manual_values = {}
    for mk in MANUAL_KEYS:
        if mk in input_manual_values:
            manual_values[mk] = input_manual_values[mk]

    if stimulus_ontology_file:
        mso.make_stimulus_ontology_from_lims(stimulus_ontology_file)
    else:
        stimulus_ontology_file = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE
        logging.info(
            f"Ontology is not provided, using default "
            f"{StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE}"
        )

    ont = StimulusOntology(ju.read(stimulus_ontology_file))
    ds = create_data_set(nwb_file=input_nwb_file,
                         h5_file=input_h5_file,
                         ontology=ont)

    cell_features, cell_tags = qcfe.cell_qc_features(ds, manual_values)

    for tag in cell_tags:
        logging.warning(tag)

    sweep_features = qcfe.sweep_qc_features(ds)

    return dict(
        cell_features=cell_features,
        cell_tags=cell_tags,
        sweep_features=sweep_features,
    )
def run_sweep_extraction(input_nwb_file, stimulus_ontology_file=None,
                         input_manual_values=None, update_ontology=True,
                         **unused_args):
    """
    Parameters
    ----------
    input_nwb_file : str
        nwb file name
    stimulus_ontology_file : str
        ontology file name
    input_manual_values : dict
        manual values, keyed by MANUAL_KEYS
    update_ontology : bool
        if True, refresh the ontology file from LIMS before loading

    Returns
    -------
    dict
        cell features, cell tags, and sweep features
    """
    log_pretty_header("Extract QC features", level=1)

    if input_manual_values is None:
        input_manual_values = {}

    manual_values = {}
    for mk in MANUAL_KEYS:
        if mk in input_manual_values:
            manual_values[mk] = input_manual_values[mk]

    if stimulus_ontology_file and update_ontology:
        make_stimulus_ontology_from_lims(stimulus_ontology_file)
    if stimulus_ontology_file is None:
        stimulus_ontology_file = \
            StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE
        logging.info(f"Ontology is not provided, using default "
                     f"{StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE}")
    ont = StimulusOntology(json_utilities.read(stimulus_ontology_file))

    ds = create_ephys_data_set(nwb_file=input_nwb_file, ontology=ont)

    cell_features, cell_tags = cell_qc_features(ds, manual_values)

    for tag in cell_tags:
        logging.warning(tag)

    sweep_features = sweep_qc_features(ds)

    return {
        "cell_features": cell_features,
        "cell_tags": cell_tags,
        "sweep_features": sweep_features,
    }
def data_for_specimen_id(specimen_id, passed_only):
    name, roi_id, specimen_id = lq.get_specimen_info_from_lims_by_id(specimen_id)
    nwb_path = lq.get_nwb_path_from_lims(roi_id)
    if len(nwb_path) == 0:  # could not find an NWB file
        logging.debug("No NWB file for {:d}".format(specimen_id))
        return {"error": {"type": "no_nwb", "details": ""}}

    # Check if NWB has lab notebook information, or if an additional hdf5
    # file is needed
    ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    h5_path = None
    with h5py.File(nwb_path, "r") as h5:
        if "general/labnotebook" not in h5:
            h5_path = lq.get_igorh5_path_from_lims(roi_id)

    try:
        data_set = AibsDataSet(nwb_file=nwb_path, h5_file=h5_path,
                               ontology=ontology)
    except Exception as detail:
        logging.warning("Exception when processing specimen {:d}".format(specimen_id))
        logging.warning(detail)
        # return {"error": {"type": "dataset", "details": traceback.format_exc(limit=1)}}
        return {}

    try:
        lsq_sweep_numbers = categorize_iclamp_sweeps(
            data_set, ontology.long_square_names)
        ssq_sweep_numbers = categorize_iclamp_sweeps(
            data_set, ontology.short_square_names)
        ramp_sweep_numbers = categorize_iclamp_sweeps(
            data_set, ontology.ramp_names)
    except Exception as detail:
        logging.warning("Exception when processing specimen {:d}".format(specimen_id))
        logging.warning(detail)
        # return {"error": {"type": "sweep_table", "details": traceback.format_exc(limit=1)}}
        return {}

    try:
        result = extract_features(data_set, ramp_sweep_numbers,
                                  ssq_sweep_numbers, lsq_sweep_numbers)
    except Exception as detail:
        logging.warning("Exception when processing specimen {:d}".format(specimen_id))
        logging.warning(detail)
        # return {"error": {"type": "processing", "details": traceback.format_exc(limit=1)}}
        return {}

    result["specimen_id"] = specimen_id
    return result
def create_ephys_data_set(nwb_file: str,
                          sweep_info: Optional[Dict[str, Any]] = None,
                          ontology: Optional[Union[str, Path, StimulusOntology]] = None
                          ) -> EphysDataSet:
    """
    Create an ephys data set with the appropriate nwb data reader class

    Parameters
    ----------
    nwb_file : path to the NWB file to load
    sweep_info : optional sweep metadata to attach to the data set
    ontology : a StimulusOntology, or a path to an ontology JSON; defaults
        to the package's default ontology file

    Returns
    -------
    EphysDataSet
    """
    nwb_version = get_nwb_version(nwb_file)
    is_mies = is_file_mies(nwb_file)

    if not ontology:
        ontology = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE
    if isinstance(ontology, (str, Path)):
        ontology = StimulusOntology(ju.read(ontology))

    if nwb_version["major"] == 2:
        if is_mies:
            labnotebook = LabNotebookReaderIgorNwb(nwb_file)
            nwb_data = MIESNWBData(nwb_file, labnotebook, ontology)
        else:
            nwb_data = HBGNWBData(nwb_file, ontology)
    else:
        raise ValueError(
            "Unsupported or unknown NWB major version {} ({})".format(
                nwb_version["major"], nwb_version["full"]))

    return EphysDataSet(
        sweep_info=sweep_info,
        data=nwb_data,
    )
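# Usage sketch for create_ephys_data_set. The ontology argument may be
# omitted (default ontology), given as a path, or given as an already
# constructed StimulusOntology; "example.nwb" is a hypothetical file name.
#
#   data_set = create_ephys_data_set(nwb_file="example.nwb")
#   data_set = create_ephys_data_set(
#       nwb_file="example.nwb",
#       ontology=StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE)
#   data_set = create_ephys_data_set(
#       nwb_file="example.nwb",
#       ontology=StimulusOntology(
#           ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE)))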
def run_chirp_feature_vector_extraction(output_dir, output_code,
                                        include_failed_cells, specimen_ids,
                                        chirp_stimulus_codes,
                                        data_source="lims", run_parallel=True):
    logging.info("Number of specimens to process: {:d}".format(
        len(specimen_ids)))

    # Include and name chirp stimulus codes in ontology
    ontology_data = ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE)
    edited_ontology_data = edit_ontology_data(ontology_data,
                                              chirp_stimulus_codes,
                                              new_name_tag="Chirp",
                                              new_core_tag="Core 2")
    ontology = StimulusOntology(edited_ontology_data)

    get_data_partial = partial(data_for_specimen_id,
                               data_source=data_source,
                               ontology=ontology)

    if run_parallel:
        pool = Pool()
        results = pool.map(get_data_partial, specimen_ids)
    else:
        results = map(get_data_partial, specimen_ids)

    used_ids, results, error_set = su.filter_results(specimen_ids, results)
    logging.info("Finished with {:d} processed specimens".format(
        len(used_ids)))

    results_dict = su.organize_results(used_ids, results)
    su.save_results_to_npy(used_ids, results_dict, output_dir, output_code)
    su.save_errors_to_json(error_set, output_dir, output_code)
    logging.info("Finished saving")
def save_cell_data(self, acceptable_stimtypes, non_standard_nwb=False,
                   ephys_dir='preprocessed'):

    bpopt_stimtype_map = utility.bpopt_stimtype_map
    distinct_id_map = utility.aibs_stimname_map

    # Note: may also need to provide h5 "lab notebook" and/or ontology
    from ipfx.stimulus import StimulusOntology
    from ipfx.epochs import get_recording_epoch
    import allensdk.core.json_utilities as ju

    ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    dataset = AibsDataSet(nwb_file=self.nwb_path, ontology=ontology)

    stim_map = defaultdict(list)
    stim_sweep_map = {}
    output_dir = os.path.join(os.getcwd(), ephys_dir)
    utility.create_dirpath(output_dir)

    # Note: are QC criteria appropriate for ramps + other stim?
    passed_sweep_nums = get_passed_sweeps(dataset, self.cell_id)
    for sweep_num in passed_sweep_nums:
        record = dataset.get_sweep_record(sweep_num)
        sweep_number = record[AibsDataSet.SWEEP_NUMBER]
        stim_type = record[AibsDataSet.STIMULUS_NAME]

        if stim_type in acceptable_stimtypes:
            # TODO: use dataset.sweep to get full object, epochs
            sweep = dataset.get_sweep_data(sweep_number)

            stimulus_trace = sweep['stimulus']
            response_trace = sweep['response']
            sampling_rate = sweep['sampling_rate']

            # remove missing data
            # start, end = get_recording_epoch(stimulus_trace)
            # stimulus_trace = stimulus_trace[:end]
            # response_trace = response_trace[:end]
            time = np.arange(0, len(stimulus_trace)) / sampling_rate

            trace_name = '%s_%d' % (
                distinct_id_map[stim_type], sweep_number)

            if non_standard_nwb:
                calc_stimparams_func = self.calc_stimparams_nonstandard
            else:
                calc_stimparams_func = self.calc_stimparams_ipfx

            stim_start, stim_stop, stim_amp_start, stim_amp_end, \
                tot_duration, hold_curr = calc_stimparams_func(
                    time, stimulus_trace, trace_name)

            response_trace_short_filename = '%s.%s' % (trace_name, 'txt')
            response_trace_filename = os.path.join(
                output_dir, response_trace_short_filename)

            time *= 1e3  # in ms
            response_trace *= 1e3  # in mV
            response_trace = utility.correct_junction_potential(
                response_trace, self.junction_potential)
            stimulus_trace *= 1e9

            # downsampling
            time, stimulus_trace, response_trace = utility.downsample_ephys_data(
                time, stimulus_trace, response_trace)

            # save current timeseries only when needed
            if stim_type in utility.bpopt_current_play_stimtypes:
                with open(response_trace_filename, 'wb') as response_trace_file:
                    np.savetxt(response_trace_file,
                               np.transpose([time, response_trace,
                                             stimulus_trace]))
            else:
                with open(response_trace_filename, 'wb') as response_trace_file:
                    np.savetxt(response_trace_file,
                               np.transpose([time, response_trace]))

            stim_map[distinct_id_map[stim_type]].append([
                trace_name,
                bpopt_stimtype_map[stim_type],
                hold_curr / 1e12,
                stim_amp_start / 1e12,
                stim_amp_end / 1e12,
                stim_start * 1e3,
                stim_stop * 1e3,
                tot_duration * 1e3,
                response_trace_short_filename])
            stim_sweep_map[trace_name] = sweep_number

    logger.debug('Writing stimmap.csv ...')
    stim_reps_sweep_map, stimmap_filename = self.write_stimmap_csv(
        stim_map, output_dir, stim_sweep_map)

    self.write_provenance(
        output_dir, self.nwb_path,
        stim_sweep_map, stim_reps_sweep_map)

    return output_dir, stimmap_filename
def test_get_stimulus_units(NWB_file):

    default_ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))
    dataset = AibsDataSet(nwb_file=NWB_file, ontology=default_ontology)

    assert dataset.get_stimulus_units(0) == "Volts"
    obt_v, obt_i = EphysDataSet._voltage_current(
        stimulus, response, EphysDataSet.CURRENT_CLAMP)

    assert np.allclose(obt_v, response)
    assert np.allclose(obt_i, stimulus)


def test_voltage_current_unequal():
    with pytest.raises(ValueError):
        EphysDataSet._voltage_current(
            np.arange(2), np.arange(3), EphysDataSet.VOLTAGE_CLAMP)


with open(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE, "r") as _def_ont:
    _default_ont_data = json.load(_def_ont)
DEFAULT_ONT = StimulusOntology(_default_ont_data)


class EphysDataFixture(EphysDataInterface):
    """
    """

    REC_DATE = datetime.strptime(
        "2020-03-19 10:30:12 +1000", "%Y-%m-%d %H:%M:%S %z")

    SWEEPS: Dict[int, Dict[str, Dict[str, Any]]] = {
        1: {
            "meta": {
                "sweep_number": 1,
                "stimulus_units": "amperes",
                "bridge_balance_mohm": "1.0",
import pandas as pd

from ipfx.aibs_data_set import AibsDataSet
import ipfx.data_set_features as dsf
import ipfx.stim_features as stf
import ipfx.stimulus_protocol_analysis as spa
import ipfx.feature_vectors as fv
from ipfx.stimulus import StimulusOntology
from ipfx.sweep import Sweep, SweepSet

import allensdk.core.json_utilities as ju
import pytest
import os

from .helpers_for_tests import download_file

TEST_OUTPUT_DIR = "/allen/aibs/informatics/module_test_data/ipfx/test_feature_vector"

ontology = StimulusOntology(
    ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))


@pytest.fixture
def feature_vector_input():

    TEST_DATA_PATH = os.path.join(os.path.dirname(__file__), 'data')

    nwb_file_name = "Pvalb-IRES-Cre;Ai14-415796.02.01.01.nwb"
    nwb_file_full_path = os.path.join(TEST_DATA_PATH, nwb_file_name)

    if not os.path.exists(nwb_file_full_path):
        download_file(nwb_file_name, nwb_file_full_path)

    data_set = AibsDataSet(nwb_file=nwb_file_full_path, ontology=ontology)
def data_for_specimen_id(specimen_id, sweep_qc_option, data_source,
                         ap_window_length=0.006, target_sampling_rate=10000,
                         nfiles=None):
    logging.debug("specimen_id: {}".format(specimen_id))

    lsq_fail = False
    ssq_fail = False
    ramp_fail = False

    # Find or retrieve NWB file and ancillary info and construct an
    # AibsDataSet object
    ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))

    if data_source == "local":
        nwb_path = nfiles[specimen_id]
        if type(nwb_path) is dict and "error" in nwb_path:
            logging.warning(
                "Problem getting NWB file for specimen {:d}".format(
                    specimen_id))
            return nwb_path

        data_set = HBGDataSet(nwb_file=nwb_path, ontology=ontology)

    else:
        logging.error("invalid data source specified ({})".format(data_source))

    # Identify and preprocess long square sweeps
    try:
        lsq_sweep_numbers = categorize_iclamp_sweeps(
            data_set,
            ontology.long_square_names,
            sweep_qc_option=sweep_qc_option,
            specimen_id=specimen_id)

        (lsq_sweeps, lsq_features, lsq_start, lsq_end,
         lsq_spx) = preprocess_long_square_sweeps(data_set, lsq_sweep_numbers)

    except Exception as detail:
        lsq_fail = True
        logging.warning(
            "Exception when preprocessing long square sweeps from specimen {:d}"
            .format(specimen_id))
        logging.warning(detail)
        return {
            "error": {
                "type": "sweep_table",
                "details": traceback.format_exc(limit=None)
            }
        }

    # Identify and preprocess short square sweeps; failure here is tolerated
    # (the ssq_fail flag is checked when assembling the feature vectors below)
    try:
        ssq_sweep_numbers = categorize_iclamp_sweeps(
            data_set,
            ontology.short_square_names,
            sweep_qc_option=sweep_qc_option,
            specimen_id=specimen_id)

        ssq_sweeps, ssq_features = preprocess_short_square_sweeps(
            data_set, ssq_sweep_numbers)
    except Exception as detail:
        ssq_fail = True
        logging.warning(
            "Exception when preprocessing short square sweeps from specimen {:d}"
            .format(specimen_id))
        logging.warning(detail)

    # Identify and preprocess ramp sweeps; failure here is likewise tolerated
    try:
        ramp_sweep_numbers = categorize_iclamp_sweeps(
            data_set,
            ontology.ramp_names,
            sweep_qc_option=sweep_qc_option,
            specimen_id=specimen_id)

        ramp_sweeps, ramp_features = preprocess_ramp_sweeps(
            data_set, ramp_sweep_numbers)
    except Exception as detail:
        ramp_fail = True
        logging.warning(
            "Exception when preprocessing ramp sweeps from specimen {:d}"
            .format(specimen_id))
        logging.warning(detail)

    # Calculate desired feature vectors
    result = {}

    try:
        (subthresh_hyperpol_dict,
         hyperpol_deflect_dict) = fv.identify_subthreshold_hyperpol_with_amplitudes(
            lsq_features, lsq_sweeps)
        target_amps_for_step_subthresh = [-90, -70, -50, -30, -10]
        result["step_subthresh"] = fv.step_subthreshold(
            subthresh_hyperpol_dict, target_amps_for_step_subthresh,
            lsq_start, lsq_end, amp_tolerance=5)
        result["subthresh_norm"] = fv.subthresh_norm(subthresh_hyperpol_dict,
                                                     hyperpol_deflect_dict,
                                                     lsq_start, lsq_end)

        (subthresh_depol_dict,
         depol_deflect_dict) = fv.identify_subthreshold_depol_with_amplitudes(
            lsq_features, lsq_sweeps)
        result["subthresh_depol_norm"] = fv.subthresh_depol_norm(
            subthresh_depol_dict, depol_deflect_dict, lsq_start, lsq_end)

        isi_sweep, isi_sweep_spike_info = fv.identify_sweep_for_isi_shape(
            lsq_sweeps, lsq_features, lsq_end - lsq_start)
        result["isi_shape"] = fv.isi_shape(isi_sweep, isi_sweep_spike_info,
                                           lsq_end)

        if not ssq_fail:
            # Calculate waveforms from each type of sweep
            spiking_ssq_sweep_list = [
                ssq_sweeps.sweeps[swp_ind]
                for swp_ind in ssq_features["common_amp_sweeps"].index
            ]
            spiking_ssq_info_list = [
                ssq_features["spikes_set"][swp_ind]
                for swp_ind in ssq_features["common_amp_sweeps"].index
            ]
            ssq_ap_v, ssq_ap_dv = fv.first_ap_vectors(
                spiking_ssq_sweep_list,
                spiking_ssq_info_list,
                target_sampling_rate=target_sampling_rate,
                window_length=ap_window_length,
                skip_clipped=True)
        else:
            ssq_ap_v, ssq_ap_dv = np.nan, np.nan

        rheo_ind = lsq_features["rheobase_sweep"].name
        sweep = lsq_sweeps.sweeps[rheo_ind]
        lsq_ap_v, lsq_ap_dv = fv.first_ap_vectors(
            [sweep],
            [lsq_features["spikes_set"][rheo_ind]],
            target_sampling_rate=target_sampling_rate,
            window_length=ap_window_length)

        if not ramp_fail:
            spiking_ramp_sweep_list = [
                ramp_sweeps.sweeps[swp_ind]
                for swp_ind in ramp_features["spiking_sweeps"].index
            ]
            spiking_ramp_info_list = [
                ramp_features["spikes_set"][swp_ind]
                for swp_ind in ramp_features["spiking_sweeps"].index
            ]
            ramp_ap_v, ramp_ap_dv = fv.first_ap_vectors(
                spiking_ramp_sweep_list,
                spiking_ramp_info_list,
                target_sampling_rate=target_sampling_rate,
                window_length=ap_window_length,
                skip_clipped=True)
        else:
            ramp_ap_v, ramp_ap_dv = np.nan, np.nan

        # Fall back to the long square AP waveform when ramp or short square
        # sweeps were unavailable
        if ramp_fail:
            ramp_ap_dv = np.copy(lsq_ap_dv)
            ramp_ap_v = np.copy(lsq_ap_v)
        if ssq_fail:
            ssq_ap_dv = np.copy(lsq_ap_dv)
            ssq_ap_v = np.copy(lsq_ap_v)

        # Combine so that differences can be assessed by analyses like sPCA
        result["first_ap_v"] = np.hstack([ssq_ap_v, lsq_ap_v, ramp_ap_v])
        result["first_ap_dv"] = np.hstack([ssq_ap_dv, lsq_ap_dv, ramp_ap_dv])

        target_amplitudes = np.arange(0, 120, 20)
        supra_info_list = fv.identify_suprathreshold_spike_info(
            lsq_features, target_amplitudes, shift=10)
        result["psth"] = fv.psth_vector(supra_info_list, lsq_start, lsq_end)
        result["inst_freq"] = fv.inst_freq_vector(supra_info_list,
                                                  lsq_start, lsq_end)

        spike_feature_list = [
            "upstroke_downstroke_ratio",
            "peak_v",
            "fast_trough_v",
            "threshold_v",
            "width",
        ]
        for feature in spike_feature_list:
            result["spiking_" + feature] = fv.spike_feature_vector(
                feature, supra_info_list, lsq_start, lsq_end)
            if feature == 'width':
                result["spiking_width"] = result["spiking_width"] / 2

    except Exception as detail:
        logging.warning(
            "Exception when processing specimen {:d}".format(specimen_id))
        logging.warning(detail)
        return {
            "error": {
                "type": "processing",
                "details": traceback.format_exc(limit=None)
            }
        }

    return result
def data_for_specimen_id(specimen_id, data_source, chirp_stimulus_codes):
    logging.debug("specimen_id: {}".format(specimen_id))

    # Manually edit ontology to identify chirp sweeps
    ontology_data = ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE)
    mask = []
    for od in ontology_data:
        mask_val = True
        for tagset in od:
            for c in chirp_stimulus_codes:
                if c in tagset and "code" in tagset:
                    mask_val = False
                    break
        mask.append(mask_val)
    ontology_data = [od for od, m in zip(ontology_data, mask) if m is True]
    ontology_data.append([
        ["code"] + chirp_stimulus_codes,
        [
            "name",
            "Chirp",
        ],
        [
            "core",
            "Core 2"
        ]
    ])
    ontology = StimulusOntology(ontology_data)

    # Find or retrieve NWB file and ancillary info and construct an
    # AibsDataSet object
    if data_source == "lims":
        nwb_path, h5_path = lims_nwb_information(specimen_id)
        if type(nwb_path) is dict and "error" in nwb_path:
            logging.warning("Problem getting NWB file for specimen "
                            "{:d} from LIMS".format(specimen_id))
            return nwb_path

        try:
            data_set = AibsDataSet(
                nwb_file=nwb_path, h5_file=h5_path, ontology=ontology)
        except Exception as detail:
            logging.warning("Exception when loading specimen "
                            "{:d} from LIMS".format(specimen_id))
            logging.warning(detail)
            return {"error": {"type": "dataset",
                              "details": traceback.format_exc(limit=None)}}
    elif data_source == "sdk":
        nwb_path, sweep_info = sdk_nwb_information(specimen_id)
        try:
            data_set = AibsDataSet(
                nwb_file=nwb_path, sweep_info=sweep_info, ontology=ontology)
        except Exception as detail:
            logging.warning("Exception when loading specimen "
                            "{:d} via Allen SDK".format(specimen_id))
            logging.warning(detail)
            return {"error": {"type": "dataset",
                              "details": traceback.format_exc(limit=None)}}
    else:
        logging.error("invalid data source specified ({})".format(data_source))

    # Identify chirp sweeps
    try:
        iclamp_st = data_set.filtered_sweep_table(
            clamp_mode=data_set.CURRENT_CLAMP, stimuli=["Chirp"])
        chirp_sweep_numbers = iclamp_st["sweep_number"].sort_values().values
    except Exception as detail:
        logging.warning("Exception when identifying sweeps from specimen "
                        "{:d}".format(specimen_id))
        logging.warning(detail)
        return {"error": {"type": "sweep_table",
                          "details": traceback.format_exc(limit=1)}}

    if len(chirp_sweep_numbers) == 0:
        logging.info("No chirp sweeps for {:d}".format(specimen_id))
        return {"error": {"type": "processing",
                          "details": "no available chirp sweeps"}}

    try:
        result = chirp.extract_chirp_feature_vector(data_set,
                                                    chirp_sweep_numbers)
    except Exception as detail:
        logging.warning("Exception when processing specimen "
                        "{:d}".format(specimen_id))
        logging.warning(detail)
        return {"error": {"type": "processing",
                          "details": traceback.format_exc(limit=1)}}

    return result
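# A minimal, self-contained sketch of the ontology-editing pattern used
# above: drop every ontology entry whose "code" tag set mentions one of the
# chirp stimulus codes, then append a single entry mapping all of those
# codes to the name "Chirp". The toy ontology_data below is illustrative,
# not the real default ontology file.

chirp_codes = ["C2CHIRP180503", "C2CHIRP171129"]
ontology_data = [
    [["code", "C1LSFINEST150112"], ["name", "Long Square"], ["core", "Core 1"]],
    [["code", "C2CHIRP180503"], ["name", "Unknown"], ["core", "Core 2"]],
]

# Keep entries whose "code" tag set mentions none of the chirp codes
kept = [
    entry for entry in ontology_data
    if not any("code" in tagset and code in tagset
               for tagset in entry for code in chirp_codes)
]
# Append one entry covering all chirp codes under a common name and core tag
kept.append([["code"] + chirp_codes, ["name", "Chirp"], ["core", "Core 2"]])

print(kept)  # the long square entry plus the new combined chirp entry
# An ontology could then be built with StimulusOntology(kept)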
def run_feature_extraction(input_nwb_file, stimulus_ontology_file,
                           output_nwb_file, qc_fig_dir, sweep_info, cell_info,
                           write_spikes=True):

    lu.log_pretty_header("Extract ephys features", level=1)

    sp.drop_failed_sweeps(sweep_info)
    if len(sweep_info) == 0:
        raise er.FeatureError(
            "There are no QC-passed sweeps available to analyze")

    if not stimulus_ontology_file:
        stimulus_ontology_file = StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE
        logging.info(
            f"Ontology is not provided, using default "
            f"{StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE}"
        )
    ont = StimulusOntology(ju.read(stimulus_ontology_file))

    data_set = create_ephys_data_set(sweep_info=sweep_info,
                                     nwb_file=input_nwb_file,
                                     ontology=ont)

    (cell_features, sweep_features, cell_record, sweep_records,
     cell_state, feature_states) = dsft.extract_data_set_features(data_set)

    if cell_state['failed_fx']:
        feature_data = {'cell_state': cell_state}
    else:
        if cell_info:
            cell_record.update(cell_info)

        feature_data = {
            'cell_features': cell_features,
            'sweep_features': sweep_features,
            'cell_record': cell_record,
            'sweep_records': sweep_records,
            'cell_state': cell_state,
            'feature_states': feature_states
        }

        if write_spikes:
            if not feature_states['sweep_features_state']['failed_fx']:
                sweep_spike_times = collect_spike_times(sweep_features)
                append_spike_times(input_nwb_file,
                                   sweep_spike_times,
                                   output_nwb_path=output_nwb_file)
            else:
                logging.warning("extract_sweep_features failed, "
                                "unable to write spikes")

    if qc_fig_dir is None:
        logging.info("qc_fig_dir is not provided, will not save figures")
    else:
        plotqc.display_features(qc_fig_dir, data_set, feature_data)

    # On Windows int64 keys of sweep numbers cannot be converted to str by
    # json.dump when serializing, so convert them here. Guarded because
    # 'sweep_features' is absent when feature extraction failed.
    if "sweep_features" in feature_data:
        feature_data["sweep_features"] = {
            str(k): v for k, v in feature_data["sweep_features"].items()
        }

    return feature_data
import os

import pandas as pd

from ipfx.dataset.create import create_ephys_data_set
from ipfx.qc_feature_extractor import sweep_qc_features
import ipfx.sweep_props as sweep_props
import ipfx.qc_feature_evaluator as qcp
from ipfx.stimulus import StimulusOntology

# Download and access the experimental data from the DANDI archive per
# instructions in the documentation.
# The example below uses an nwb file provided with the package.
nwb_file = os.path.join(os.path.dirname(os.getcwd()),
                        "data",
                        "nwb2_H17.03.008.11.03.05.nwb")
data_set = create_ephys_data_set(nwb_file=nwb_file)

# Compute sweep QC features
sweep_features = sweep_qc_features(data_set)

# Drop sweeps that failed to compute QC criteria
sweep_props.drop_tagged_sweeps(sweep_features)
sweep_props.remove_sweep_feature("tags", sweep_features)

stimulus_ontology = StimulusOntology.default()
qc_criteria = qcp.load_default_qc_criteria()

sweep_states = qcp.qc_sweeps(stimulus_ontology, sweep_features, qc_criteria)

# Print a few sweeps and states
print(pd.DataFrame(sweep_features).head())
print(sweep_states[0:len(pd.DataFrame(sweep_features).head())])
def run_feature_vector_extraction(output_dir, data_source, output_code,
                                  project, output_file_type, sweep_qc_option,
                                  include_failed_cells, run_parallel,
                                  ap_window_length, ids=None, file_list=None,
                                  **kwargs):
    """
    Extract feature vector from a list of cells and save result to the
    output file(s)

    Parameters
    ----------
    output_dir : str
        see CollectFeatureVectorParameters input schema for details
    data_source : str
        see CollectFeatureVectorParameters input schema for details
    output_code : str
        see CollectFeatureVectorParameters input schema for details
    project : str
        see CollectFeatureVectorParameters input schema for details
    output_file_type : str
        see CollectFeatureVectorParameters input schema for details
    sweep_qc_option : str
        see CollectFeatureVectorParameters input schema for details
    include_failed_cells : bool
        see CollectFeatureVectorParameters input schema for details
    run_parallel : bool
        see CollectFeatureVectorParameters input schema for details
    ap_window_length : float
        see CollectFeatureVectorParameters input schema for details
    ids : int
        ids associated to each cell
    file_list : list of str
        nwb file names
    kwargs

    Returns
    -------
    """
    if ids is not None:
        specimen_ids = ids
    elif data_source == "lims":
        specimen_ids = lq.project_specimen_ids(
            project, passed_only=not include_failed_cells)
    else:
        logging.error("Must specify input file if data source is not LIMS")

    if output_file_type == "h5":
        # Check that we can access the specified file before processing
        # everything
        h5_file = h5py.File(
            os.path.join(output_dir, "fv_{}.h5".format(output_code)), "a")
        h5_file.close()

    ontology = StimulusOntology(
        ju.read(StimulusOntology.DEFAULT_STIMULUS_ONTOLOGY_FILE))

    logging.info("Number of specimens to process: {:d}".format(
        len(specimen_ids)))
    get_data_partial = partial(data_for_specimen_id,
                               sweep_qc_option=sweep_qc_option,
                               data_source=data_source,
                               ontology=ontology,
                               ap_window_length=ap_window_length,
                               file_list=file_list)

    if run_parallel:
        pool = Pool()
        results = pool.map(get_data_partial, specimen_ids)
    else:
        results = map(get_data_partial, specimen_ids)

    used_ids, results, error_set = su.filter_results(specimen_ids, results)

    logging.info("Finished with {:d} processed specimens".format(
        len(used_ids)))

    results_dict = su.organize_results(used_ids, results)

    if output_file_type == "h5":
        su.save_results_to_h5(used_ids, results_dict, output_dir, output_code)
    elif output_file_type == "npy":
        su.save_results_to_npy(used_ids, results_dict, output_dir, output_code)
    else:
        raise ValueError(
            "Unknown output_file_type option {} (allowed values are h5 and npy)"
            .format(output_file_type))

    su.save_errors_to_json(error_set, output_dir, output_code)
    logging.info("Finished saving")