def load_raw_data(self):
    """
    Load the task settings, the raw task data and the TTL events of the session.

    The session type is inferred from the session path when `self.type` is not already set.
    TTLs are read from the Bpod data unless the session type is 'ephys' and `bpod_only` is
    not True; in that case they are taken from the main probe sync, and `bpod_ttls` is
    populated in addition to `frame_ttls` and `audio_ttls`.

    :return:
    """
    self.log.info(f"Loading raw data from {self.session_path}")
    self.type = self.type or get_session_extractor_type(self.session_path)
    self.settings, self.raw_data = raw.load_bpod(self.session_path)

    from_fpga = self.type == 'ephys' and self.bpod_only is not True
    if not from_fpga:
        # Photodiode and audio TTLs come straight from the Bpod data
        self.frame_ttls, self.audio_ttls = raw.load_bpod_fronts(
            self.session_path, data=self.raw_data)
        return

    # Photodiode, audio and Bpod TTLs come from the FPGA sync
    sync, chmap = ephys_fpga.get_main_probe_sync(self.session_path)
    fields = ('polarities', 'times')

    def channel_events(name):
        """Return the polarities and times of the sync events on the named channel"""
        selected = sync['channels'] == chmap[name]
        return {field: sync[field][selected] for field in fields}

    self.frame_ttls, self.audio_ttls, self.bpod_ttls = (
        channel_events(channel) for channel in ('frame2ttl', 'audio', 'bpod'))
def extract_all(session_path, save=True, bin_exists=False):
    """
    Run the FPGA trials extractors for a session.

    `FpgaTrials` is always run.  Depending on the session extractor type, additional
    extractor classes are appended: `ProbaContrasts` for 'ephys', 'mock_ephys' and
    'sync_ephys'; the biased probability-left / contrast extractors plus the opto laser
    extractor for 'ephys_biased_opto'.

    :param session_path: '/path/to/subject/yyyy-mm-dd/001'
    :param save: Bool, defaults to True; forwarded to the extractors
    :param bin_exists: Bool, defaults to False; forwarded to `get_main_probe_sync`
    :return: outputs, files
    """
    session_type = extractors_base.get_session_extractor_type(session_path)
    _logger.info(f"Extracting {session_path} as {session_type}")

    extractor_classes = [FpgaTrials]
    if session_type in ('ephys', 'mock_ephys', 'sync_ephys'):
        extractor_classes.append(ProbaContrasts)
    elif session_type == 'ephys_biased_opto':
        # Imported here so that the opto extractors are only loaded when required
        from ibllib.io.extractors import opto_trials
        extractor_classes += [
            biased_trials.ProbabilityLeft,
            biased_trials.ContrastLR,
            opto_trials.LaserBool,
        ]

    sync, chmap = get_main_probe_sync(session_path, bin_exists=bin_exists)
    run_kwargs = dict(session_path=session_path, save=save, sync=sync, chmap=chmap)
    outputs, files = extractors_base.run_extractor_classes(extractor_classes, **run_kwargs)
    return outputs, files
def extract_data(self):
    """
    Extract and load the behaviour data used for QC.

    For 'ephys' sessions (when `bpod_only` is falsy) the FPGA extraction is run and the
    Bpod wheel data are added after converting their timestamps to the FPGA clock.
    Otherwise the Bpod trials extraction is run on the already loaded raw data.  The
    result is passed through `rename_data` and stored in the `data` attribute.

    NB: partial extraction when bpod_only attribute is False requires intervals and
    intervals_bpod to be assigned to the data attribute before calling this function.

    :return:
    """
    self.log.info(f"Extracting session: {self.session_path}")
    self.type = self.type or get_session_extractor_type(self.session_path)
    use_fpga = self.type == 'ephys' and not self.bpod_only
    if use_fpga:
        self.wheel_encoding = 'X4'
    else:
        self.wheel_encoding = 'X1'

    if not self.raw_data:
        self.load_raw_data()

    if use_fpga:
        extracted, _ = ephys_fpga.extract_all(self.session_path)
        # Interpolant from Bpod trial start times to the FPGA trial start times
        bpod2fpga = interp1d(extracted['intervals_bpod'][:, 0],
                             extracted['table']['intervals_0'],
                             fill_value='extrapolate')
        # Add the Bpod wheel data, with timestamps expressed in FPGA time
        wheel_times, wheel_pos = get_wheel_position(self.session_path, self.raw_data)
        extracted['wheel_timestamps_bpod'] = bpod2fpga(wheel_times)
        extracted['wheel_position_bpod'] = wheel_pos
    else:
        trials, wheel, _ = bpod_trials.extract_all(
            self.session_path, save=False, bpod_trials=self.raw_data, settings=self.settings)
        # Smallest first-dimension length across all extracted trial arrays
        n_trials = np.unique([trials[key].shape[0] for key in trials])[0]
        if self.type == 'habituation':
            trials['position'] = np.array([trial['position'] for trial in self.raw_data])
            trials['phase'] = np.array([trial['stim_phase'] for trial in self.raw_data])
            # Trim to n_trials: stim off events of the last trial happen at trial num + 1
            extracted = {key: values[:n_trials] for key, values in trials.items()}
        else:
            extracted = {**trials, **wheel}

    # Update the data attribute with the (renamed) extracted data
    self.data = self.rename_data(extracted)
def ensure_required_data(self):
    """
    Check that the datasets required for QC are available locally.

    When the download_data attribute is True, the datasets are downloaded; otherwise the
    session folder is searched for them.  An AssertionError is raised if a non-optional
    dataset type cannot be found.  When the stream attribute is truthy, the video file does
    not have to be local but it must be listed remotely.  On success the `_type` attribute
    is set from the session path.

    NB: Requires a valid instance of ONE and a valid session eid.

    :return:
    """
    assert self.one is not None, 'ONE required to download data'
    # Dataset types that are allowed to be absent
    optional = ('camera.times', '_iblrig_Camera.raw', 'wheel.position', 'wheel.timestamps',
                '_iblrig_Camera.frame_counter', '_iblrig_Camera.GPIO')
    # dataset collections outside this list are ignored (e.g. probe00, raw_passive_data)
    collections = ('alf', 'raw_ephys_data', 'raw_behavior_data', 'raw_video_data')

    # Determine whether this is an ephys session, from the type or the task protocol
    protocol = self.type or self.one.get_details(self.eid)['task_protocol']
    is_ephys = 'ephys' in protocol
    if is_ephys:
        dtypes = self.dstypes + self.dstypes_fpga
    else:
        dtypes = self.dstypes

    assert_unique = True
    # Check we have raw ephys data for the session
    if is_ephys and len(self.one.list_datasets(self.eid, collection='raw_ephys_data')) == 0:
        # Only acceptable for 3A probes, in which case the probe collections are included
        details = self.one.get_details(self.eid, full=True)
        probe_model = next(insertion['model'] for insertion in details['probe_insertion'])
        assert probe_model == '3A', 'raw ephys data missing'
        collections += ('raw_ephys_data/probe00', 'raw_ephys_data/probe01')
        assert_unique = False

    for dstype in dtypes:
        datasets = self.one.type2datasets(self.eid, dstype, details=True)
        if 'camera' in dstype.lower():
            # Keep only the datasets of this camera
            datasets = filter_datasets(datasets, filename=f'.*{self.label}.*')
        else:
            # Ignore probe datasets, etc.
            datasets = filter_datasets(datasets, collection=collections,
                                       assert_unique=assert_unique)

        if any(path.endswith('.mp4') for path in datasets.rel_path) and self.stream:
            # Streaming: the video only needs to exist remotely
            remote_names = [
                rel_path.split('/')[-1]
                for rel_path in self.one.list_datasets(self.eid, details=False)
            ]
            assert f'_iblrig_{self.label}Camera.raw.mp4' in remote_names, \
                'No remote video file found'
            continue

        if self.download_data:
            present = self.one._download_datasets(datasets)
        else:
            # Lazily look for each dataset anywhere below the session folder
            present = (next(self.session_path.rglob(rel_path), None)
                       for rel_path in datasets['rel_path'])
        required = dstype not in optional
        all_present = not datasets.empty and all(present)
        assert all_present or not required, f'Dataset {dstype} not found'

    self._type = get_session_extractor_type(self.session_path)
def _get_pipeline_class(session_path, one):
    """
    Instantiate the extraction pipeline matching a session.

    The 'training' and 'ephys' pipelines map to the built-in pipeline classes; for any
    other pipeline the class is requested from the personal `projects.base` module using
    the session extractor type.

    :param session_path: path of the session
    :param one: ONE instance passed to the pipeline
    :return: a pipeline instance created with `session_path` and `one`
    """
    pipeline_name = get_pipeline(session_path)
    if pipeline_name == 'ephys':
        pipeline_cls = ephys_preprocessing.EphysExtractionPipeline
    elif pipeline_name == 'training':
        pipeline_cls = training_preprocessing.TrainingExtractionPipeline
    else:
        # Fall back on a custom extractor from the personal projects extraction module
        import projects.base
        extractor_type = get_session_extractor_type(session_path)
        pipeline_cls = projects.base.get_pipeline(extractor_type)
    _logger.info(f"Using {pipeline_cls} pipeline for {session_path}")
    return pipeline_cls(session_path=session_path, one=one)
def job_creator(root_path, one=None, dry=False, rerun=False, max_md5_size=None):
    """
    Server function that will look for creation flags and for each:
    1) create the sessions on Alyx
    2) register the corresponding raw data files on Alyx
    3) create the tasks to be run on Alyx

    A session that fails to register, or whose type has no matching pipeline, is logged and
    skipped: its flag file is left in place and the remaining sessions are still processed.

    :param root_path: main path containing sessions or session path
    :param one: ONE instance; a default one is instantiated if not provided
    :param dry: if True, only log the sessions that would be created
    :param rerun: if True, tasks are (re)created regardless of their status, otherwise only
     tasks with the 'Waiting' status are considered
    :param max_md5_size: passed on to the raw data registration
    :return: list of the datasets registered across all sessions
    """
    if not one:
        one = ONE()
    rc = registration.RegistrationClient(one=one)
    flag_files = list(Path(root_path).glob('**/raw_session.flag'))
    all_datasets = []
    for flag_file in flag_files:
        session_path = flag_file.parent
        _logger.info(f'creating session for {session_path}')
        if dry:
            continue
        # if the subject doesn't exist in the database, skip
        try:
            rc.create_session(session_path)
            _, dsets = registration.register_session_raw_data(
                session_path, one=one, max_md5_size=max_md5_size)
        except Exception:
            # Only catch regular errors: KeyboardInterrupt / SystemExit must propagate
            _logger.error(traceback.format_exc())
            _logger.info(f"Creating session / registering raw datasets {session_path} errored")
            continue
        if dsets is not None:
            all_datasets.extend(dsets)
        session_type = get_session_extractor_type(session_path)
        if session_type in ['biased', 'habituation', 'training']:
            pipe = training_preprocessing.TrainingExtractionPipeline(session_path, one=one)
        # only start extracting ephys on a raw_session.flag
        elif session_type in ['ephys'] and flag_file.name == 'raw_session.flag':
            pipe = ephys_preprocessing.EphysExtractionPipeline(session_path, one=one)
        else:
            _logger.info(f"Session type {session_type} as no matching extractor {session_path}")
            # Skip this session only; returning here would drop the remaining flag files
            # and hand None back to the caller instead of the registered datasets
            continue
        rerun__status__in = '__all__' if rerun else ['Waiting']
        pipe.create_alyx_tasks(rerun__status__in=rerun__status__in)
        flag_file.unlink()
    return all_datasets
def extract_all(session_path, save=True, bin_exists=False):
    """
    Run the `FpgaTrials` extractor for a session, using the main probe sync.

    The session extractor type is only used for logging here.

    :param session_path: '/path/to/subject/yyyy-mm-dd/001'
    :param save: Bool, defaults to True; forwarded to the extractor
    :param bin_exists: Bool, defaults to False; forwarded to `get_main_probe_sync`
    :return: outputs, files
    """
    session_type = extractors_base.get_session_extractor_type(session_path)
    _logger.info(f"Extracting {session_path} as {session_type}")
    extractor_classes = [FpgaTrials]
    sync, chmap = get_main_probe_sync(session_path, bin_exists=bin_exists)
    run_kwargs = dict(session_path=session_path, save=save, sync=sync, chmap=chmap)
    outputs, files = extractors_base.run_extractor_classes(extractor_classes, **run_kwargs)
    return outputs, files
def __init__(self, session_path_or_eid, camera, **kwargs):
    """
    :param session_path_or_eid: A session id or path
    :param camera: The camera to run QC on, if None QC is run for all three cameras
    :param n_samples: The number of frames to sample for the position and brightness QC
    :param stream: If true and local video files not available, the data are streamed from
    the remote source.
    :param download_data: If true, the required data are downloaded; defaults to True when
    an eid (rather than a session path) is provided
    :param log: A logging.Logger instance, if None the 'ibllib' logger is used
    :param one: An ONE instance for fetching and setting the QC on Alyx
    """
    # When an eid is provided, we will download the required data by default (if necessary)
    download_data = not is_session_path(session_path_or_eid)
    self.download_data = kwargs.pop('download_data', download_data)
    self.stream = kwargs.pop('stream', None)
    self.n_samples = kwargs.pop('n_samples', 100)
    super().__init__(session_path_or_eid, **kwargs)

    # Data
    self.label = assert_valid_label(camera)
    filename = f'_iblrig_{self.label}Camera.raw*.mp4'
    raw_video_path = self.session_path.joinpath('raw_video_data')
    self.video_path = next(raw_video_path.glob(filename), None)

    # If local video doesn't exist, change video path to URL
    if not self.video_path and self.stream is not False and self.one is not None:
        try:
            self.stream = True
            self.video_path = self.one.path2url(raw_video_path / filename.replace('*', ''))
        except (StopIteration, ALFObjectNotFound):
            _log.error('No remote or local video file found')
            self.video_path = None

    # Silence all logging while the session type is inferred.  The finally clause makes
    # sure logging is re-enabled even if the lookup raises, otherwise logging would stay
    # disabled for the whole process.
    logging.disable(logging.CRITICAL)
    try:
        self._type = get_session_extractor_type(self.session_path) or None
    finally:
        logging.disable(logging.NOTSET)

    keys = ('count', 'pin_state', 'audio', 'fpga_times', 'wheel', 'video',
            'frame_samples', 'timestamps', 'camera_times', 'bonsai_times')
    self.data = Bunch.fromkeys(keys)
    self.frame_samples_idx = None

    # QC outcomes map
    self.metrics = None
    self.outcome = 'NOT_SET'
def create_pipeline(session_path):
    """
    Re-create a session from its flag file and check the resulting training tasks.

    Touches 'raw_session.flag' in the session folder, deletes the session from Alyx if it
    is already registered, runs the job creator and then asserts that the session exists
    and that 5 tasks are listed for the 'TrainingExtractionPipeline' graph.

    :param session_path: path of the session (must support `joinpath`)
    """
    session_type = get_session_extractor_type(session_path)
    print(session_path, session_type)
    flag_file = session_path.joinpath('raw_session.flag')
    flag_file.touch()

    # Start from a clean slate: remove the session if it is already registered
    existing_eid = one.eid_from_path(session_path)
    if existing_eid is not None:
        one.alyx.rest('sessions', 'delete', id=existing_eid)

    local_server.job_creator(session_path, one=one, max_md5_size=1024 * 1024 * 20)

    new_eid = one.eid_from_path(session_path)
    assert (new_eid)
    tasks = one.alyx.rest('tasks', 'list', session=new_eid,
                          graph='TrainingExtractionPipeline')
    assert (len(tasks) == 5)
def _ensure_required_data(self):
    """
    Check that the datasets required for QC are available locally.

    When the download_data attribute is True, the datasets are downloaded; otherwise the
    session folder is searched for them.  An AssertionError is raised if a non-optional
    dataset type cannot be found.  When the stream attribute is truthy, the video file does
    not have to be local but it must be listed remotely.  On success the `_type` attribute
    is set from the session path.

    NB: Requires a valid instance of ONE and a valid session eid.

    :return:
    """
    assert self.one is not None, 'ONE required to download data'
    # Dataset types that are allowed to be absent
    optional = ('camera.times', '_iblrig_Camera.raw', 'wheel.position', 'wheel.timestamps',
                '_iblrig_Camera.frame_counter', '_iblrig_Camera.GPIO')
    # dataset collections outside this list are ignored (e.g. probe00, raw_passive_data)
    collections = ('alf', 'raw_ephys_data', 'raw_behavior_data', 'raw_video_data')

    # Determine whether this is an ephys session, from the type or the task protocol
    protocol = self.type or self.one.get_details(self.eid)['task_protocol']
    is_ephys = 'ephys' in protocol
    if is_ephys:
        dtypes = self.dstypes + self.dstypes_fpga
    else:
        dtypes = self.dstypes

    for dstype in dtypes:
        records = self.one.datasets_from_type(self.eid, dstype, full=True)
        if 'camera' in dstype.lower():
            # Keep only the datasets of this camera
            dataset = [rec for rec in records if self.side in rec['name']]
        else:
            # Ignore probe datasets, etc.
            dataset = [rec for rec in records if rec['collection'] in collections]

        if any(rec['name'].endswith('.mp4') for rec in dataset) and self.stream:
            # Streaming: the video only needs to exist remotely
            remote_names = [item.name for item in self.one.list(self.eid)]
            assert f'_iblrig_{self.side}Camera.raw.mp4' in remote_names, \
                'No remote video file found'
            continue

        required = dstype not in optional
        if self.download_data:
            present = self.one.download_datasets(dataset)
        else:
            # Lazily look for each dataset anywhere below the session folder
            present = (next(self.session_path.rglob(rec['name']), None) for rec in dataset)
        all_present = dataset and all(present)
        assert all_present or not required, f'Dataset {dstype} not found'

    self._type = get_session_extractor_type(self.session_path)
def _ensure_required_data(self):
    """
    Download the raw datasets needed for the task QC and report any that are missing.

    The task settings are fetched first; when they are returned and the session extractor
    type is 'ephys', the sync and ephys meta/wiring dataset types are requested as well.
    If the session path is None or nothing could be downloaded, the `lazy` attribute is
    set to True and an error is logged; if only some datasets are missing, a warning is
    logged.

    :return:
    """
    dstypes = [
        "_iblrig_taskData.raw",
        "_iblrig_taskSettings.raw",
        "_iblrig_encoderPositions.raw",
        "_iblrig_encoderEvents.raw",
        "_iblrig_stimPositionScreen.raw",
        "_iblrig_syncSquareUpdate.raw",
        "_iblrig_encoderTrialInfo.raw",
        "_iblrig_ambientSensorData.raw",
    ]
    eid = self.one.path2eid(self.session_path)
    self.log.info(f"Downloading data for session {eid}")

    # Ensure we have the settings, as the extractor type lookup below uses the session path
    settings, _ = self.one.load_datasets(
        eid, ["_iblrig_taskSettings.raw.json"], collections=['raw_behavior_data'],
        download_only=True, assert_present=False)
    if settings and get_session_extractor_type(self.session_path) == 'ephys':
        dstypes += [
            '_spikeglx_sync.channels',
            '_spikeglx_sync.polarities',
            '_spikeglx_sync.times',
            'ephysData.raw.meta',
            'ephysData.raw.wiring',
        ]

    dataset = self.one.type2datasets(eid, dstypes, details=True)
    files = self.one._download_datasets(dataset)
    if files:
        missing = [downloaded is None for downloaded in files]
    else:
        # Nothing came back at all: every requested dataset type counts as missing
        missing = [True] * len(dstypes)

    if self.session_path is None or all(missing):
        self.lazy = True
        self.log.error("Data not found on server, can't calculate QC.")
    elif any(missing):
        self.log.warning(
            f"Missing some datasets for session {eid} in path {self.session_path}"
        )
def _run(self):
    """
    Extract an iblrig training session and run the task QC on it.

    :return: the files written by the trials extraction, or None when no trials were
     extracted
    """
    trials, wheel, output_files = bpod_trials.extract_all(self.session_path, save=True)
    if trials is None:
        return None

    # Pick the QC class matching the session type
    is_habituation = get_session_extractor_type(self.session_path) == 'habituation'
    qc_class = HabituationQC if is_habituation else TaskQC
    qc = qc_class(self.session_path, one=self.one)
    qc.extractor = TaskQCExtractor(self.session_path, one=self.one)
    if not is_habituation:
        # Wheel encoding is only overridden for non-habituation sessions
        qc.extractor.wheel_encoding = 'X1'

    # Aggregate and update Alyx QC fields
    qc.run(update=True)
    return output_files
def get_trials_task(session_path, one):
    """
    Return the trials extraction task matching the pipeline of a session.

    :param session_path: path of the session
    :param one: ONE instance passed to the task / pipeline
    :return: a `TrainingTrials` or `EphysTrials` task for the built-in pipelines; otherwise
     the first task whose name contains 'Trials' in the custom project pipeline, or None if
     that lookup fails for any reason
    """
    pipeline = get_pipeline(session_path)
    if pipeline == 'training':
        from ibllib.pipes.training_preprocessing import TrainingTrials
        return TrainingTrials(session_path, one=one)
    if pipeline == 'ephys':
        from ibllib.pipes.ephys_preprocessing import EphysTrials
        return EphysTrials(session_path, one=one)

    try:
        # Look for a custom extractor in the personal projects extraction module
        import projects.base
        extractor_type = get_session_extractor_type(session_path)
        pipeline_cls = projects.base.get_pipeline(extractor_type)
        custom_pipeline = pipeline_cls(session_path, one)
        trials_name = next(name for name in custom_pipeline.tasks if 'Trials' in name)
        return custom_pipeline.tasks.get(trials_name)
    except Exception:
        # Best effort: any failure means no trials task is available
        return None
def extract_all(session_path, session_type=None, save=True, **kwargs):
    """
    Extract the video time stamps of a session.

    Session types containing 'ephys' use one `CameraTimestampsFPGA` extractor per camera
    label, together with the main probe sync (loaded here unless `sync` is given in
    kwargs).  Every other supported session type uses `CameraTimestampsBpod`.

    :param session_path: '/path/to/subject/yyyy-mm-dd/001'
    :param session_type: the session type to extract, i.e. 'ephys', 'training' or 'biased'.
     If None the session type is inferred from the settings file.
    :param save: Bool, defaults to True
    :param kwargs: parameters to pass to the extractor; `labels` and (when the sync is
     loaded here) `bin_exists` are consumed by this function
    :return: outputs, files
    :raises ValueError: if the session type is empty or not among the configured task types
    """
    if session_type is None:
        session_type = get_session_extractor_type(session_path)

    known_types = () if not session_type else _get_task_types_json_config().values()
    if session_type not in known_types:
        raise ValueError(
            f"Session type {session_type} has no matching extractor")

    if 'ephys' in session_type:  # assume ephys == FPGA
        labels = assert_valid_label(
            kwargs.pop('labels', ('left', 'right', 'body')))
        if isinstance(labels, str):
            labels = (labels, )  # Ensure list/tuple
        extractor = [partial(CameraTimestampsFPGA, label) for label in labels]
        if 'sync' not in kwargs:
            bin_exists = kwargs.pop('bin_exists', False)
            sync, chmap = get_main_probe_sync(session_path, bin_exists=bin_exists)
            kwargs['sync'] = sync
            kwargs['chmap'] = chmap
    else:  # assume Bpod otherwise
        # NOTE(review): this only checks that `labels` is truthy, not that it is 'left'
        # — confirm the intent
        assert kwargs.pop('labels', 'left'), 'only left camera is currently supported'
        extractor = CameraTimestampsBpod

    outputs, files = run_extractor_classes(
        extractor, session_path=session_path, save=save, **kwargs)
    return outputs, files
def get_training_info_for_session(session_paths, one):
    """
    Gather the training information needed for plots, one dict per session.

    Habituation sessions get NaN placeholders.  For other sessions the metrics are computed
    from the session trials; a session whose trials cannot be loaded is left out.  When
    more than one session remains and they all share the same protocol, metrics computed on
    the combined trials are added under the 'combined_*' keys; otherwise the 'combined_*'
    keys simply repeat each session's own values.

    :param session_paths: list of session paths on same date
    :param one: ONE instance
    :return: list of dicts, one per retained session
    """
    sess_dicts = []
    for path in session_paths:
        path = Path(path)
        info = {
            'date': str(one.path2ref(path)['date']),
            'session_path': str(path),
            'task_protocol': get_session_extractor_type(path),
        }
        if info['task_protocol'] == 'habituation':
            nan_array = np.array([np.nan])
            info.update({
                'performance': nan_array,
                'contrasts': nan_array,
                'performance_easy': np.nan,
                'reaction_time': np.nan,
                'n_trials': np.nan,
                'sess_duration': np.nan,
                'n_delay': np.nan,
                'location': np.nan,
                'training_status': 'habituation',
            })
        else:
            # Without trials nothing can be computed for this session: leave it out
            trials = load_trials(path, one)
            if trials is None:
                continue
            sess_perf, sess_contrasts, _ = training.compute_performance(
                trials, prob_right=True)
            info['performance'] = sess_perf
            info['contrasts'] = sess_contrasts
            info['performance_easy'] = training.compute_performance_easy(trials)
            info['reaction_time'] = training.compute_median_reaction_time(trials)
            info['n_trials'] = training.compute_n_trials(trials)
            duration, delay, location = compute_session_duration_delay_location(path)
            info.update(sess_duration=duration, n_delay=delay, location=location)
            info['task_protocol'] = get_session_extractor_type(path)
            info['training_status'] = 'not_computed'
        sess_dicts.append(info)

    protocols = [info['task_protocol'] for info in sess_dicts]
    n_protocols = len(set(protocols))
    if protocols and n_protocols != 1:
        print(f'Different protocols on same date {sess_dicts[0]["date"]} : {protocols}')

    def _pad_with_nan(values, target_size):
        """Append NaNs to `values` so that it reaches `target_size` elements"""
        return np.r_[values, np.full(target_size - values.size, np.nan)]

    if len(sess_dicts) > 1 and n_protocols == 1:  # Only if all protocols are the same
        print(f'{len(sess_dicts)} sessions being combined for date {sess_dicts[0]["date"]}')
        combined_trials = load_combined_trials(session_paths, one)
        performance, contrasts, _ = training.compute_performance(
            combined_trials, prob_right=True)
        combined = {
            'combined_performance': performance,
            'combined_contrasts': contrasts,
            'combined_performance_easy': training.compute_performance_easy(combined_trials),
            'combined_reaction_time': training.compute_median_reaction_time(combined_trials),
            'combined_n_trials': training.compute_n_trials(combined_trials),
            'combined_sess_duration': np.nansum([s['sess_duration'] for s in sess_dicts]),
            'combined_n_delay': np.nanmax([s['n_delay'] for s in sess_dicts]),
        }
        for info in sess_dicts:
            info.update(combined)
            # Sessions of the same day may have a different number of contrasts: pad the
            # per-session arrays with NaNs up to the size of the combined ones
            if info['combined_performance'].size != info['performance'].size:
                info['performance'] = _pad_with_nan(
                    info['performance'], info['combined_performance'].size)
                info['contrasts'] = _pad_with_nan(
                    info['contrasts'], info['combined_contrasts'].size)
    else:
        # Nothing to combine: the combined values are the session's own values
        key_pairs = (
            ('combined_performance', 'performance'),
            ('combined_contrasts', 'contrasts'),
            ('combined_performance_easy', 'performance_easy'),
            ('combined_reaction_time', 'reaction_time'),
            ('combined_n_trials', 'n_trials'),
            ('combined_sess_duration', 'sess_duration'),
            ('combined_n_delay', 'n_delay'),
        )
        for info in sess_dicts:
            for combined_key, key in key_pairs:
                info[combined_key] = info[key]

    return sess_dicts