def lims_validation(lims_id):
    '''Validate a LIMS session's files for the Day1 and Day2 uploads.

    Uses the D1 and D2 schemas to define 1) which files to look for, and
    2) how big they should be (right now just a min_size threshold defined
    as 80% of the size of a random good session. This could be improved...)
    If you would like to add files to this validation, edit the
    D1_LIMS_schema or D2_LIMS_schema files (imported at top).

    INPUT:
        lims_id: lims ecephys session id
    OUTPUT:
        lims_validator: dictionary with 4 keys:
            storage_directory: storage directory for session in LIMS
            D1: results of checking LIMS data against the D1_schema
                (output of check_schema)
            D2: results of checking LIMS data against the D2_schema
            Error: None on success, otherwise the formatted traceback of
                the exception hit while attempting to run validation
    '''
    import traceback  # local import; module import block not guaranteed to have it
    try:
        d = data_getters.lims_data_getter(lims_id)
        paths = d.data_dict
        storage_dir = os.path.normpath(paths['storage_directory'])
        lims_validator = {'storage_directory': storage_dir, 'D1': {}, 'D2': {}}
        lims_validator['D1'] = check_schema(D1_schema, paths)
        lims_validator['D2'] = check_schema(D2_schema, paths)
        lims_validator['Error'] = None
    except Exception:
        # Catch Exception (not bare except, which would also trap
        # KeyboardInterrupt/SystemExit) and keep the full traceback so the
        # failure can actually be diagnosed -- str(sys.exc_info()[2]) only
        # yielded the repr of the traceback object.
        lims_validator = {'Error': traceback.format_exc()}
    return lims_validator
def from_lims(cls, ephys_experiment_id: int):
    """Alternate constructor: build an instance from a LIMS ecephys experiment id.

    Looks up the session's file paths through data_getters.lims_data_getter
    and hands the resulting path dict to the normal constructor.
    """
    getter = data_getters.lims_data_getter(exp_id=ephys_experiment_id)
    lims_paths = getter.data_dict
    return cls(data_paths=lims_paths)
def run_qc(exp_id, save_root):
    """Run the full QC pipeline for one ecephys session and save all figures.

    exp_id: either a LIMS ecephys session id, or a local session directory
        name -- local names are detected by containing an underscore.
    save_root: root directory; figures go in
        <save_root>/<es_id>_<mouse_id>_<datestring>/<module subdirs>.
    """
    identifier = exp_id
    if identifier.find('_') >= 0:
        d = data_getters.local_data_getter(base_dir=identifier)
    else:
        d = data_getters.lims_data_getter(exp_id=identifier)
    paths = d.data_dict
    FIG_SAVE_DIR = os.path.join(
        save_root,
        paths['es_id'] + '_' + paths['external_specimen_name'] + '_' +
        paths['datestring'])
    if not os.path.exists(FIG_SAVE_DIR):
        os.mkdir(FIG_SAVE_DIR)

    # Every saved figure filename starts with <mouse>_<date>_
    figure_prefix = paths['external_specimen_name'] + '_' + paths[
        'datestring'] + '_'

    ### GET FILE PATHS TO SYNC AND PKL FILES ###
    SYNC_FILE = paths['sync_file']
    BEHAVIOR_PKL = paths['behavior_pkl']
    REPLAY_PKL = paths['replay_pkl']
    MAPPING_PKL = paths['mapping_pkl']
    for f, s in zip([SYNC_FILE, BEHAVIOR_PKL, REPLAY_PKL, MAPPING_PKL],
                    ['sync: ', 'behavior: ', 'replay: ', 'mapping: ']):
        print(s + f)

    ### GET MAIN DATA STREAMS ###
    syncDataset = sync_dataset(SYNC_FILE)
    behavior_data = pd.read_pickle(BEHAVIOR_PKL)
    mapping_data = pd.read_pickle(MAPPING_PKL)
    replay_data = pd.read_pickle(REPLAY_PKL)

    ### Behavior Analysis ###
    behavior_plot_dir = os.path.join(FIG_SAVE_DIR, 'behavior')
    trials = behavior_analysis.get_trials_df(behavior_data)
    behavior_analysis.plot_behavior(trials, behavior_plot_dir,
                                    prefix=figure_prefix)
    trial_types, counts = behavior_analysis.get_trial_counts(trials)
    behavior_analysis.plot_trial_type_pie(counts, trial_types,
                                          behavior_plot_dir,
                                          prefix=figure_prefix)

    ### CHECK FRAME COUNTS ###
    # vsync edges from sync channel 2 (vr/vf presumably rising/falling
    # edge times -- TODO confirm against probeSync.get_sync_line_data)
    vr, vf = probeSync.get_sync_line_data(syncDataset, channel=2)
    # intervalsms holds one entry per frame interval, so frames = size + 1
    behavior_frame_count = behavior_data['items']['behavior'][
        'intervalsms'].size + 1
    mapping_frame_count = mapping_data['intervalsms'].size + 1
    replay_frame_count = replay_data['intervalsms'].size + 1
    total_pkl_frames = (behavior_frame_count + mapping_frame_count +
                        replay_frame_count)

    ### CHECK THAT NO FRAMES WERE DROPPED FROM SYNC ###
    print('frames in pkl files: {}'.format(total_pkl_frames))
    print('frames in sync file: {}'.format(len(vf)))
    #assert(total_pkl_frames==len(vf))

    ### CHECK THAT REPLAY AND BEHAVIOR HAVE SAME FRAME COUNT ###
    print('frames in behavior stim: {}'.format(behavior_frame_count))
    print('frames in replay stim: {}'.format(replay_frame_count))
    #assert(behavior_frame_count==replay_frame_count)

    # look for potential frame offsets from aborted stims
    (behavior_start_frame, mapping_start_frame,
     replay_start_frame) = probeSync.get_frame_offsets(
         syncDataset,
         [behavior_frame_count, mapping_frame_count, replay_frame_count])
    behavior_end_frame = behavior_start_frame + behavior_frame_count - 1
    mapping_end_frame = mapping_start_frame + mapping_frame_count - 1
    replay_end_frame = replay_start_frame + replay_frame_count - 1

    MONITOR_LAG = 0.036  #TO DO: don't hardcode this...
    # vsync times shifted by monitor lag approximate on-screen appear times
    FRAME_APPEAR_TIMES = vf + MONITOR_LAG

    behavior_start_time, mapping_start_time, replay_start_time = [
        FRAME_APPEAR_TIMES[f]
        for f in [behavior_start_frame, mapping_start_frame,
                  replay_start_frame]
    ]
    behavior_end_time, mapping_end_time, replay_end_time = [
        FRAME_APPEAR_TIMES[f]
        for f in [behavior_end_frame, mapping_end_frame, replay_end_frame]
    ]

    ### Plot vsync info ###
    vsync_save_dir = os.path.join(FIG_SAVE_DIR, 'vsyncs')
    analysis.plot_frame_intervals(vf, behavior_frame_count,
                                  mapping_frame_count, behavior_start_frame,
                                  mapping_start_frame, replay_start_frame,
                                  vsync_save_dir, prefix=figure_prefix)
    analysis.plot_vsync_interval_histogram(vf, vsync_save_dir,
                                           prefix=figure_prefix)
    analysis.vsync_report(vf, total_pkl_frames, vsync_save_dir,
                          prefix=figure_prefix)

    ### BUILD UNIT TABLE ####
    probe_dict = probeSync.build_unit_table(paths['data_probes'], paths,
                                            syncDataset)

    ### Plot Probe Yield QC ###
    probe_yield_dir = os.path.join(FIG_SAVE_DIR, 'probe_yield')
    probe_dirs = [paths['probe' + pid] for pid in paths['data_probes']]
    analysis.plot_unit_quality_hist(probe_dict, probe_yield_dir,
                                    prefix=figure_prefix)
    analysis.plot_unit_distribution_along_probe(probe_dict, probe_yield_dir,
                                                prefix=figure_prefix)
    analysis.plot_all_spike_hist(probe_dict, probe_yield_dir,
                                 prefix=figure_prefix + 'good')
    analysis.copy_probe_depth_images(paths, probe_yield_dir,
                                     prefix=figure_prefix)

    ### Unit Metrics ###
    unit_metrics_dir = os.path.join(FIG_SAVE_DIR, 'unit_metrics')
    analysis.plot_unit_metrics(paths, unit_metrics_dir, prefix=figure_prefix)

    ### Probe/Sync alignment
    probeSyncDir = os.path.join(FIG_SAVE_DIR, 'probeSyncAlignment')
    analysis.plot_barcode_interval_hist(probe_dirs, syncDataset, probeSyncDir,
                                        prefix=figure_prefix)
    analysis.plot_barcode_intervals(probe_dirs, syncDataset, probeSyncDir,
                                    prefix=figure_prefix)
    analysis.probe_sync_report(probe_dirs, syncDataset, probeSyncDir,
                               prefix=figure_prefix)
    analysis.plot_barcode_matches(probe_dirs, syncDataset, probeSyncDir,
                                  prefix=figure_prefix)

    ### Plot visual responses
    get_RFs(probe_dict, mapping_data, mapping_start_frame, FRAME_APPEAR_TIMES,
            os.path.join(FIG_SAVE_DIR, 'receptive_fields'),
            prefix=figure_prefix)
    analysis.plot_population_change_response(probe_dict, behavior_frame_count,
                                             mapping_frame_count, trials,
                                             FRAME_APPEAR_TIMES,
                                             os.path.join(
                                                 FIG_SAVE_DIR,
                                                 'change_response'),
                                             ctx_units_percentile=66,
                                             prefix=figure_prefix)

    ### Plot running ###
    analysis.plot_running_wheel(behavior_data, mapping_data, replay_data,
                                behavior_plot_dir, prefix=figure_prefix)

    ### LFP ###
    lfp_save_dir = os.path.join(FIG_SAVE_DIR, 'LFP')
    lick_times = analysis.get_rewarded_lick_times(
        probeSync.get_lick_times(syncDataset), FRAME_APPEAR_TIMES, trials,
        min_inter_lick_time=0.5)
    lfp_dict = probeSync.build_lfp_dict(probe_dirs, syncDataset)
    analysis.plot_lick_triggered_LFP(lfp_dict, lick_times, lfp_save_dir,
                                     prefix=figure_prefix, agarChRange=None,
                                     num_licks=20, windowBefore=0.5,
                                     windowAfter=1.5, min_inter_lick_time=0.5,
                                     behavior_duration=3600)

    ### VIDEOS ###
    video_dir = os.path.join(FIG_SAVE_DIR, 'videos')
    analysis.lost_camera_frame_report(paths, video_dir, prefix=figure_prefix)
    analysis.camera_frame_grabs(
        paths, syncDataset, video_dir,
        [behavior_start_time, mapping_start_time, replay_start_time],
        [behavior_end_time, mapping_end_time, replay_end_time],
        epoch_frame_nums=[2, 2, 2], prefix=figure_prefix)
### SPECIFY EXPERIMENT TO PULL ####
#this should be either the ten digit lims id:
#identifier = '1013651431' #lims id
#or the local base directory
# Previously-run sessions, kept for reference. Only the final live
# assignment takes effect, so these are commented out (they were dead
# stores, each immediately overwritten by the next line):
#identifier = r'\\10.128.50.43\sd6.3\1033616558_509940_20200701'
#identifier = r'\\10.128.50.43\sd6.3\1033388795_509652_20200630'
#identifier = r"\\10.128.50.43\sd6.3\1033387557_509940_20200630"
#identifier = r"\\10.128.50.43\sd6.3\1037927382_513573_20200722"
#identifier = r"\\10.128.50.43\sd6.3\1038127711_513573_20200723"
identifier = '1041287144'

# '_' in the identifier means a local session folder; otherwise a LIMS id
if identifier.find('_') >= 0:
    d = data_getters.local_data_getter(base_dir=identifier)
else:
    d = data_getters.lims_data_getter(exp_id=identifier)
paths = d.data_dict

# Figures are saved under the QC share in <es_id>_<mouse>_<date>
FIG_SAVE_DIR = os.path.join(
    r"\\allen\programs\braintv\workgroups\nc-ophys\corbettb\NP_behavior_pipeline\QC",
    paths['es_id'] + '_' + paths['external_specimen_name'] + '_' +
    paths['datestring'])
if not os.path.exists(FIG_SAVE_DIR):
    os.mkdir(FIG_SAVE_DIR)

### GET FILE PATHS TO SYNC AND PKL FILES ###
SYNC_FILE = paths['sync_file']
BEHAVIOR_PKL = paths['behavior_pkl']
REPLAY_PKL = paths['replay_pkl']
MAPPING_PKL = paths['mapping_pkl']
def __init__(self, exp_id, save_root, modules_to_run='all'):
    """Load all data streams for one session up front, then run the QC modules.

    exp_id: LIMS ecephys session id, or a local session directory
        (local directories are detected by the '_' in their name).
    save_root: root directory under which the figure folder is created.
    modules_to_run: forwarded to _run_modules.
    """
    self.modules_to_run = modules_to_run
    identifier = exp_id
    if identifier.find('_')>=0:
        d = data_getters.local_data_getter(base_dir=identifier)
    else:
        d = data_getters.lims_data_getter(exp_id=identifier)
    self.paths = d.data_dict

    # Figures are saved under <save_root>/<es_id>_<mouse>_<date>
    self.FIG_SAVE_DIR = os.path.join(save_root,
                                     self.paths['es_id']+'_'+
                                     self.paths['external_specimen_name']+'_'+
                                     self.paths['datestring'])
    if not os.path.exists(self.FIG_SAVE_DIR):
        os.mkdir(self.FIG_SAVE_DIR)

    # Every saved figure filename starts with <mouse>_<date>_
    self.figure_prefix = self.paths['external_specimen_name'] + '_' + self.paths['datestring'] + '_'

    ### GET FILE PATHS TO SYNC AND PKL FILES ###
    self.SYNC_FILE = self.paths['sync_file']
    self.BEHAVIOR_PKL = self.paths['behavior_pkl']
    self.REPLAY_PKL = self.paths['replay_pkl']
    self.MAPPING_PKL = self.paths['mapping_pkl']
    self.OPTO_PKL = self.paths['opto_pkl']
    # NOTE(review): the opto pkl is loaded below but not echoed here
    for f,s in zip([self.SYNC_FILE, self.BEHAVIOR_PKL, self.REPLAY_PKL, self.MAPPING_PKL],
                   ['sync: ', 'behavior: ', 'replay: ', 'mapping: ']):
        print(s + f)

    ### GET MAIN DATA STREAMS ###
    self.syncDataset = sync_dataset(self.SYNC_FILE)
    self.behavior_data = pd.read_pickle(self.BEHAVIOR_PKL)
    self.mapping_data = pd.read_pickle(self.MAPPING_PKL)
    self.replay_data = pd.read_pickle(self.REPLAY_PKL)
    self.opto_data = pd.read_pickle(self.OPTO_PKL)

    self.trials = behavior_analysis.get_trials_df(self.behavior_data)

    ### CHECK FRAME COUNTS ###
    # vsync edges from sync channel 2 (vr/vf presumably rising/falling
    # edge times -- TODO confirm against probeSync.get_sync_line_data)
    vr, self.vf = probeSync.get_sync_line_data(self.syncDataset, channel=2)
    # intervalsms holds one entry per frame interval, so frames = size + 1
    self.behavior_frame_count = self.behavior_data['items']['behavior']['intervalsms'].size + 1
    self.mapping_frame_count = self.mapping_data['intervalsms'].size + 1
    self.replay_frame_count = self.replay_data['intervalsms'].size + 1

    self.total_pkl_frames = (self.behavior_frame_count +
                             self.mapping_frame_count +
                             self.replay_frame_count)

    ### CHECK THAT NO FRAMES WERE DROPPED FROM SYNC ###
    print('frames in pkl files: {}'.format(self.total_pkl_frames))
    print('frames in sync file: {}'.format(len(self.vf)))
    #assert(total_pkl_frames==len(vf))

    ### CHECK THAT REPLAY AND BEHAVIOR HAVE SAME FRAME COUNT ###
    print('frames in behavior stim: {}'.format(self.behavior_frame_count))
    print('frames in replay stim: {}'.format(self.replay_frame_count))
    #assert(behavior_frame_count==replay_frame_count)

    # look for potential frame offsets from aborted stims
    (self.behavior_start_frame, self.mapping_start_frame,
     self.replay_start_frame) = probeSync.get_frame_offsets(
         self.syncDataset,
         [self.behavior_frame_count, self.mapping_frame_count,
          self.replay_frame_count])
    self.behavior_end_frame = self.behavior_start_frame + self.behavior_frame_count - 1
    self.mapping_end_frame = self.mapping_start_frame + self.mapping_frame_count - 1
    self.replay_end_frame = self.replay_start_frame + self.replay_frame_count - 1

    MONITOR_LAG = 0.036 #TO DO: don't hardcode this...
    # vsync times shifted by monitor lag approximate on-screen appear times
    self.FRAME_APPEAR_TIMES = self.vf + MONITOR_LAG

    self.behavior_start_time, self.mapping_start_time, self.replay_start_time = [self.FRAME_APPEAR_TIMES[f] for f in [self.behavior_start_frame, self.mapping_start_frame, self.replay_start_frame]]
    self.behavior_end_time, self.mapping_end_time, self.replay_end_time = [self.FRAME_APPEAR_TIMES[f] for f in [self.behavior_end_frame, self.mapping_end_frame, self.replay_end_frame]]

    self.probe_dirs = [self.paths['probe'+pid] for pid in self.paths['data_probes']]
    # populated lazily by the modules that need them
    self.probe_dict = None
    self.lfp_dict = None

    self._run_modules()
def __init__(self, exp_id, save_root, modules_to_run='all',
             cortical_sort=False, probes_to_run='ABCDEF',
             ctx_units_percentile=50):
    """Set up paths and lazy data-stream loaders for one session, then run QC.

    exp_id: LIMS ecephys session id, or a local session directory
        (local directories are detected by the '_' in their name).
    save_root: root directory under which the figure folder is created.
    modules_to_run: forwarded to _run_modules.
    cortical_sort: forwarded to data_getters.local_data_getter.
    probes_to_run: probe letters to process; filtered below against the
        probes actually present in this session's data.
    ctx_units_percentile: stored for later use by the analysis modules.
    """
    self.modules_to_run = modules_to_run
    self.errors = []  # accumulates per-module failures
    self.cortical_sort = cortical_sort
    self.genotype = None  # filled in by _get_genotype()
    self.ctx_units_percentile = ctx_units_percentile

    # Each data stream maps to [loaded_flag, loader]; unlike the eager
    # variant, streams are loaded on demand by the modules that need them.
    self.data_stream_status = {
        'pkl': [False, self._load_pkl_data],
        'opto': [False, self._load_opto_data],
        'sync': [False, self._load_sync_data],
        'unit': [False, self._build_unit_table],
        'LFP': [False, self._build_lfp_dict]
    }

    identifier = exp_id
    if identifier.find('_') >= 0:
        d = data_getters.local_data_getter(base_dir=identifier,
                                           cortical_sort=cortical_sort)
    else:
        d = data_getters.lims_data_getter(exp_id=identifier)
    self.paths = d.data_dict

    # Figures are saved under <save_root>/<es_id>_<mouse>_<date>
    self.FIG_SAVE_DIR = os.path.join(
        save_root,
        self.paths['es_id'] + '_' + self.paths['external_specimen_name'] +
        '_' + self.paths['datestring'])
    if not os.path.exists(self.FIG_SAVE_DIR):
        os.mkdir(self.FIG_SAVE_DIR)

    # Every saved figure filename starts with <mouse>_<date>_
    self.figure_prefix = self.paths[
        'external_specimen_name'] + '_' + self.paths['datestring'] + '_'

    ### GET FILE PATHS TO SYNC AND PKL FILES ###
    # .get() so a missing file reports 'none found' instead of raising
    self.SYNC_FILE = self.paths.get('sync_file', 'none found')
    self.BEHAVIOR_PKL = self.paths.get('behavior_pkl', 'none found')
    self.REPLAY_PKL = self.paths.get('replay_pkl', 'none found')
    self.MAPPING_PKL = self.paths.get('mapping_pkl', 'none found')
    self.OPTO_PKL = self.paths.get('opto_pkl', 'none found')

    for f, s in zip([
            self.SYNC_FILE, self.BEHAVIOR_PKL, self.REPLAY_PKL,
            self.MAPPING_PKL
    ], ['sync: ', 'behavior: ', 'replay: ', 'mapping: ']):
        print(s + f)

    self.probe_dirs = [
        self.paths['probe' + pid] for pid in self.paths['data_probes']
    ]
    self.lfp_dirs = [
        self.paths['lfp' + pid] for pid in self.paths['data_probes']
    ]

    # populated lazily by the loaders in data_stream_status
    self.probe_dict = None
    self.lfp_dict = None
    self.metrics_dict = None
    self.probeinfo_dict = None
    self.agar_channel_dict = None

    self._get_genotype()
    self._get_platform_info()
    # self._make_specimen_meta_json()
    # self._make_session_meta_json()

    # only run probes that actually have data in this session
    self.probes_to_run = [
        p for p in probes_to_run if p in self.paths['data_probes']
    ]

    self._run_modules()