def assembleBatchResults(*, batch_name):
    batch = ca.loadObject(key=dict(batch_name=batch_name))
    jobs = batch['jobs']
    print('Assembling results for batch {} with {} jobs'.format(
        batch_name, len(jobs)))
    job_results = []
    for job in jobs:
        print('ASSEMBLING: ' + job['label'])
        result = ca.loadObject(key=job)
        if not result:
            raise Exception('Unable to load object for job: ' + job['label'])
        job_results.append(dict(job=job, result=result))
    print('Saving results...')
    ca.saveObject(key=dict(name='job_results', batch_name=batch_name),
                  object=dict(job_results=job_results))
    print('Done.')
def clearBatch(*, batch_name, test_one=False):
    batch = ca.loadObject(key=dict(batch_name=batch_name))
    jobs = batch['jobs']
    if test_one and (len(jobs) > 0):
        jobs = [jobs[0]]
    setBatchStatus(batch_name=batch_name, status='clearing_batch')
    _clear_job_results(jobs=jobs, incomplete_only=False)
    setBatchStatus(batch_name=batch_name, status='finished_clearing_batch')
def runBatch(*, batch_name, test_one=False):
    print('Loading batch object...')
    batch = ca.loadObject(key=dict(batch_name=batch_name))
    jobs = batch['jobs']
    if test_one and (len(jobs) > 0):
        jobs = [jobs[0]]
    print('Running batch with {} jobs...'.format(len(jobs)))
    for job in jobs:
        _run_job(job)
def _on_group_changed(self, value):
    group_name = self._SEL_group.value()
    if not group_name:
        return
    a = ca.loadObject(key=dict(name='spikeforest_recording_group',
                               group_name=group_name))
    # key=dict(name='spikeforest_results', output_id='spikeforest_test2'))
    SF = sf.SFData()
    SF.loadStudies(a['studies'])
    SF.loadRecordings2(a['recordings'])
    self._SF = SF
    self._SEL_study.setOptions(SF.studyNames())
    self._on_study_changed(value=self._SEL_study.value())
def prepareBatch(*, batch_name, test_one=False):
    batch = ca.loadObject(key=dict(batch_name=batch_name))
    jobs = batch['jobs']
    if test_one and (len(jobs) > 0):
        jobs = [jobs[0]]
    setBatchStatus(batch_name=batch_name, status='preparing_batch')
    _clear_job_results(jobs=jobs, incomplete_only=True)
    setBatchStatus(batch_name=batch_name, status='downloading_recordings')
    _download_recordings(jobs=jobs)
    setBatchStatus(batch_name=batch_name, status='finished_preparing_batch')
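# A minimal usage sketch (an illustration, not from the original source): the
# typical lifecycle implied by the functions above is prepare -> run ->
# assemble, with clearBatch available to reset state. The batch name below is
# hypothetical.
if __name__ == '__main__':
    batch_name = 'example_batch'  # hypothetical batch name
    prepareBatch(batch_name=batch_name, test_one=True)   # clear stale results, download inputs
    runBatch(batch_name=batch_name, test_one=True)       # execute the jobs
    assembleBatchResults(batch_name=batch_name)          # collect results under a single key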
def _retrieve_batch(batch_name):
    print('Retrieving batch {}'.format(batch_name))
    key = dict(name='batcho_batch', batch_name=batch_name)
    a = ca.getValue(key=key)
    if not a:
        print('Unable to retrieve batch {}. Not found in pairio.'.format(batch_name))
        return None
    obj = ca.loadObject(key=key)
    if not obj:
        print('Unable to retrieve batch {}. Object not found on kbucket.'.format(batch_name))
        return None
    if 'jobs' not in obj:
        raise Exception(
            'batch object does not contain jobs field for batch_name={}'.format(batch_name))
    return obj
def __init__(self):
    vd.Component.__init__(self)
    self._recording_group_names = ca.loadObject(key=dict(
        name='spikeforest_recording_group_names'))
    self._SEL_group = vd.components.SelectBox(
        options=[''] + self._recording_group_names)
    self._SEL_group.onChange(self._on_group_changed)
    self._SEL_study = vd.components.SelectBox(options=[])
    self._SEL_study.onChange(self._on_study_changed)
    self._SEL_recording = vd.components.SelectBox(options=[])
    self._SEL_recording.onChange(self._on_recording_changed)
    self._recording_widget = SFRecordingWidget()
    self._on_group_changed(value=self._SEL_group.value())
    vd.devel.loadBootstrap()
def _on_output_id_changed(self, value):
    output_id = self._SEL_output_id.value()
    if not output_id:
        return
    key = dict(name='spikeforest_results', output_id=output_id)
    a = ca.loadObject(key=key)
    if a is None:
        raise Exception(
            'Unable to load spikeforest result: {}'.format(output_id))
    SF = sf.SFData()
    SF.loadStudies(a['studies'])
    SF.loadRecordings2(a['recordings'])
    SF.loadSortingResults(a['sorting_results'])
    self._SF = SF
    self._SEL_study.setOptions(SF.studyNames())
    self._on_study_changed(value=self._SEL_study.value())
def loadProcessingBatch(self, *, batch_name=None, key=None, verbose=False):
    if batch_name:
        key = dict(name='batcho_batch_results', batch_name=batch_name)
    if not pa.get(key=key):
        raise Exception('Batch result not found.')
    obj = ca.loadObject(key=key)
    job_results = obj.get('job_results', obj.get('results'))  # transitioning to 'results'
    num_sorting_results = 0
    num_recording_summary_results = 0
    for X in job_results:
        if X['job']['command'] == 'sort_recording':
            study_name = X['job']['recording']['study']
            recording_name = X['job']['recording']['name']
            sorter_name = X['job']['sorter']['name']
            result = X['result']
            S = self.study(study_name)
            if S:
                D = S.recording(recording_name)
                if D:
                    num_sorting_results = num_sorting_results + 1
                    D.addSortingResult(result)
                else:
                    print('Warning: recording not found: ' + recording_name)
            else:
                print('Warning: study not found: ' + study_name)
        elif X['job']['command'] == 'summarize_recording':
            study_name = X['job']['recording']['study']
            recording_name = X['job']['recording']['name']
            result = X['result']
            S = self.study(study_name)
            if S:
                D = S.recording(recording_name)
                if D:
                    num_recording_summary_results = num_recording_summary_results + 1
                    D.setSummaryResult(result)
                else:
                    print('Warning: recording not found: ' + recording_name)
            else:
                print('Warning: study not found: ' + study_name)
        else:
            pass
    if verbose:
        print('Loaded {} sorting results and {} recording summary results'.format(
            num_sorting_results, num_recording_summary_results))
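# A minimal usage sketch (an assumption, not from the original source:
# loadProcessingBatch is taken to be a method of sf.SFData, consistent with
# the other SFData loaders in this section; the batch name is hypothetical):
SF = sf.SFData()
# studies/recordings must be populated first, e.g. via SF.loadStudies(...)
# and SF.loadRecordings2(...) as shown above
SF.loadProcessingBatch(batch_name='example_batch', verbose=True)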
def __init__(self, output_id):
    vd.Component.__init__(self)
    self._output_id = output_id
    a = ca.loadObject(key=dict(name='spikeforest_results'), subkey=output_id)
    if not a:
        print('ERROR: unable to open results: ' + output_id)
        return
    if ('recordings' not in a) or ('studies' not in a) or ('sorting_results' not in a):
        print('ERROR: problem with output: ' + output_id)
        return
    studies = a['studies']
    recordings = a['recordings']
    sorting_results = a['sorting_results']
    SF = sf.SFData()
    SF.loadStudies(studies)
    SF.loadRecordings2(recordings)
    SF.loadSortingResults(sorting_results)
    # sorter_names = []
    # for SR in sorting_results:
    #     sorter_names.append(SR['sorter']['name'])
    # sorter_names = list(set(sorter_names))
    # sorter_names.sort()
    self._SF_data = SF
    self._accuracy_threshold_input = vd.components.LineEdit(
        value=0.8, dtype=float, style=dict(width='70px'))
    self._update_button = vd.components.Button(
        onclick=self._on_update, class_='button', label='Update')
    self._study_sorter_fig = StudySorterFigure(SF)
    self._study_sorter_table = vd.div()  # dummy
    vd.devel.loadBootstrap()
    self._update_accuracy_table()
def loadRecordings(self, *, key=None, verbose=False):  # old
    if key is None:
        key = dict(name='spikeforest_studies_processed')
    obj = ca.loadObject(key=key)
    studies = obj['studies']
    for study in studies:
        name = study['name']
        if name in self._studies_by_name:
            print('Study already loaded: ' + name)
        else:
            self._study_names.append(study['name'])
            S = SFStudy(study)
            self._studies_by_name[name] = S
    recordings = obj['recordings']
    print('recordings ===================================================================')
    print(recordings)
    for ds in recordings:
        study = ds.get('study_name', ds.get('study'))
        self._studies_by_name[study].addRecording(ds)
    if verbose:
        print('Loaded {} recordings'.format(len(recordings)))
def summarize_recordings(recordings, compute_resource=None):
    print('>>>>>> summarize recordings')
    jobs_info = []
    jobs_timeseries_plot = []
    jobs_units_info = []
    for recording in recordings:
        print('Creating jobs for recording: {}/{}'.format(
            recording.get('study', ''), recording.get('name', '')))
        raw_path = recording['directory'] + '/raw.mda'
        firings_true_path = recording['directory'] + '/firings_true.mda'
        channels = recording.get('channels', None)
        units = recording.get('units_true', None)
        if not ca.findFile(path=firings_true_path):
            raise Exception('firings_true file not found: ' + firings_true_path)
        job = ComputeRecordingInfo.createJob(
            recording_dir=recording['directory'],
            channels=recording.get('channels', []),
            json_out={'ext': '.json', 'upload': True},
            _container='default')
        job['files_to_realize'] = [raw_path, firings_true_path]
        jobs_info.append(job)
        # job = CreateTimeseriesPlot.createJob(
        #     recording_dir=recording['directory'],
        #     channels=recording.get('channels', []),
        #     jpg_out={'ext': '.jpg', 'upload': True},
        #     _container='default')
        # jobs_timeseries_plot.append(job)
        job = ComputeUnitsInfo.createJob(
            recording_dir=recording['directory'],
            firings=recording['directory'] + '/firings_true.mda',
            unit_ids=units,
            channel_ids=channels,
            json_out={'ext': '.json', 'upload': True},
            _container='default')
        jobs_units_info.append(job)

    # all_jobs = jobs_info + jobs_timeseries_plot + jobs_units_info
    all_jobs = jobs_info + jobs_units_info
    label = 'Summarize recordings'
    mlpr.executeBatch(jobs=all_jobs, label=label, num_workers=None,
                      compute_resource=compute_resource)

    summarized_recordings = []
    for i, recording in enumerate(recordings):
        firings_true_path = recording['directory'] + '/firings_true.mda'
        summary = dict()
        result0 = jobs_info[i]['result']
        summary['computed_info'] = ca.loadObject(
            path=result0['outputs']['json_out'])
        # result0 = jobs_timeseries_plot[i]['result']
        # summary['plots'] = dict(
        #     timeseries=ca.saveFile(path=result0['outputs']['jpg_out'],
        #                            basename='timeseries.jpg'))
        summary['plots'] = dict()
        result0 = jobs_units_info[i]['result']
        summary['true_units_info'] = ca.saveFile(
            path=result0['outputs']['json_out'],
            basename='true_units_info.json')
        rec2 = deepcopy(recording)
        rec2['summary'] = summary
        summarized_recordings.append(rec2)
    return summarized_recordings
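# A minimal usage sketch (hedged): load a recording group the same way the
# scripts below do, then summarize the first recording locally. The group name
# 'magland_synth_test' appears elsewhere in this codebase; using it here is an
# assumption about which groups are available.
a = ca.loadObject(key=dict(name='spikeforest_recording_group',
                           group_name='magland_synth_test'))
summarized = summarize_recordings(recordings=a['recordings'][:1],
                                  compute_resource=None)
print(summarized[0]['summary'].keys())  # computed_info, plots, true_units_info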
def _get_job_result(*, batch_name, job_index):
    key = dict(name='batcho_job_result',
               batch_name=batch_name, job_index=job_index)
    return ca.loadObject(key=key)
def kb_read_json_file(fname):
    return ca.loadObject(path=fname)
def _get_job_status(*, batch_name, job_index):
    key = dict(name='batcho_job_statuses', batch_name=batch_name)
    subkey = str(job_index)
    return ca.loadObject(key=key, subkey=subkey)
def get_batch_status(*, batch_name):
    key = dict(name='batcho_batch_status', batch_name=batch_name)
    return ca.loadObject(key=key)
# Output: /tmp/sha1-cache/4/82/482cb0cfcbed6740a2bcb659c9ccc22a4d27b369

# Or we can store some large text by key and retrieve it later
ca.saveText(key=dict(name='key-for-repeating-text'),
            text='some large repeating text' * 100)
txt = ca.loadText(key=dict(name='key-for-repeating-text'))
print(len(txt))
# Output: 2500

print('------------------------------------------------')

# Similarly we can store python dicts via json content
path = ca.saveObject(dict(some='object'), basename='object.json')
print(path)
# Output: sha1://b77fdda467b03d7a0c3e06f6f441f689ac46e817/object.json

retrieved_object = ca.loadObject(path=path)
print(retrieved_object)

# Or store objects by key
ca.saveObject(object=dict(some_other='object'), key=dict(some='key'))
obj = ca.loadObject(key=dict(some='key'))
print(obj)

print('------------------------------------------------')

# You can do the same with files
with open('test___.txt', 'w') as f:
    f.write('some file content')
path = ca.saveFile('test___.txt')
print(path)
# Output: sha1://ee025361a15e3e8074e9c0b44b4f98aabc829b3d/test___.txt
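# A short round-trip sketch (an addition, not part of the original demo):
# read the saved file back via its sha1:// address using loadText with a
# path argument, as done elsewhere in this codebase.
content = ca.loadText(path=path)
print(content)
# Expected output: some file content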
def get_batch_results(*, batch_name):
    key = dict(name='batcho_batch_results', batch_name=batch_name)
    return ca.loadObject(key=key)
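# A minimal monitoring sketch (hedged: 'example_batch' and the job index are
# hypothetical; the helpers are the ones defined above):
batch_name = 'example_batch'
print(get_batch_status(batch_name=batch_name))              # overall batch status
print(_get_job_status(batch_name=batch_name, job_index=0))  # one job's status
print(_get_job_result(batch_name=batch_name, job_index=0))  # one job's result
results = get_batch_results(batch_name=batch_name)
print(results)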
parser = argparse.ArgumentParser(
    description='Run SpikeForest batch processing')
parser.add_argument('command', help='clear, prepare, run, assemble')
parser.add_argument('batch_name', help='Name of the batch')
args = parser.parse_args()
batch_name = args.batch_name

spikeforest_password = os.environ.get('SPIKEFOREST_PASSWORD', '')
if not spikeforest_password:
    raise Exception('Environment variable not set: SPIKEFOREST_PASSWORD')

print('Loading batch: ' + batch_name)
sf.kbucketConfigRemote(name='spikeforest1-readwrite',
                       password=spikeforest_password)
obj = ca.loadObject(key=dict(batch_name=batch_name))
if not obj:
    raise Exception('Unable to find batches object.')

command = args.command
if command == 'clear':
    sf.sf_batch.clear_job_results(batch_name=batch_name, incomplete_only=False)
elif command == 'prepare':
    sf.sf_batch.download_recordings(batch_name=batch_name)
    sf.sf_batch.clear_job_results(batch_name=batch_name, incomplete_only=True)
elif command == 'run':
    sf.sf_batch.run_jobs(batch_name=batch_name)
elif command == 'assemble':
    sf.sf_batch.assemble_job_results(batch_name=batch_name)
def main():
    # Use this to optionally connect to a kbucket share:
    ca.autoConfig(collection='spikeforest', key='spikeforest2-readwrite',
                  ask_password=True,
                  password=os.environ.get('SPIKEFOREST_PASSWORD', None))

    # Specify the compute resource (see the note above)
    # compute_resource = 'ccmlin008-80'
    # compute_resource_ks = 'ccmlin008-kilosort'
    compute_resource = None
    compute_resource_ks = None
    # compute_resource = 'ccmlin000-80'
    # compute_resource_ks = 'ccmlin000-kilosort'

    # Use this to control whether we force the processing to re-run
    # (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUIs, etc.
    output_id = 'spikeforest_test3'

    # group_name = 'magland_synth_test'
    group_name = 'mearec_sqmea_test'
    a = ca.loadObject(
        key=dict(name='spikeforest_recording_group', group_name=group_name))
    recordings = a['recordings']
    studies = a['studies']
    recordings = [recordings[0]]

    # Summarize the recordings
    recordings_B = sa.summarize_recordings(recordings=recordings,
                                           compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = define_sorters()

    # We will be assembling the sorting results here
    sorting_results_A = []

    for sorter in sorters:
        # Sort the recordings
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings_B,
                                      compute_resource=compute_resource0)

        # Append to results
        sorting_results_A = sorting_results_A + sortings

    # Summarize the sortings
    sorting_results_B = sa.summarize_sortings(
        sortings=sorting_results_A, compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results_C = sa.compare_sortings_with_truth(
        sortings=sorting_results_B, compute_resource=compute_resource)

    # TODO: collect all the units for aggregated analysis

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results', output_id=output_id),
                  object=dict(studies=studies,
                              recordings=recordings_B,
                              sorting_results=sorting_results_C))
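# A short retrieval sketch (hedged): once saved, the results can be loaded
# back by the same key, as the GUI code earlier in this section does.
results = ca.loadObject(key=dict(name='spikeforest_results',
                                 output_id='spikeforest_test3'))
print(len(results['sorting_results']))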
compute_resource = None
sorting_ms4 = sa.sort_recordings(recordings=[recording],
                                 sorter=sorter_ms4_thr3,
                                 compute_resource=compute_resource)[0]
sorting_sc = sa.sort_recordings(recordings=[recording],
                                sorter=sorter_sc,
                                compute_resource=compute_resource)[0]
# sorting_yass = sa.sort_recordings(recordings=[recording], sorter=sorter_yass,
#                                   compute_resource=compute_resource)[0]

# %%
display(sorting_ms4)
print(ca.loadText(path=sorting_ms4['console_out'])[0:1000])
display(sorting_sc)
print(ca.loadText(path=sorting_sc['console_out'])[0:1000])

# %%
sorting_ms4['summary'] = sa.summarize_sortings(
    sortings=[sorting_ms4], compute_resource=compute_resource)
sorting_ms4['comparison_with_truth'] = sa.compare_sortings_with_truth(
    sortings=[sorting_ms4], compute_resource=compute_resource)

# %%
sorting_ms4

# %%
ca.loadObject(
    path='sha1://cbc3f0d7beb8f94d3bf4287b38ca4b05782f94ec/table.json')

# %%
    print(os.getcwd())
except:
    pass

# %%
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

password = os.environ.get('SPIKEFOREST_PASSWORD')
ca.autoConfig(collection='spikeforest', key='spikeforest2-readwrite',
              ask_password=True, password=password)

# %%
ca.loadObject(key=dict(name='spikeforest_recording_group_names'))

# %%
# The base directory used below
basedir = 'kbucket://15734439d8cf/groundtruth'

# %%
ca.saveObject(
    key=dict(name='spikeforest_recording_group_names'),
    object=[
        'magland_synth',
        'magland_synth_test',
        'mearec_sqmea_test',
    ]
)
def main():
    ca.autoConfig(collection='spikeforest', key='spikeforest2-readwrite',
                  ask_password=True,
                  password=os.environ.get('SPIKEFOREST_PASSWORD', None))

    # Use this to optionally connect to a kbucket share:
    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (see the note above)
    compute_resource = 'default'
    # compute_resource = 'local-computer'
    # compute_resource = 'ccmlin008-default'
    # compute_resource_ks = 'ccmlin008-kilosort'
    compute_resource_ks = compute_resource  # fallback so the KiloSort branch below does not raise a NameError

    # Use this to control whether we force the processing to re-run
    # (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUIs, etc.
    output_id = 'visapy_mea'

    # Grab the recordings for testing
    group_name = 'visapy_mea'
    a = ca.loadObject(
        key=dict(name='spikeforest_recording_group', group_name=group_name))
    recordings = a['recordings']
    studies = a['studies']
    # recordings = [recordings[0]]
    # recordings = recordings[0:3]

    # Summarize the recordings
    recordings = sa.summarize_recordings(recordings=recordings,
                                         compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []

    for sorter in sorters:
        # Sort the recordings
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings,
                                      compute_resource=compute_resource0)

        # Append to results
        sorting_results = sorting_results + sortings

    # Summarize the sortings
    sorting_results = sa.summarize_sortings(sortings=sorting_results,
                                            compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results = sa.compare_sortings_with_truth(
        sortings=sorting_results, compute_resource=compute_resource)

    # Aggregate the results
    aggregated_sorting_results = sa.aggregate_sorting_results(
        studies, recordings, sorting_results)

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results'),
                  subkey=output_id,
                  object=dict(studies=studies,
                              recordings=recordings,
                              sorting_results=sorting_results,
                              aggregated_sorting_results=ca.saveObject(
                                  object=aggregated_sorting_results)))

    for sr in aggregated_sorting_results['study_sorting_results']:
        study_name = sr['study']
        sorter_name = sr['sorter']
        n1 = np.array(sr['num_matches'])
        n2 = np.array(sr['num_false_positives'])
        n3 = np.array(sr['num_false_negatives'])
        accuracies = n1 / (n1 + n2 + n3)
        avg_accuracy = np.mean(accuracies)
        txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format(
            study_name, sorter_name, avg_accuracy)
        print(txt)