def set_batch(*, batch_name, jobs, label=None, compute_resource=None):
    """Register a new batch of jobs under *batch_name*.

    Refuses to overwrite a batch whose current status is anything other
    than 'finished' or 'error'.  Stores the batch definition, issues a
    fresh random batch code, and (optionally) hands the batch to a
    compute resource.
    """
    if label is None:
        label = batch_name
    # Refuse to clobber a batch that is still active.
    existing_status = get_batch_status(batch_name=batch_name)
    if existing_status is not None and existing_status['status'] not in ('finished', 'error'):
        raise Exception('Unable to set batch. Batch status already exists for {}: {}'.format(
            batch_name, existing_status))
    _set_batch_status(batch_name=batch_name, status=dict(status='initializing'))
    # Issue a fresh random batch code for this incarnation of the batch.
    _set_batch_code(batch_name, 'batch_code_' + _random_string(10))
    # Persist the batch definition itself.
    ca.saveObject(
        key=dict(name='batcho_batch', batch_name=batch_name),
        object=dict(label=label, jobs=jobs))
    _set_batch_status(batch_name=batch_name, status=dict(status='initialized'))
    if compute_resource is not None:
        clear_batch_jobs(batch_name=batch_name)
        add_batch_name_for_compute_resource(compute_resource, batch_name)
def assemble_batch(*, batch_name):
    """Gather the results of all finished jobs in a batch into one stored object.

    Requires every job to have status 'finished'; otherwise records an error
    status on the batch and raises.  Returns True on success, False if the
    batch definition could not be retrieved.  Raises if the batch code is
    missing, changes mid-assembly, or any job is not finished.
    """
    batch_code = _get_batch_code(batch_name)
    if not batch_code:
        raise Exception(
            'Unable to get batch code for batch {}'.format(batch_name))
    _set_batch_status(batch_name=batch_name, status=dict(status='assembling'))
    batch = _retrieve_batch(batch_name)
    if not batch:
        # Record the failure on the batch status rather than raising.
        _set_batch_status(batch_name=batch_name, status=dict(
            status='error', error='Unable to retrieve batch in assemble_batch.'))
        return False
    batch_label = batch.get('label', 'unknown')
    jobs = batch['jobs']
    print('Batch ({}) has {} jobs.'.format(batch_label, len(jobs)))
    status_strings = get_batch_job_statuses(batch_name=batch_name)
    assembled_results = []
    for ii, job in enumerate(jobs):
        # Abort if another process has re-initialized this batch under a new code.
        _check_batch_code(batch_name, batch_code)
        _set_batch_status(batch_name=batch_name, status=dict(
            status='assembling', job_index=ii))
        # NOTE(review): `command` is never used below; the subscript may be an
        # intentional key-presence check -- confirm before removing.
        command = job['command']
        job_label = job['label']
        # there is sometimes a mysterious error here....
        # Job statuses are keyed by the stringified job index.
        status_string = status_strings.get(str(ii), None)
        if status_string == 'finished':
            print('ASSEMBLING job result for {}'.format(job_label))
            result = _get_job_result(batch_name=batch_name, job_index=ii)
            assembled_results.append(dict(
                job=job,
                result=result
            ))
        else:
            # Any non-finished job aborts assembly with an error status.
            errstr = 'Problem assembling job {}. Status is {}.'.format(
                ii, status_string)
            _set_batch_status(batch_name=batch_name, status=dict(
                status='error', error=errstr))
            raise Exception(errstr)
    # Final check that the batch code is still ours before saving.
    _check_batch_code(batch_name, batch_code)
    print('Assembling {} results'.format(len(assembled_results)))
    ca.saveObject(key=dict(name='batcho_batch_results', batch_name=batch_name),
                  object=dict(results=assembled_results), confirm=True)
    _set_batch_status(batch_name=batch_name,
                      status=dict(status='done_assembling'))
    return True
def assembleBatchResults(*, batch_name):
    """Load the stored result of every job in a batch and save them as one object.

    Raises if any per-job result object cannot be loaded.
    """
    batch = ca.loadObject(key=dict(batch_name=batch_name))
    job_list = batch['jobs']
    print('Assembling results for batch {} with {} jobs'.format(
        batch_name, len(job_list)))
    collected = []
    for job in job_list:
        print('ASSEMBLING: ' + job['label'])
        # Each job's result is stored under the job definition itself as key.
        job_output = ca.loadObject(key=job)
        if not job_output:
            raise Exception('Unable to load object for job: ' + job['label'])
        collected.append(dict(job=job, result=job_output))
    print('Saving results...')
    ca.saveObject(key=dict(name='job_results', batch_name=batch_name),
                  object=dict(job_results=collected))
    print('Done.')
def _run_job(job):
    """Run a single job at most once, guarding against concurrent workers.

    A random in-process token is written under the job key with
    overwrite=False; only the worker holding the matching token records
    status/results for the job.  Re-raises any exception from _do_run_job
    after recording an error status (if we still own the job).
    """
    val = ca.getValue(key=job)
    if val:
        # Some worker already claimed (or completed) this job.
        return
    # Random claim token identifying this worker's run of the job.
    code = ''.join(random.choice(string.ascii_uppercase) for _ in range(10))
    if not ca.setValue(key=job, value='in-process-' + code, overwrite=False):
        # Lost the claim race to another worker.
        return
    status = dict(time_started=_make_timestamp(), status='running')
    _set_job_status(job, status)
    print('Running job: ' + job['label'])
    try:
        result = _do_run_job(job)
    except Exception as exc:
        # Was a bare `except:`; narrowed to Exception -- the re-raise below
        # is preserved, so failures still propagate to the caller.
        status['time_finished'] = _make_timestamp()
        status['status'] = 'error'
        # Include the actual exception text instead of a generic message.
        status['error'] = 'Exception in _do_run_job: {}'.format(exc)
        val = ca.getValue(key=job)
        if val == 'in-process-' + code:
            # Only record the error status if we still own the job.
            _set_job_status(job, status)
        raise
    val = ca.getValue(key=job)
    if val != 'in-process-' + code:
        print(
            'Not saving result because in-process code does not match {} <> {}.'
            .format(val, 'in-process-' + code))
        return
    status['time_finished'] = _make_timestamp()
    status['result'] = result
    # assumes _do_run_job returns a dict -- TODO confirm
    if 'error' in result:
        print('Error running job: ' + result['error'])
        status['status'] = 'error'
        status['error'] = result['error']
        _set_job_status(job, status)
        ca.setValue(key=job, value='error-' + code)
        return
    status['status'] = 'finished'
    # NOTE(review): the 'finished' status is never persisted via
    # _set_job_status here, unlike the error paths above -- looks like an
    # oversight; confirm against the status readers before changing.
    ca.saveObject(
        key=job,
        object=result
    )  # Not needed in future, because we should instead use the status object
def _set_job_status(*, batch_name, job_index, status):
    """Persist one job's full status object plus its short status string.

    Writes the complete status dict under 'batcho_job_statuses' and the
    bare status string under 'batcho_job_status_strings', both subkeyed
    by the stringified job index.  Returns True on success, False if
    either write fails.
    """
    status_string = status.get('status', None) if status else None
    subkey = str(job_index)
    # Full status object.
    if not ca.saveObject(
            key=dict(name='batcho_job_statuses', batch_name=batch_name),
            subkey=subkey, object=status):
        return False
    # Compact status string for cheap polling.
    if not ca.setValue(
            key=dict(name='batcho_job_status_strings', batch_name=batch_name),
            subkey=subkey, value=status_string):
        print('WARNING: problem setting batch job status for subkey {}: {}'.format(
            subkey, status_string))
        return False
    return True
def _set_batch_status(*, batch_name, status):
    """Store the batch-level status object; returns the saveObject outcome."""
    return ca.saveObject(
        object=status,
        key=dict(name='batcho_batch_status', batch_name=batch_name))
def test_spikeforest_analysis(tmpdir):
    """End-to-end smoke test: generate toy recordings, run the full
    sort/summarize/compare pipeline for each sorter, save the aggregated
    output, and fail if any sorter's average accuracy falls below 0.3
    (Yass is excused).

    *tmpdir* is the pytest-provided temporary directory fixture.
    """
    tmpdir = str(tmpdir)

    # generate toy recordings
    delete_recordings = True
    num_recordings = 2
    duration = 15
    for num in range(1, num_recordings+1):
        dirname = tmpdir+'/toy_example{}'.format(num)
        if delete_recordings:
            if os.path.exists(dirname):
                shutil.rmtree(dirname)
        if not os.path.exists(dirname):
            # Synthesize a recording plus ground-truth sorting and write both.
            rx, sx_true = se.example_datasets.toy_example1(
                duration=duration, num_channels=4, samplerate=30000, K=10)
            se.MdaRecordingExtractor.writeRecording(
                recording=rx, save_path=dirname)
            se.MdaSortingExtractor.writeSorting(
                sorting=sx_true, save_path=dirname+'/firings_true.mda')

    # Use this to optionally connect to a kbucket share:
    # ca.autoConfig(collection='spikeforest',key='spikeforest2-readwrite',ask_password=True)
    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (see the note above)
    # compute_resource = 'local-computer'
    compute_resource = None

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'spikeforest_test0'

    # Grab the recordings for testing
    recordings = [
        dict(
            recording_name='toy_example{}'.format(num),
            study_name='toy_examples',
            directory=tmpdir+'/toy_example{}'.format(num)
        )
        for num in range(1, num_recordings+1)
    ]

    studies = [
        dict(
            name='toy_examples',
            study_set='toy_examples',
            directory=os.path.abspath('.'),
            description='Toy examples.'
        )
    ]

    # Summarize the recordings
    recordings = sa.summarize_recordings(
        recordings=recordings, compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []

    for sorter in sorters:
        # Sort the recordings
        sortings = sa.sort_recordings(
            sorter=sorter,
            recordings=recordings,
            compute_resource=compute_resource
        )

        # Summarize the sortings
        sortings = sa.summarize_sortings(
            sortings=sortings,
            compute_resource=compute_resource
        )

        # Compare with ground truth
        sortings = sa.compare_sortings_with_truth(
            sortings=sortings,
            compute_resource=compute_resource
        )

        # Append to results
        sorting_results = sorting_results+sortings

    # TODO: collect all the units for aggregated analysis
    aggregated_sorting_results = sa.aggregate_sorting_results(
        studies, recordings, sorting_results)

    # Save the output
    print('Saving the output')
    ca.saveObject(
        key=dict(
            name='spikeforest_results',
            output_id=output_id
        ),
        object=dict(
            studies=studies,
            recordings=recordings,
            sorting_results=sorting_results,
            aggregated_sorting_results=ca.saveObject(
                object=aggregated_sorting_results)
        )
    )

    # Check average accuracy per (study, sorter) and fail the test when it
    # drops below the threshold.
    for sr in aggregated_sorting_results['study_sorting_results']:
        study_name = sr['study']
        sorter_name = sr['sorter']
        n1 = np.array(sr['num_matches'])
        n2 = np.array(sr['num_false_positives'])
        n3 = np.array(sr['num_false_negatives'])
        # Per-unit accuracy = matches / (matches + FP + FN).
        accuracies = n1/(n1+n2+n3)
        avg_accuracy = np.mean(accuracies)
        txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format(
            study_name, sorter_name, avg_accuracy)
        print(txt)
        if avg_accuracy < 0.3:
            if sorter_name == 'Yass':
                print('Average accuracy is too low, but we are excusing Yass for now.')
            else:
                raise Exception('Average accuracy is too low for test----- '+txt)
def main():
    """Run the toy-example pipeline locally: generate one toy recording,
    sort it (duplicated x10) with each defined sorter using a worker pool,
    then summarize, compare with ground truth, and save the results under
    the 'toy_example_local' subkey.
    """
    # generate toy recordings
    if not os.path.exists('recordings'):
        os.mkdir('recordings')
    delete_recordings = False
    recpath = 'recordings/example1'
    if os.path.exists(recpath) and (delete_recordings):
        shutil.rmtree(recpath)
    if not os.path.exists(recpath):
        # Synthesize a recording plus ground-truth sorting and write both.
        rx, sx_true = se.example_datasets.toy_example1(duration=60,
                                                       num_channels=4,
                                                       samplerate=30000,
                                                       K=10)
        se.MdaRecordingExtractor.writeRecording(recording=rx,
                                                save_path=recpath)
        se.MdaSortingExtractor.writeSorting(sorting=sx_true,
                                            save_path=recpath + '/firings_true.mda')

    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource
    compute_resource = None
    num_workers = 10

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'toy_example_local'

    # Grab the recordings for testing
    recordings = [
        dict(recording_name='example1',
             study_name='toy_examples',
             directory=os.path.abspath('recordings/example1'))
    ]
    # Duplicate the single recording to exercise parallel workers.
    recordings = recordings * 10

    studies = [
        dict(name='toy_examples',
             study_set='toy_examples',
             directory=os.path.abspath('recordings'),
             description='Toy examples.')
    ]

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []

    for sorter in sorters:
        # Sort the recordings
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            # NOTE(review): compute_resource_ks is not defined in this
            # function -- presumably a module-level global; this raises
            # NameError if a KiloSort sorter is configured. Verify.
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings,
                                      compute_resource=compute_resource0,
                                      num_workers=num_workers)

        # Append to results
        sorting_results = sorting_results + sortings

    # Summarize the sortings
    sorting_results = sa.summarize_sortings(sortings=sorting_results,
                                            compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results = sa.compare_sortings_with_truth(
        sortings=sorting_results,
        compute_resource=compute_resource,
        num_workers=num_workers)

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results'),
                  subkey=output_id,
                  object=dict(studies=studies,
                              recordings=recordings,
                              sorting_results=sorting_results))
def main():
    """Run the full analysis for the 'visapy_mea' recording group on a
    remote compute resource: summarize recordings, sort with every defined
    sorter, summarize and compare sortings with ground truth, aggregate,
    save under the 'visapy_mea' subkey, and print per-study accuracies.

    Reads the kbucket password from the SPIKEFOREST_PASSWORD env var.
    """
    ca.autoConfig(collection='spikeforest',
                  key='spikeforest2-readwrite',
                  ask_password=True,
                  password=os.environ.get('SPIKEFOREST_PASSWORD', None))

    # Use this to optionally connect to a kbucket share:
    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (see the note above)
    compute_resource = 'default'
    #compute_resource = 'local-computer'
    #compute_resource = 'ccmlin008-default'
    #compute_resource_ks = 'ccmlin008-kilosort'

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'visapy_mea'

    # Grab the recordings for testing
    group_name = 'visapy_mea'

    a = ca.loadObject(
        key=dict(name='spikeforest_recording_group', group_name=group_name))

    recordings = a['recordings']
    studies = a['studies']
    # recordings = [recordings[0]]
    # recordings = recordings[0:3]

    # Summarize the recordings
    recordings = sa.summarize_recordings(recordings=recordings,
                                         compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = _define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []

    for sorter in sorters:
        # Sort the recordings
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            # NOTE(review): compute_resource_ks is only assigned in a
            # commented-out line above -- this raises NameError if a
            # KiloSort sorter is configured. Verify before running KiloSort.
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings,
                                      compute_resource=compute_resource0)

        # Append to results
        sorting_results = sorting_results + sortings

    # Summarize the sortings
    sorting_results = sa.summarize_sortings(sortings=sorting_results,
                                            compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results = sa.compare_sortings_with_truth(
        sortings=sorting_results, compute_resource=compute_resource)

    # Aggregate the results
    aggregated_sorting_results = sa.aggregate_sorting_results(
        studies, recordings, sorting_results)

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results'),
                  subkey=output_id,
                  object=dict(studies=studies,
                              recordings=recordings,
                              sorting_results=sorting_results,
                              aggregated_sorting_results=ca.saveObject(
                                  object=aggregated_sorting_results)))

    # Print the average per-unit accuracy for each (study, sorter) pair.
    for sr in aggregated_sorting_results['study_sorting_results']:
        study_name = sr['study']
        sorter_name = sr['sorter']
        n1 = np.array(sr['num_matches'])
        n2 = np.array(sr['num_false_positives'])
        n3 = np.array(sr['num_false_negatives'])
        # Per-unit accuracy = matches / (matches + FP + FN).
        accuracies = n1 / (n1 + n2 + n3)
        avg_accuracy = np.mean(accuracies)
        txt = 'STUDY: {}, SORTER: {}, AVG ACCURACY: {}'.format(
            study_name, sorter_name, avg_accuracy)
        print(txt)
# ... or retrieve the path to a local file containing the text fname = ca.realizeFile(path=path) print(fname) # Output: /tmp/sha1-cache/4/82/482cb0cfcbed6740a2bcb659c9ccc22a4d27b369 # Or we can store some large text by key and retrieve it later ca.saveText(key=dict(name='key-for-repeating-text'), text='some large repeating text'*100) txt = ca.loadText(key=dict(name='key-for-repeating-text')) print(len(txt)) # Output: 2500 print('------------------------------------------------') # Similarly we can store python dicts via json content path = ca.saveObject(dict(some='object'), basename='object.json') print(path) # Output: sha1://b77fdda467b03d7a0c3e06f6f441f689ac46e817/object.json retrieved_object = ca.loadObject(path=path) print(retrieved_object) # Or store objects by key ca.saveObject(object=dict(some_other='object'), key=dict(some='key')) obj = ca.loadObject(key=dict(some='key')) print(obj) print('------------------------------------------------') # You can do the same with files with open('test___.txt', 'w') as f:
recordings = [] names = [] names = names + ['visapy_mea'] for name in names: print('PREPARING: ' + name) study_name = 'visapy_mea' study_dir = basedir + '/visapy_mea' study0 = dict(name=study_name, study_set=study_set_name, directory=study_dir, description='') studies.append(study0) dd = ca.readDir(study_dir) for dsname in dd['dirs']: dsdir = '{}/{}'.format(study_dir, dsname) recordings.append( dict( name=dsname, study=study_name, directory=dsdir, description='One of the recordings in the {} study'.format( study_name))) return studies, recordings # Prepare the studies studies, recordings = prepare_visapy_mea_studies(basedir=basedir) ca.saveObject(object=dict(studies=studies, recordings=recordings), key=dict(name='spikeforest_recording_group', group_name=group_name))
def main():
    """Generate toy recordings in the current directory and run the local
    test pipeline (summarize, sort with each sorter, summarize sortings,
    compare with ground truth), saving the output under id
    'spikeforest_test0'.
    """
    # generate toy recordings
    delete_recordings = False
    num_recordings = 1
    for num in range(1, num_recordings + 1):
        name = 'toy_example{}'.format(num)
        if delete_recordings:
            if os.path.exists(name):
                shutil.rmtree(name)
        if not os.path.exists(name):
            # Synthesize a recording plus ground-truth sorting and write both.
            rx, sx_true = se.example_datasets.toy_example1(duration=60,
                                                           num_channels=4,
                                                           samplerate=30000,
                                                           K=10)
            se.MdaRecordingExtractor.writeRecording(recording=rx,
                                                    save_path=name)
            se.MdaSortingExtractor.writeSorting(sorting=sx_true,
                                                save_path=name + '/firings_true.mda')

    # Use this to optionally connect to a kbucket share:
    # ca.autoConfig(collection='spikeforest',key='spikeforest2-readwrite',ask_password=True)
    # for downloading containers if needed
    ca.setRemoteConfig(alternate_share_ids=['69432e9201d0'])

    # Specify the compute resource (see the note above)
    compute_resource = None

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'spikeforest_test0'

    # Grab the recordings for testing
    recordings = [
        dict(recording_name='toy_example{}'.format(num),
             study_name='toy_examples',
             directory=os.path.abspath('toy_example{}'.format(num)))
        for num in range(1, num_recordings + 1)
    ]

    studies = [
        dict(name='toy_examples',
             study_set='toy_examples',
             directory=os.path.abspath('.'),
             description='Toy examples.')
    ]

    # Summarize the recordings
    recordings_B = sa.summarize_recordings(recordings=recordings,
                                           compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []

    for sorter in sorters:
        # Sort the recordings
        sortings_A = sa.sort_recordings(sorter=sorter,
                                        recordings=recordings_B,
                                        compute_resource=compute_resource)

        # Summarize the sortings
        sortings_B = sa.summarize_sortings(sortings=sortings_A,
                                           compute_resource=compute_resource)

        # Compare with ground truth
        sortings_C = sa.compare_sortings_with_truth(
            sortings=sortings_B, compute_resource=compute_resource)

        # Append to results
        sorting_results = sorting_results + sortings_C

    # TODO: collect all the units for aggregated analysis

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results', output_id=output_id),
                  object=dict(studies=studies,
                              recordings=recordings_B,
                              sorting_results=sorting_results))
def _set_job_status(job, status):
    """Record *status* for *job*, keyed by the job definition itself."""
    status_key = dict(name='job_status', job=job)
    ca.saveObject(key=status_key, object=status)
def _set_job_result(*, batch_name, job_index, result):
    """Persist one batch job's result object; returns the saveObject outcome."""
    return ca.saveObject(
        object=result,
        key=dict(name='batcho_job_result',
                 batch_name=batch_name,
                 job_index=job_index),
        confirm=True)
def main():
    """Run the test pipeline on the first recording of the
    'mearec_sqmea_test' group: summarize, sort with each sorter (KiloSort
    on its own resource), summarize sortings, compare with ground truth,
    and save the output under id 'spikeforest_test3'.

    Reads the kbucket password from the SPIKEFOREST_PASSWORD env var.
    """
    # Use this to optionally connect to a kbucket share:
    ca.autoConfig(collection='spikeforest',
                  key='spikeforest2-readwrite',
                  ask_password=True,
                  password=os.environ.get('SPIKEFOREST_PASSWORD', None))

    # Specify the compute resource (see the note above)
    #compute_resource = 'ccmlin008-80'
    #compute_resource_ks = 'ccmlin008-kilosort'
    compute_resource = None
    compute_resource_ks = None
    #compute_resource = 'ccmlin000-80'
    #compute_resource_ks = 'ccmlin000-kilosort'

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'spikeforest_test3'

    #group_name = 'magland_synth_test'
    group_name = 'mearec_sqmea_test'
    a = ca.loadObject(
        key=dict(name='spikeforest_recording_group', group_name=group_name))
    recordings = a['recordings']
    studies = a['studies']
    # Only process the first recording for this test.
    recordings = [recordings[0]]

    # Summarize the recordings
    recordings_B = sa.summarize_recordings(recordings=recordings,
                                           compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = define_sorters()

    # We will be assembling the sorting results here
    sorting_results_A = []

    for sorter in sorters:
        # Sort the recordings
        compute_resource0 = compute_resource
        if sorter['name'] == 'KiloSort':
            # KiloSort runs on its dedicated resource.
            compute_resource0 = compute_resource_ks
        sortings = sa.sort_recordings(sorter=sorter,
                                      recordings=recordings_B,
                                      compute_resource=compute_resource0)

        # Append to results
        sorting_results_A = sorting_results_A + sortings

    # Summarize the sortings
    sorting_results_B = sa.summarize_sortings(
        sortings=sorting_results_A, compute_resource=compute_resource)

    # Compare with ground truth
    sorting_results_C = sa.compare_sortings_with_truth(
        sortings=sorting_results_B, compute_resource=compute_resource)

    # TODO: collect all the units for aggregated analysis

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results',
                           output_id=output_id),
                  object=dict(studies=studies,
                              recordings=recordings_B,
                              sorting_results=sorting_results_C))
# %% ca.loadObject(key=dict(name='spikeforest_recording_group_names')) # %% # The base directory used below basedir = 'kbucket://15734439d8cf/groundtruth' # %% ca.saveObject( key=dict(name='spikeforest_recording_group_names'), object=[ 'magland_synth', 'magland_synth_test', 'mearec_sqmea_test', ] ) # %% [markdown] # # MAGLAND SYNTH # %% def prepare_magland_synth_studies(*, basedir): study_set_name = 'magland_synth' studies = [] recordings = [] names = []
def main():
    """Run the test pipeline on two magland_synth recordings from kbucket
    using the 'jfm-laptop' compute resource: summarize, sort with each
    sorter, summarize sortings, compare with ground truth, and save the
    output under id 'spikeforest_test1'.

    Prompts interactively for the kbucket password.
    """
    # Use this to optionally connect to a kbucket share:
    ca.autoConfig(collection='spikeforest',
                  key='spikeforest2-readwrite',
                  ask_password=True)

    # Specify the compute resource (see the note above)
    compute_resource = 'jfm-laptop'

    # Use this to control whether we force the processing to re-run (by default it uses cached results)
    os.environ['MLPROCESSORS_FORCE_RUN'] = 'FALSE'  # FALSE or TRUE

    # This is the id of the output -- for later retrieval by GUI's, etc
    output_id = 'spikeforest_test1'

    # Grab a couple recordings for testing
    recording1 = dict(
        recording_name='001_synth',
        study_name='datasets_noise10_K10_C4-test',
        study_set='magland_synth-test',
        directory=
        'kbucket://15734439d8cf/groundtruth/magland_synth/datasets_noise10_K10_C4/001_synth'
    )
    recording2 = dict(
        recording_name='002_synth',
        study_name='datasets_noise10_K10_C4-test',
        study_set='magland_synth-test',
        directory=
        'kbucket://15734439d8cf/groundtruth/magland_synth/datasets_noise10_K10_C4/002_synth'
    )
    recordings = [recording1, recording2]

    # Summarize the recordings
    recordings_B = sa.summarize_recordings(recordings=recordings,
                                           compute_resource=compute_resource)

    # Sorters (algs and params) are defined below
    sorters = define_sorters()

    # We will be assembling the sorting results here
    sorting_results = []

    for sorter in sorters:
        # Sort the recordings
        sortings_A = sa.sort_recordings(sorter=sorter,
                                        recordings=recordings_B,
                                        compute_resource=compute_resource)

        # Summarize the sortings
        sortings_B = sa.summarize_sortings(sortings=sortings_A,
                                           compute_resource=compute_resource)

        # Compare with ground truth
        sortings_C = sa.compare_sortings_with_truth(
            sortings=sortings_B, compute_resource=compute_resource)

        # Append to results
        sorting_results = sorting_results + sortings_C

    # TODO: collect all the units for aggregated analysis

    # Save the output
    print('Saving the output')
    ca.saveObject(key=dict(name='spikeforest_results', output_id=output_id),
                  object=dict(recordings=recordings_B,
                              sorting_results=sorting_results))