def main():
    # Load the index of SpikeForest sorting outputs into a pandas dataframe
    x = kp.load_object(
        'sha1://21c4ad407244f18318bdbdeef2c953ad1eb61aef/sortingresults.json')
    df = pd.DataFrame(x)
    print(x[0].keys())

    # Print the dataframe
    print('***************************************************************')
    print(df)
    print('***************************************************************')

    # Inspect the first 10 results
    for index in range(10):
        study_name = df['studyName'][index]
        recording_name = df['recordingName'][index]
        sorter_name = df['sorterName'][index]
        firings_uri = df['firings'][index]
        sorting_object = {
            'sorting_format': 'mda',
            'data': {
                'firings': firings_uri,
                'samplerate': 30000
            }
        }
        sorting: se.SortingExtractor = le.LabboxEphysSortingExtractor(
            sorting_object)
        print('=========================================================')
        print(f'{study_name}/{recording_name} {sorter_name}')
        print(f'Num. units: {len(sorting.get_unit_ids())}')
def _create_object_for_arg(arg: Union[str, dict], samplerate=None) -> Union[dict, None]:
    # If arg already has the sorting_format field, just return it
    if (isinstance(arg, dict)) and ('sorting_format' in arg):
        return arg
    # If arg has the form dict(path='...'), replace it by the path string
    if (isinstance(arg, dict)) and ('path' in arg) and isinstance(arg['path'], str):
        arg = arg['path']
    # If arg is a LabboxEphysSortingExtractor, just get the object from arg.object()
    if isinstance(arg, LabboxEphysSortingExtractor):
        return arg.object()
    # If arg is a string ending with .json, replace arg by the loaded object
    if (isinstance(arg, str)) and (arg.endswith('.json')):
        path = arg
        obj = kp.load_object(path)
        if obj is None:
            raise Exception(f'Unable to load object: {path}')
        return obj
    # See if it has format 'mda'
    obj = _try_mda_create_object(arg, samplerate=samplerate)
    if obj is not None:
        return obj
    return None
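# Hypothetical illustration (not part of the library) of the argument forms
# accepted by the sorting-side _create_object_for_arg above. Note that this
# sorting-side helper passes samplerate through to its own _try_mda_create_object;
# the recording-side helper of the same name, shown next, does not take samplerate.
def _example_sorting_arg_forms():
    # An already-resolved object is returned unchanged:
    obj = _create_object_for_arg({
        'sorting_format': 'mda',
        'data': {'firings': '/path/to/firings.mda', 'samplerate': 30000}
    })
    # A dict(path=...) or a '.json' URI is loaded via kachery-p2p
    # (the URI below is a placeholder, not a real address):
    # obj = _create_object_for_arg({'path': 'sha1://<hash>/sorting.json'})
    return obj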
def _try_mda_create_object(arg: Union[str, dict]) -> Union[None, dict]:
    # Recording-side helper: builds a recording_format='mda' object or returns None
    if isinstance(arg, str):
        path = arg
        if path.startswith('sha1dir') or path.startswith('/'):
            dd = kp.read_dir(path)
            if dd is not None:
                if 'raw.mda' in dd['files'] and 'params.json' in dd['files'] and 'geom.csv' in dd['files']:
                    raw_path = path + '/raw.mda'
                    params_path = path + '/params.json'
                    geom_path = path + '/geom.csv'
                    geom_path_resolved = kp.load_file(geom_path)
                    assert geom_path_resolved is not None, f'Unable to load geom.csv from: {geom_path}'
                    params = kp.load_object(params_path)
                    assert params is not None, f'Unable to load params.json from: {params_path}'
                    geom = _load_geom_from_csv(geom_path_resolved)
                    return dict(recording_format='mda',
                                data=dict(raw=raw_path, geom=geom, params=params))
    if isinstance(arg, dict):
        if ('raw' in arg) and ('geom' in arg) and ('params' in arg) and isinstance(arg['geom'], list) and isinstance(arg['params'], dict):
            return dict(recording_format='mda',
                        data=dict(raw=arg['raw'], geom=arg['geom'], params=arg['params']))
    return None
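# Hypothetical illustration (not part of the library) of the two argument
# forms accepted by _try_mda_create_object; paths and values are placeholders.
def _example_mda_arg_forms():
    # Dict form: explicit components; geom is one [x, y] entry per channel
    obj = _try_mda_create_object({
        'raw': '/path/to/raw.mda',
        'geom': [[0.0, 0.0], [0.0, 20.0]],
        'params': {'samplerate': 30000}
    })
    # -> {'recording_format': 'mda', 'data': {'raw': ..., 'geom': ..., 'params': ...}}
    # String form: a kachery directory URI (or absolute local path) containing
    # raw.mda, params.json and geom.csv; returns None if anything is missing:
    # obj = _try_mda_create_object('sha1dir://<hash>/recording_dir')
    return obj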
def run_test(test_nodes, tmpdir):
    api_port = 30001
    try:
        # Start the daemons
        for tn in test_nodes:
            d = TestDaemon(
                label='d',
                channels=tn['channels'],
                api_port=api_port,
                storage_dir=tmpdir + f'/test_storage_{api_port}_{_randstr(5)}',
                port=tn['port'],
                websocket_port=tn['websocket_port'],
                bootstraps=tn['bootstraps'],
                isbootstrap=tn['isbootstrap'],
                nomulticast=True)
            tn['daemon'] = d
            tn['api_port'] = api_port
            print(f'starting daemon: {tn["name"]}')
            d.start()
            api_port = api_port + 1

        # Pause
        time.sleep(0.5)

        # Store some objects
        for tn in test_nodes:
            d = tn['daemon']
            tn['uris'] = []
            with d.testEnv():
                import kachery as ka
                for obj in tn['objects_to_store']:
                    uri = ka.store_object(obj)
                    tn['uris'].append(uri)

        # Pause
        time.sleep(10)

        # Load the objects: each node loads every other node's objects
        for tn in test_nodes:
            d = tn['daemon']
            with d.testEnv():
                import kachery_p2p as kp
                for tn2 in test_nodes:
                    if tn['name'] != tn2['name']:
                        for uri in tn2['uris']:
                            print(f'Node {tn["name"]} is loading {uri} from node {tn2["name"]}')
                            obj = kp.load_object(uri)
                            assert obj is not None
    finally:
        with PreventKeyboardInterrupt():
            for tn in test_nodes:
                # Guard against nodes whose daemon never started
                d = tn.get('daemon')
                if d is None:
                    continue
                print(f'stopping daemon: {tn["name"]}')
                try:
                    d.stop()
                except Exception:
                    print('WARNING: Failed to stop daemon.')
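# A hypothetical shape for one entry of test_nodes, inferred from the fields
# run_test reads above; names and values are illustrative only.
def _example_test_node():
    return dict(
        name='node1',
        channels=['testchannel'],
        port=42001,
        websocket_port=42501,
        bootstraps=[],                      # e.g. ['localhost:42001'] on non-bootstrap nodes
        isbootstrap=True,
        objects_to_store=[{'example': [1, 2, 3]}]
    )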
def prepare_sc_sortings(le_recordings_by_id):
    spyking_circus_sortings = {
        'cortexlab-single-phase-3': 'sha1://2e748216d16d97a8a7ad54ba05f34df1d5fc724c/file.json',
        'allen_mouse419112_probeE': 'sha1://b8ddf7527849d1734e533bb0b11787e6a1ac6eae/file.json',
        'allen_mouse415148_probeE': 'sha1://be9d013191dc7a5c539c61b87da6672281d00da8/file.json',
        'cortexlab-drift-dataset1': 'sha1://d618a4dfed3e8e74d980af60289a31186d962cb6/file.json',
        'cortexlab-drift-dataset2': 'sha1://9c8e813fbab74c3c463295c13aa1c6e28a74a157/file.json',
        # 'svoboda-SC026_080619_g0_tcat': 'sha1://1e435f56012eb3f1b2996d04a4d40edd4d0d2968/file.json',
        'svoboda-SC022_030319_g0_tcat_imec2': 'sha1://d5eb47fad3e6d4fd73c9dbea1f3e5a431ab73d2d/file.json'
    }
    le_sortings = []
    for recording_id, sorting_path in spyking_circus_sortings.items():
        le_recording = le_recordings_by_id[recording_id]
        print(sorting_path)
        sorting_object = kp.load_object(sorting_path)
        le_sortings.append(dict(
            sortingId=recording_id + ':spyking_circus',
            sortingLabel=recording_id + ':spyking_circus',
            sortingPath=sorting_path,
            sortingObject=sorting_object,
            recordingId=recording_id,
            recordingPath=le_recording['recordingPath'],
            recordingObject=le_recording['recordingObject'],
            tags=['contributed'],
            description=f'''
SpykingCircus applied to {recording_id} (contributed by P. Yger)
            '''.strip()
        ))
    return le_sortings
def main():
    SF_STUDY_SETS = kp.load_object(
        'sha1://54d9ed77a2aa788b9ab67977476c2b51adb8a2c5/studysets.json')['StudySets']
    STUDY_SETS = []
    for SF_STUDY_SET in SF_STUDY_SETS:
        if SF_STUDY_SET['name'] in study_set_names:
            STUDY_SET = {
                'name': SF_STUDY_SET['name'],
                'info': SF_STUDY_SET['info'],
                'description': SF_STUDY_SET['description'],
                'studies': []
            }
            for SF_STUDY in SF_STUDY_SET['studies']:
                STUDY = {
                    'name': SF_STUDY['name'],
                    'studySetName': SF_STUDY['studySetName'],
                    'recordings': []
                }
                # For now, only load up to 3 recordings per study
                for SF_RECORDING in SF_STUDY['recordings'][:3]:
                    recording_object = create_recording_object_from_spikeforest_recdir(
                        SF_RECORDING['directory'], label=SF_RECORDING['name'])
                    sorting_object = create_sorting_object_from_spikeforest_recdir(
                        SF_RECORDING['directory'], label=SF_RECORDING['name'])
                    print('********************************************************************************************')
                    print(f"{SF_RECORDING['studySetName']} {SF_RECORDING['studyName']} {SF_RECORDING['name']}")
                    print('********************************************************************************************')
                    RECORDING = {
                        "name": SF_RECORDING["name"],
                        "studyName": SF_RECORDING["studyName"],
                        "studySetName": SF_RECORDING["studySetName"],
                        "recordingObject": recording_object,
                        "sortingObject": sorting_object,
                        "sampleRateHz": SF_RECORDING["sampleRateHz"],
                        "numChannels": SF_RECORDING["numChannels"],
                        "durationSec": SF_RECORDING["durationSec"],
                        "numTrueUnits": SF_RECORDING["numTrueUnits"],
                        "old": {
                            "directory": SF_RECORDING["directory"],
                            "firingsTrue": SF_RECORDING["firingsTrue"],
                            "spikeSign": SF_RECORDING["spikeSign"]
                        }
                    }
                    STUDY['recordings'].append(RECORDING)
                STUDY_SET['studies'].append(STUDY)
            STUDY_SETS.append(STUDY_SET)
    spikeforest_study_sets = {'studysets': STUDY_SETS}
    # spikeforest_obj['self_reference'] = ka.store_object(spikeforest_obj)
    spikeforest_study_sets_path = ka.store_object(
        spikeforest_study_sets, basename='spikeforest_study_sets.json')
    print(spikeforest_study_sets_path)
def load_known_recordings_dict():
    try:
        x = kp.load_object(KNOWN_RECORDINGS_URI)
    except Exception:
        raise Exception(
            'Problem loading recordings dict. Perhaps you are not running the kachery-p2p daemon?')
    return x
def create_sorting_object_from_spikeforest_recdir(recdir, label):
    params = kp.load_object(recdir + '/params.json')
    firings_path = kp.load_file(recdir + '/firings_true.mda')
    firings_path = ka.store_file(firings_path, basename=label + '-firings.mda')
    sorting_object = dict(
        sorting_format='mda',
        data=dict(firings=firings_path, samplerate=params['samplerate'])
    )
    print(sorting_object)
    return sorting_object
def create_recording_object_from_spikeforest_recdir(recdir, label):
    raw_path = kp.load_file(recdir + '/raw.mda')
    raw_path = kp.store_file(raw_path, basename=label + '-raw.mda')  # store with manifest
    print(raw_path)
    params = kp.load_object(recdir + '/params.json')
    geom_path = kp.load_file(recdir + '/geom.csv')
    geom = _load_geom_from_csv(geom_path)
    recording_object = dict(
        recording_format='mda',
        data=dict(raw=raw_path, geom=geom, params=params)
    )
    return recording_object
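# For orientation: the two helpers above return dicts of roughly this shape
# (URIs are placeholders, not real kachery addresses):
#   {'sorting_format': 'mda',
#    'data': {'firings': 'sha1://<hash>/<label>-firings.mda', 'samplerate': 30000}}
#   {'recording_format': 'mda',
#    'data': {'raw': 'sha1://<hash>/<label>-raw.mda',
#             'geom': [[x0, y0], ...], 'params': {'samplerate': 30000, ...}}}
# These objects can then be passed to the labbox_ephys extractors used
# elsewhere in this codebase, e.g. le.LabboxEphysRecordingExtractor(recording_object)
# and le.LabboxEphysSortingExtractor(sorting_object).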
def __init__(self, probe_file, xml_file, nrs_file, dat_file):
    se.RecordingExtractor.__init__(self)
    # info = check_load_nrs(dirpath)
    # assert info is not None
    probe_obj = kp.load_object(probe_file)
    xml_file = kp.load_file(xml_file)
    # nrs_file = kp.load_file(nrs_file)
    dat_file = kp.load_file(dat_file)
    from xml.etree import ElementTree as ET
    xml = ET.parse(xml_file)
    root_element = xml.getroot()
    # Read acquisition parameters from the .xml file
    try:
        txt = root_element.find('acquisitionSystem/samplingRate').text
        assert txt is not None
        self._samplerate = float(txt)
    except Exception:
        raise Exception('Unable to load acquisitionSystem/samplingRate')
    try:
        txt = root_element.find('acquisitionSystem/nChannels').text
        assert txt is not None
        self._nChannels = int(txt)
    except Exception:
        raise Exception('Unable to load acquisitionSystem/nChannels')
    try:
        txt = root_element.find('acquisitionSystem/nBits').text
        assert txt is not None
        self._nBits = int(txt)
    except Exception:
        raise Exception('Unable to load acquisitionSystem/nBits')
    if self._nBits == 16:
        dtype = np.int16
    elif self._nBits == 32:
        dtype = np.int32
    else:
        raise Exception(f'Unexpected nBits: {self._nBits}')
    self._rec = se.BinDatRecordingExtractor(
        dat_file,
        sampling_frequency=self._samplerate,
        numchan=self._nChannels,
        dtype=dtype)
    # Attach channel locations and groups from the probe object
    self._channel_ids = probe_obj['channel']
    for ii in range(len(probe_obj['channel'])):
        channel = probe_obj['channel'][ii]
        x = probe_obj['x'][ii]
        y = probe_obj['y'][ii]
        z = probe_obj['z'][ii]
        group = probe_obj.get('group', probe_obj.get('shank'))[ii]
        self.set_channel_property(channel, 'location', [x, y, z])
        self.set_channel_property(channel, 'group', group)
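# The probe object loaded by __init__ above is expected (inferred from the
# fields it reads) to consist of parallel per-channel arrays, e.g.:
# {
#     'format_version': 'flatiron-probe-0.2',
#     'channel': [0, 1, 2, ...],
#     'x': [...], 'y': [...], 'z': [...],
#     'group': [...]    # or 'shank' in older files
# }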
def _try_nrs_create_object(arg: Union[str, dict]) -> Union[None, dict]:
    if isinstance(arg, str):
        path = arg
        if path.startswith('sha1dir') or path.startswith('/'):
            dd = kp.read_dir(path)
            if dd is not None:
                # Scan the directory for the four files that make up an nrs recording
                probe_file = None
                xml_file = None
                nrs_file = None
                dat_file = None
                for f in dd['files'].keys():
                    if f.endswith('.json'):
                        obj = kp.load_object(path + '/' + f)
                        # Guard against a failed load before inspecting the object
                        if obj is not None and obj.get('format_version', None) in ['flatiron-probe-0.1', 'flatiron-probe-0.2']:
                            probe_file = path + '/' + f
                    elif f.endswith('.xml'):
                        xml_file = path + '/' + f
                    elif f.endswith('.nrs'):
                        nrs_file = path + '/' + f
                    elif f.endswith('.dat'):
                        dat_file = path + '/' + f
                if probe_file is not None and xml_file is not None and nrs_file is not None and dat_file is not None:
                    data = dict(
                        probe_file=probe_file,
                        xml_file=xml_file,
                        nrs_file=nrs_file,
                        dat_file=dat_file
                    )
                    return dict(
                        recording_format='nrs',
                        data=data
                    )
    if isinstance(arg, dict):
        if ('probe_file' in arg) and ('xml_file' in arg) and ('nrs_file' in arg) and ('dat_file' in arg):
            return dict(
                recording_format='nrs',
                data=dict(
                    probe_file=arg['probe_file'],
                    xml_file=arg['xml_file'],
                    nrs_file=arg['nrs_file'],
                    dat_file=arg['dat_file']
                )
            )
    return None
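# Hypothetical illustration (not part of the library) of the dict form
# accepted by _try_nrs_create_object; the paths are placeholders.
def _example_nrs_arg():
    # Returns {'recording_format': 'nrs', 'data': {...}}
    return _try_nrs_create_object({
        'probe_file': '/data/session/probe.json',   # flatiron-probe-0.1/0.2 JSON
        'xml_file': '/data/session/session.xml',
        'nrs_file': '/data/session/session.nrs',
        'dat_file': '/data/session/session.dat'
    })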
def main():
    # Curated sorting (good units) for the full cortexlab-single-phase-3 recording
    recording_object = kp.load_object(
        'sha1://8b222e25bc4d9c792e4490ca322b5338e0795596/cortexlab-single-phase-3.json')
    sorting_object = {
        "sorting_format": "h5_v1",
        "data": {
            "h5_path": "sha1://68029d0eded8ca7d8f95c16dea81318966ae9b55/sorting.h5?manifest=12b0d8e37c7050a6fe636d4c16ed143bbd5dab0c"
        }
    }
    # Instantiate the extractors (not used below, but verifies the objects load)
    recording = le.LabboxEphysRecordingExtractor(recording_object)
    sorting = le.LabboxEphysSortingExtractor(sorting_object)
    h5_path = le.prepare_snippets_h5.run(
        sorting_object=sorting_object,
        recording_object=recording_object,
        start_frame=0,
        end_frame=30000 * 240).wait()
    print(h5_path)
def main():
    # Load the JSON object containing info for the SpikeForest data
    spikeforest_study_sets = kp.load_object(SF_STUDY_SETS_URI)

    # Iterate through the study sets
    study_sets = spikeforest_study_sets['studysets']
    print(f'Study sets: {[s["name"] for s in study_sets]}')
    for study_set in study_sets:
        if study_set['name'] in study_sets_to_load:
            # Iterate through the studies
            studies = study_set['studies']
            print(f'Studies in {study_set["name"]}: {[s["name"] for s in studies]}')
            for study in studies:
                # Iterate through the recordings
                recordings = study['recordings']
                STUDY_OUTPUT_DIR = os.path.join(GLOBAL_OUTPUT_DIR, study["name"])
                os.makedirs(STUDY_OUTPUT_DIR, exist_ok=True)
                print(f'Recordings in {study["name"]}: {[r["name"] for r in recordings]}')
                for rec in recordings:
                    RECORDING_OUTPUT_DIR = os.path.join(STUDY_OUTPUT_DIR, rec["name"])
                    os.makedirs(RECORDING_OUTPUT_DIR, exist_ok=True)

                    # Create recording/sorting extractors
                    recording = nd.LabboxEphysRecordingExtractor(
                        rec['recordingObject'], download=True)
                    sorting_true = nd.LabboxEphysSortingExtractor(
                        rec['sortingObject'])
                    filename = f'{rec["name"].replace(" ", "_")}.dat'
                    probename = f'{rec["name"].replace(" ", "_")}.prb'
                    sortname = f'{rec["name"].replace(" ", "_")}.npz'

                    # Display information
                    print(filename, probename)
                    duration_sec = recording.get_num_frames() / recording.get_sampling_frequency()
                    print('***************************************************************************')
                    print(f"{rec['studySetName']} {rec['studyName']} {rec['name']}")
                    print(f'Num channels: {len(recording.get_channel_ids())}')
                    print(f'Duration: {duration_sec} sec')
                    print(f'Num. true units: {len(sorting_true.get_unit_ids())}')

                    # Write the probe file, raw data (int16) and true sorting to disk,
                    # similar to the download_recordings.py script
                    recording.save_to_probe_file(
                        os.path.join(RECORDING_OUTPUT_DIR, probename))
                    se.BinDatRecordingExtractor.write_recording(
                        recording, os.path.join(RECORDING_OUTPUT_DIR, filename), dtype='int16')
                    se.NpzSortingExtractor.write_sorting(
                        sorting_true, os.path.join(RECORDING_OUTPUT_DIR, sortname))
                    print(f'########### DONE {os.path.join(RECORDING_OUTPUT_DIR, filename)} ###########')
                'type': 'remote',
                'uri': crfeed_uri,
                'cr_partition': 'partition3'
            },
            'timeseries': {
                'type': 'local'
            }
        }
    }
elif os.environ.get('LABBOX_CONFIG_URI', None) is not None:
    config_uri = os.environ['LABBOX_CONFIG_URI']
    num_tries = 0
    while True:
        try:
            print(f'Trying to load config from: {config_uri}')
            labbox_config = kp.load_object(config_uri)
            break
        except Exception:
            if num_tries > 20:
                raise
            time.sleep(2)
            num_tries = num_tries + 1
    assert labbox_config is not None, f'Unable to load config from: {config_uri}'
else:
    labbox_config = {
        'job_handlers': {
            'default': {
                'type': 'local'
            },
            'partition1': {
import hither as hi
import labbox_ephys as le
import kachery_p2p as kp
import kachery as ka
import json

# recording_object = kp.load_object('sha1://e3f764b72eafa0704cb4fef23d101e10e5404043/allen_mouse419112_probeE-ch0-7-10sec.json')
recording_object = kp.load_object(
    'sha1://8b222e25bc4d9c792e4490ca322b5338e0795596/cortexlab-single-phase-3.json')

# Restrict to the first 10 minutes and the first 12 channels
recording_object = dict(
    recording_format='subrecording',
    data=dict(
        recording=recording_object,
        start_frame=0,
        end_frame=30000 * 60 * 10,
        channel_ids=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    )
)

# jc = hi.JobCache(use_tempdir=True)
jc = None

with hi.RemoteJobHandler(
    # Substitute your own compute resource URI here
    compute_resource_uri='feed://09b27ce6c71add9fe6effaf351fce98d867d6fa002333a8b06565b0a108fb0ba?name=ephys1'
    # compute_resource_uri='feed://644c145d5f6088623ee59f3437655e185657a6d9a9676294f26ae504423565fa?name=lke9849-12258-5f50fc6bb944'
) as jh:
    with hi.Config(container=True, job_cache=jc, job_handler=jh, required_files=recording_object):
        x = le.sorters.mountainsort4.run(recording_object=recording_object).wait()
def _create_object_for_arg(arg: Union[str, dict]) -> Union[dict, None]:
    # If arg is a string ending with .json, load the object and recurse
    if (isinstance(arg, str)) and (arg.endswith('.json')):
        path = arg
        x = kp.load_object(path)
        if x is None:
            raise Exception(f'Unable to load object: {path}')
        return _create_object_for_arg(x)
    # If arg already has the recording_format field, just return it
    if (isinstance(arg, dict)) and ('recording_format' in arg):
        return arg
    # If arg has the form dict(path='...'), recurse on the path string
    if (isinstance(arg, dict)) and ('path' in arg) and isinstance(arg['path'], str):
        return _create_object_for_arg(arg['path'])
    # If arg is a LabboxEphysRecordingExtractor, just get the object from arg.object()
    if isinstance(arg, LabboxEphysRecordingExtractor):
        return arg.object()
    # See if it has format 'nwb'
    if isinstance(arg, str) and arg.endswith('.nwb'):
        return dict(
            recording_format='nwb',
            data=dict(path=arg)
        )
    # See if it has format 'mda'
    obj = _try_mda_create_object(arg)
    if obj is not None:
        return obj
    # See if it has format 'nrs'
    obj = _try_nrs_create_object(arg)
    if obj is not None:
        return obj
    # See if it is of type 'filtered'
    if (isinstance(arg, dict)) and ('recording' in arg) and ('filters' in arg):
        return dict(
            recording_format='filtered',
            data=dict(
                filters=arg['filters'],
                recording=_create_object_for_arg(arg['recording'])
            )
        )
    # See if it is of type 'subrecording' (by group, groups, or channel_ids)
    if (isinstance(arg, dict)) and ('recording' in arg) and ('group' in arg):
        return dict(
            recording_format='subrecording',
            data=dict(
                group=arg['group'],
                recording=_create_object_for_arg(arg['recording'])
            )
        )
    if (isinstance(arg, dict)) and ('recording' in arg) and ('groups' in arg):
        return dict(
            recording_format='subrecording',
            data=dict(
                groups=arg['groups'],
                recording=_create_object_for_arg(arg['recording'])
            )
        )
    if (isinstance(arg, dict)) and ('recording' in arg) and ('channel_ids' in arg):
        return dict(
            recording_format='subrecording',
            data=dict(
                channel_ids=arg['channel_ids'],
                recording=_create_object_for_arg(arg['recording'])
            )
        )
    return None
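# Hypothetical illustration (not part of the library) of the recursive
# dispatch above; the inner path is a placeholder.
def _example_subrecording_arg():
    # The inner dict(path=...) is resolved first, then the result is wrapped
    # as a 'subrecording' restricted to the given channels:
    return _create_object_for_arg({
        'recording': {'path': '/path/to/recording.json'},
        'channel_ids': [0, 1, 2, 3]
    })
    # -> {'recording_format': 'subrecording',
    #     'data': {'channel_ids': [0, 1, 2, 3], 'recording': {...resolved object...}}}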
def __init__(self, *, recording_directory=None, timeseries_path=None, download=False,
             samplerate=None, geom=None, geom_path=None, params_path=None):
    RecordingExtractor.__init__(self)
    if recording_directory:
        if timeseries_path is None:
            timeseries_path = recording_directory + '/raw.mda'
        geom_path = recording_directory + '/geom.csv'
        params_path = recording_directory + '/params.json'
    self._timeseries_path = timeseries_path
    # Load the dataset params (or fall back to the provided samplerate)
    if params_path:
        self._dataset_params = kp.load_object(params_path)
        if not self._dataset_params:
            raise Exception('Unable to load recording params: {}'.format(params_path))
        self._samplerate = self._dataset_params['samplerate']
    else:
        self._dataset_params = dict(samplerate=samplerate)
        self._samplerate = samplerate

    # Resolve (and optionally download) the timeseries and auxiliary files
    if download:
        path0 = kp.load_file(self._timeseries_path)
        if not path0:
            raise Exception('Unable to download file: ' + self._timeseries_path)
    else:
        path0 = self._timeseries_path
    if geom_path is not None:
        if not kp.load_file(geom_path):
            raise Exception('Unable to download file: ' + geom_path)
    if params_path is not None:
        if not kp.load_file(params_path):
            raise Exception('Unable to download file: ' + params_path)
    self._timeseries_path = path0

    self._timeseries = DiskReadMda(self._timeseries_path)
    if self._timeseries is None:
        raise Exception('Unable to load timeseries: {}'.format(self._timeseries_path))
    X = self._timeseries
    # Determine the geometry: explicit geom, geom.csv, or all-zeros fallback
    if geom is not None:
        self._geom = geom
    elif geom_path:
        geom_path2 = kp.load_file(geom_path)
        self._geom = np.genfromtxt(geom_path2, delimiter=',')
    else:
        self._geom = np.zeros((X.N1(), 2))

    if self._geom.shape[0] != X.N1():
        # raise Exception(
        #     'Incompatible dimensions between geom.csv and timeseries file {} <> {}'.format(self._geom.shape[0], X.N1()))
        print('WARNING: Incompatible dimensions between geom.csv and timeseries file {} <> {}'
              .format(self._geom.shape[0], X.N1()))
        self._geom = np.zeros((X.N1(), 2))

    # Compute a stable hash from the timeseries URI, samplerate, and geometry
    if self._timeseries_path.startswith('sha1://') or self._timeseries_path.startswith('sha1dir://'):
        timeseries_hash_path = self._timeseries_path
    else:
        timeseries_hash_path = 'sha1://' + ka.get_file_hash(self._timeseries_path)
    self._hash = ka.get_object_hash(dict(
        timeseries=timeseries_hash_path,
        samplerate=self._samplerate,
        geom=_json_serialize(self._geom)
    ))

    self._num_channels = X.N1()
    self._num_timepoints = X.N2()
    for m in range(self._num_channels):
        self.set_channel_property(m, 'location', self._geom[m, :])
        self.set_channel_property(m, 'group', 0)
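# Hypothetical usage, assuming this __init__ belongs to the mda recording
# extractor class (the class statement is not shown in this excerpt);
# the directory URI is a placeholder:
#   rec = MdaRecordingExtractor(
#       recording_directory='sha1dir://<hash>/some_recording', download=True)
#   print(rec._num_channels, rec._num_timepoints)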