def reup_file(object: Any):
    # Re-store a kachery file under a new basename. Relies on module-level
    # globals: recording_key, sorting_key, VERBOSE, DRY_RUN, FORCE.
    thekey = recording_key if recording_key['key_field'] in object else sorting_key
    if VERBOSE:
        print(
            f"Executing: object['{thekey['key_field']}'] = kp.store_file(kp.load_file(object['{thekey['key_field']}']), basename={thekey['basename']})"
        )
    if DRY_RUN:
        return
    # Turns out that kachery doesn't handle big files without manifests all that
    # well. Which was the point of this exercise. So let's take advantage of
    # being on the same filesystem to do a little magic.
    raw = object[thekey["key_field"]]
    print(f'Got raw: {raw}')
    if 'sha1dir' in raw:
        key_field = object[thekey['key_field']]
        sha1dir = key_field.split('/')[2]
        print(f'Got dir: {sha1dir}')
        kp.load_file(f'sha1://{sha1dir}')
        print(f"Fetching hash for file: {key_field}")
        reformed_field = trim_dir_annotation(f"{key_field}")
        if VERBOSE:
            print(f"(using reformed field {reformed_field})")
        try:
            sha1 = ka.get_file_hash(reformed_field)
        except Exception:
            if FORCE:
                print(f"\t** Trimmed lookup didn't work, falling back to kp.load_file({key_field})")
                kp.load_file(key_field)
                sha1 = ka.get_file_hash(key_field)
            else:
                print(f"Error on ka.get_file_hash({reformed_field}) -- aborting")
                exit()
    else:
        # sha1 = '/'.join(raw.split('/')[2:])
        sha1 = raw.split('/')[2]
    print(f'Got sha1: {sha1}')
    # Kachery shards its storage tree by the first six hex digits of the hash.
    src_path = f'/mnt/ceph/users/magland/kachery-storage/sha1/{sha1[0]}{sha1[1]}/{sha1[2]}{sha1[3]}/{sha1[4]}{sha1[5]}/{sha1}'
    dest_path = f'/mnt/ceph/users/jsoules/kachery-storage/sha1/{sha1[0]}{sha1[1]}/{sha1[2]}{sha1[3]}/{sha1[4]}{sha1[5]}/{sha1}'
    if VERBOSE:
        print(f"Executing: shutil.copyfile({src_path}, {dest_path})")
    if not exists(dest_path):
        pathlib.Path('/'.join(dest_path.split('/')[:-1])).mkdir(parents=True, exist_ok=True)
        copyfile(src_path, dest_path)
        print("\tCompleted copy operation.")
    object[thekey['key_field']] = kp.store_file(
        kp.load_file(object[thekey['key_field']]),
        basename=f"{thekey['basename']}")
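# A minimal usage sketch for reup_file, assuming the module-level globals it
# reads are configured as below. The key structures and the example record are
# hypothetical; with DRY_RUN=True the call only prints the planned operation.
recording_key = {'key_field': 'recording_uri', 'basename': 'raw.mda'}
sorting_key = {'key_field': 'sorting_uri', 'basename': 'firings.mda'}
VERBOSE = True
DRY_RUN = True
FORCE = False

record = {'recording_uri': 'sha1dir://0123456789abcdef.../raw.mda'}  # hypothetical URI
reup_file(record)  # with DRY_RUN=False this would copy the blob and re-store it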
def make(self, key):
    print('Computing SHA-1 and storing in kachery...')
    nwb_file_abs_path = Nwbfile.get_abs_path(key['nwb_file_name'])
    with ka.config(use_hard_links=True):
        kachery_path = ka.store_file(nwb_file_abs_path)
        key['nwb_file_sha1'] = ka.get_file_hash(kachery_path)
    self.insert1(key)
def __init__(self, *, recording_directory=None, timeseries_path=None, download=False,
             samplerate=None, geom=None, geom_path=None, params_path=None):
    RecordingExtractor.__init__(self)
    # A recording directory implies the standard MDA layout.
    if recording_directory:
        timeseries_path = recording_directory + '/raw.mda'
        geom_path = recording_directory + '/geom.csv'
        params_path = recording_directory + '/params.json'
    self._timeseries_path = timeseries_path
    if params_path:
        self._dataset_params = ka.load_object(params_path)
        self._samplerate = self._dataset_params['samplerate']
    else:
        self._dataset_params = dict(samplerate=samplerate)
        self._samplerate = samplerate
    if download:
        path0 = ka.load_file(path=self._timeseries_path)
        if not path0:
            raise Exception('Unable to realize file: ' + self._timeseries_path)
        self._timeseries_path = path0
    self._timeseries = DiskReadMda(self._timeseries_path)
    if self._timeseries is None:
        raise Exception('Unable to load timeseries: {}'.format(self._timeseries_path))
    X = self._timeseries
    if geom is not None:
        self._geom = geom
    elif geom_path:
        geom_path2 = ka.load_file(geom_path)
        self._geom = np.genfromtxt(geom_path2, delimiter=',')
    else:
        self._geom = np.zeros((X.N1(), 2))
    if self._geom.shape[0] != X.N1():
        # raise Exception(
        #     'Incompatible dimensions between geom.csv and timeseries file {} <> {}'.format(self._geom.shape[0], X.N1()))
        print('WARNING: Incompatible dimensions between geom.csv and timeseries file {} <> {}'
              .format(self._geom.shape[0], X.N1()))
        self._geom = np.zeros((X.N1(), 2))
    self._hash = ka.get_object_hash(
        dict(timeseries=ka.get_file_hash(self._timeseries_path),
             samplerate=self._samplerate,
             geom=_json_serialize(self._geom)))
    self._num_channels = X.N1()
    self._num_timepoints = X.N2()
    for m in range(self._num_channels):
        self.set_channel_property(m, 'location', self._geom[m, :])
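# A usage sketch for the constructor above, assuming it belongs to a class
# named MdaRecordingExtractor (the class name is an assumption; only the
# __init__ is shown). The directory URI below is hypothetical.
recording = MdaRecordingExtractor(
    recording_directory='sha1dir://0123456789abcdef...',  # hypothetical URI
    download=False)  # download=True realizes raw.mda locally via ka.load_file
print(recording._num_channels, recording._num_timepoints)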
def make(self, key):
    print('Computing SHA-1 and storing in kachery...')
    analysis_file_abs_path = AnalysisNwbfile().get_abs_path(key['analysis_file_name'])
    with ka.config(use_hard_links=True):
        kachery_path = ka.store_file(analysis_file_abs_path)
        key['analysis_file_sha1'] = ka.get_file_hash(kachery_path)
    self.insert1(key)
    # TODO: load from kachery and fetch_nwb
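# Both make() methods above share the same kachery pattern: store the file
# with hard links (avoiding a copy when source and storage share a
# filesystem), then record its SHA-1. A standalone sketch of that pattern;
# the .nwb path is hypothetical.
import kachery as ka

with ka.config(use_hard_links=True):
    uri = ka.store_file('/path/to/example.nwb')  # hypothetical file
    sha1 = ka.get_file_hash(uri)
print(f'Stored as {uri} with SHA-1 {sha1}')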
def patch_recording_geom(recording, geom_fname):
    print(f'PATCHING geom for recording: {recording["name"]}')
    geom_info = ka.get_file_info(geom_fname)
    x = recording['directory']
    y = ka.store_dir(x).replace('sha1dir://', 'sha1://')
    obj = ka.load_object(y)
    obj['files']['geom.csv'] = dict(size=geom_info['size'], sha1=geom_info['sha1'])
    x2 = ka.store_object(obj)
    recording['directory'] = 'sha1dir://' + ka.get_file_hash(x2) + '.patched'
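# A hedged sketch of calling patch_recording_geom; the recording record and
# the geom file URI are hypothetical. The function mutates the record in
# place: it swaps the geom.csv entry inside the sha1dir index object,
# re-stores the object, and rewrites 'directory' with a '.patched' suffix.
rec = {'name': 'example-recording',
       'directory': 'sha1dir://0123456789abcdef...'}  # hypothetical
patch_recording_geom(rec, 'sha1://fedcba9876543210.../geom.csv')  # hypothetical
print(rec['directory'])  # now a re-stored sha1dir:// URI ending in '.patched'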
def main():
    test1()
    f = kp.load_feed('feed://' + os.environ['FEED_ID'])
    N1 = 10000
    N2 = 1000
    a = kp.store_npy(np.meshgrid(np.arange(N1), np.arange(N2))[0])
    sf = f.get_subfeed('sf1')
    sf.append_message({'a': a, 'N1': N1, 'N2': N2})

    # test invalid manifest
    b = kp.store_npy(np.meshgrid(np.arange(N1 + 1), np.arange(N2))[0])
    invalid_manifest = kp.store_object({'invalid': True})
    b_invalid_manifest = b.split('?')[0] + '?manifest=' + ka.get_file_hash(invalid_manifest)
    sf.append_message({'b_invalid_manifest': b_invalid_manifest})
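# The invalid-manifest test above works by splicing a different object's hash
# into the '?manifest=' query parameter of a kachery URI. The same string
# manipulation in isolation (both URIs are made up):
uri = 'sha1://abc123?manifest=def456'
bogus_hash = '0000ff'  # hash of some unrelated stored object
tampered = uri.split('?')[0] + '?manifest=' + bogus_hash
print(tampered)  # sha1://abc123?manifest=0000ff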
def hash(self):
    return ka.get_object_hash(
        dict(firings=ka.get_file_hash(self._firings_path),
             samplerate=self._sampling_frequency))
def handle_message(self, msg):
    type0 = msg.get('type')
    if type0 == 'reportClientInfo':
        print('reported client info:', msg)
        self._feed_uri = msg['clientInfo']['feedUri']
        self._workspace_name = msg['clientInfo']['workspaceName']
        self._readonly = msg['clientInfo']['readOnly']
        if not self._feed_uri:
            self._feed_uri = 'feed://' + self._default_feed_id
            # self._feed_uri = kp.create_feed(feed_name='labbox-ephys-default').get_uri()
        # assert self._feed_uri.startswith('sha1://'), 'For now, feedUri must start with sha1://'
        self._feed = kp.load_feed(self._feed_uri)
        # Replay any messages already present in the workspace subfeeds.
        for key in ['recordings', 'sortings']:
            self._subfeed_positions[key] = 0
            subfeed_name = dict(key=key, workspaceName=self._workspace_name)
            subfeed = self._feed.get_subfeed(subfeed_name)
            messages = subfeed.get_next_messages(wait_msec=10)
            for m in messages:
                if 'action' in m:
                    self._send_message({'type': 'action', 'action': m['action']})
                else:
                    print(f'WARNING: No action in message for {key}')
            self._subfeed_positions[key] = self._subfeed_positions[key] + len(messages)
        self._send_message({'type': 'reportInitialLoadComplete'})
        # Flush any document actions that were queued before the feed loaded.
        if self._feed:
            qm = self._queued_document_action_messages
            self._queued_document_action_messages = []
            for m in qm:
                self.handle_message(m)
    elif type0 == 'appendDocumentAction':
        if self._readonly:
            print('Cannot append document action. This is a readonly feed.')
            return
        if self._feed is None:
            self._queued_document_action_messages.append(msg)
        else:
            subfeed_name = dict(key=msg['key'], workspaceName=self._workspace_name)
            subfeed = self._feed.get_subfeed(subfeed_name)
            subfeed.append_message({'action': msg['action']})
    elif type0 == 'hitherCreateJob':
        functionName = msg['functionName']
        kwargs = msg['kwargs']
        client_job_id = msg['clientJobId']
        try:
            outer_job = hi.run(functionName, **kwargs, labbox=self._labbox_context)
        except Exception as err:
            self._send_message({
                'type': 'hitherJobError',
                'job_id': client_job_id,
                'client_job_id': client_job_id,
                'error_message': f'Error creating outer job: {str(err)}',
                'runtime_info': None
            })
            return
        try:
            job_or_result = outer_job.wait()
        except Exception as err:
            self._send_message({
                'type': 'hitherJobError',
                'job_id': outer_job._job_id,
                'client_job_id': client_job_id,
                'error_message': str(err),
                'runtime_info': outer_job.get_runtime_info()
            })
            return
        if hasattr(job_or_result, '_job_id'):
            job = job_or_result
            setattr(job, '_client_job_id', client_job_id)
            job_id = job._job_id
            self._jobs_by_id[job_id] = job
            print(f'======== Created hither job (2): {job_id} {functionName}')
            self._send_message({
                'type': 'hitherJobCreated',
                'job_id': job_id,
                'client_job_id': client_job_id
            })
        else:
            result = job_or_result
            msg = {
                'type': 'hitherJobFinished',
                'client_job_id': client_job_id,
                'job_id': client_job_id,
                # 'result': _make_json_safe(result),
                'result_sha1': ka.get_file_hash(ka.store_object(_make_json_safe(result))),
                'runtime_info': outer_job.get_runtime_info()
            }
            self._send_message(msg)
    elif type0 == 'hitherCancelJob':
        job_id = msg['job_id']
        assert job_id, 'Missing job_id'
        assert job_id in self._jobs_by_id, f'No job with id: {job_id}'
        job = self._jobs_by_id[job_id]
        job.cancel()
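# A sketch of the client-to-server messages this handler expects. The field
# names come from the handler above; all values (feed id, workspace name,
# function name, job id) are hypothetical.
report_client_info = {
    'type': 'reportClientInfo',
    'clientInfo': {
        'feedUri': 'feed://0123abcd...',  # empty string falls back to the default feed
        'workspaceName': 'default',
        'readOnly': False,
    },
}
create_job = {
    'type': 'hitherCreateJob',
    'functionName': 'createjob_fetch_average_waveform',  # hypothetical function
    'kwargs': {},
    'clientJobId': 'client-job-001',
}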
def iterate(self):
    subfeed_watches = {}
    if (self._feed_uri is not None) and (self._feed_uri.startswith('feed://')):
        for key in ['recordings', 'sortings']:
            subfeed_name = dict(workspaceName=self._workspace_name, key=key)
            subfeed_watches[key] = dict(
                feedId=self._feed._feed_id,  # fix this
                subfeedName=subfeed_name,
                position=self._subfeed_positions[key])
    for w in self._additional_subfeed_watches:
        subfeed_watches[w['watch_name']] = dict(
            feedId=w['feed_id'],
            subfeedHash=w['subfeed_hash'],
            position=self._subfeed_positions[w['watch_name']])
    if len(subfeed_watches.keys()) > 0:
        messages = kp.watch_for_new_messages(subfeed_watches=subfeed_watches, wait_msec=100)
        for key in messages.keys():
            if key in ['recordings', 'sortings']:
                for m in messages[key]:
                    if 'action' in m:
                        self._send_message({'type': 'action', 'action': m['action']})
                    else:
                        print(f'WARNING: no action in feed message for {key}')
            else:
                for m in messages[key]:
                    self._send_message({'type': 'subfeedMessage', 'watchName': key, 'message': m})
            self._subfeed_positions[key] = self._subfeed_positions[key] + len(messages[key])
    hi.wait(0)
    # Report completed or failed hither jobs back to the client.
    job_ids = list(self._jobs_by_id.keys())
    for job_id in job_ids:
        job = self._jobs_by_id[job_id]
        status0 = job.get_status()
        if status0 == hi.JobStatus.FINISHED:
            print(f'======== Finished hither job: {job_id} {job.get_label()}')
            result = job.get_result()
            runtime_info = job.get_runtime_info()
            del self._jobs_by_id[job_id]
            msg = {
                'type': 'hitherJobFinished',
                'client_job_id': job._client_job_id,
                'job_id': job_id,
                # 'result': _make_json_safe(result),
                'result_sha1': ka.get_file_hash(ka.store_object(_make_json_safe(result))),
                'runtime_info': runtime_info
            }
            self._send_message(msg)
        elif status0 == hi.JobStatus.ERROR:
            exc = job.get_exception()
            runtime_info = job.get_runtime_info()
            del self._jobs_by_id[job_id]
            msg = {
                'type': 'hitherJobError',
                'job_id': job_id,
                'client_job_id': job._client_job_id,
                'error_message': str(exc),
                'runtime_info': runtime_info
            }
            self._send_message(msg)
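# The shape of the subfeed_watches dict handed to kp.watch_for_new_messages,
# reconstructed from iterate() above (ids and positions are made up). Watches
# are keyed by watch name and identify a subfeed either by name or by hash.
example_watches = {
    'recordings': {
        'feedId': '0123abcd...',  # hypothetical feed id
        'subfeedName': {'workspaceName': 'default', 'key': 'recordings'},
        'position': 0,
    },
    'my-extra-watch': {
        'feedId': '4567ef01...',      # hypothetical feed id
        'subfeedHash': '89abcdef...',  # hypothetical subfeed hash
        'position': 3,
    },
}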