Code example #1
from os.path import exists
from shutil import copyfile
from typing import Any
import pathlib
import sys

import kachery as ka        # aliases as used throughout these examples
import kachery_p2p as kp

# VERBOSE, DRY_RUN, FORCE, recording_key, sorting_key, and trim_dir_annotation
# are defined elsewhere in the original script.


def reup_file(obj: Any):
    # Use the recording key spec if its key_field is present on the object;
    # otherwise fall back to the sorting key spec.
    thekey = recording_key if recording_key['key_field'] in obj else sorting_key
    if VERBOSE:
        print(
            f"Executing: object['{thekey['key_field']}'] = kp.store_file(kp.load_file(object['{thekey['key_field']}']), basename={thekey['basename']})"
        )
    if DRY_RUN: return

    # It turns out that kachery doesn't handle big files without manifests all
    # that well (which was the point of this exercise), so take advantage of
    # being on the same filesystem to do a little magic.
    raw = obj[thekey["key_field"]]
    print(f'Got raw: {raw}')
    if 'sha1dir' in raw:
        field_uri = obj[thekey['key_field']]
        sha1dir = field_uri.split('/')[2]
        print(f'Got dir: {sha1dir}')
        kp.load_file(f'sha1://{sha1dir}')
        print(f"Fetching hash for file: {field_uri}")
        reformed_field = trim_dir_annotation(field_uri)
        if VERBOSE: print(f"(using reformed field {reformed_field})")
        try:
            sha1 = ka.get_file_hash(reformed_field)
        except Exception:
            if FORCE:
                print(f"\t** Trimmed lookup didn't work, falling back to kp.load_file({field_uri})")
                kp.load_file(field_uri)
                sha1 = ka.get_file_hash(field_uri)
            else:
                print(f"Error on ka.get_file_hash({reformed_field}) -- aborting")
                sys.exit(1)
    else:
        # raw looks like 'sha1://<hash>[/...]'; the hash is the third '/'-separated component
        # sha1 = '/'.join(raw.split('/')[2:])
        sha1 = raw.split('/')[2]
    print(f'Got sha1: {sha1}')
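    # kachery's local store shards each file under three two-character
    # directories taken from the leading hex digits of its hash.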
    src_path = f'/mnt/ceph/users/magland/kachery-storage/sha1/{sha1[0]}{sha1[1]}/{sha1[2]}{sha1[3]}/{sha1[4]}{sha1[5]}/{sha1}'
    dest_path = f'/mnt/ceph/users/jsoules/kachery-storage/sha1/{sha1[0]}{sha1[1]}/{sha1[2]}{sha1[3]}/{sha1[4]}{sha1[5]}/{sha1}'
    if VERBOSE: print(f"Executing: shutil.copyfile({src_path}, {dest_path})")
    if not exists(dest_path):
        pathlib.Path(dest_path).parent.mkdir(parents=True, exist_ok=True)
        copyfile(src_path, dest_path)
        print("\tCompleted copy operation.")
    obj[thekey['key_field']] = kp.store_file(
        kp.load_file(obj[thekey['key_field']]),
        basename=thekey['basename'])
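For context, reup_file relies on module-level key specs naming which field of each object carries a kachery URI. A minimal sketch of those globals and a call; the field names, basenames, and hash are illustrative guesses, not taken from the original script:

# Hypothetical key specs -- the original script defines these elsewhere.
recording_key = dict(key_field='raw', basename='raw.mda')
sorting_key = dict(key_field='firings', basename='firings.mda')
VERBOSE, DRY_RUN, FORCE = True, False, False

recording = {'raw': 'sha1://0123456789abcdef0123456789abcdef01234567/raw.mda'}
reup_file(recording)  # copies the file into the destination store and rewrites recording['raw'] in place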
Code example #2
    def make(self, key):
        # Store the NWB file in kachery (hard-linked rather than copied) and
        # record its SHA-1 on the key before inserting.
        print('Computing SHA-1 and storing in kachery...')
        nwb_file_abs_path = Nwbfile.get_abs_path(key['nwb_file_name'])
        with ka.config(use_hard_links=True):
            kachery_path = ka.store_file(nwb_file_abs_path)
            key['nwb_file_sha1'] = ka.get_file_hash(kachery_path)
        self.insert1(key)
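Once the SHA-1 is recorded, the file can be pulled back anywhere the kachery store is reachable. A minimal round-trip sketch, mirroring the make() above:

sha1 = key['nwb_file_sha1']                  # as set by make() above
local_path = ka.load_file(f'sha1://{sha1}')  # resolves to a local path, or None if unavailable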
Code example #3
    def __init__(self,
                 *,
                 recording_directory=None,
                 timeseries_path=None,
                 download=False,
                 samplerate=None,
                 geom=None,
                 geom_path=None,
                 params_path=None):
        RecordingExtractor.__init__(self)
        if recording_directory:
            timeseries_path = recording_directory + '/raw.mda'
            geom_path = recording_directory + '/geom.csv'
            params_path = recording_directory + '/params.json'
        self._timeseries_path = timeseries_path
        if params_path:
            self._dataset_params = ka.load_object(params_path)
            self._samplerate = self._dataset_params['samplerate']
        else:
            self._dataset_params = dict(samplerate=samplerate)
            self._samplerate = samplerate

        if download:
            path0 = ka.load_file(path=self._timeseries_path)
            if not path0:
                raise Exception('Unable to realize file: ' +
                                self._timeseries_path)
            self._timeseries_path = path0

        self._timeseries = DiskReadMda(self._timeseries_path)
        if self._timeseries is None:
            raise Exception('Unable to load timeseries: {}'.format(
                self._timeseries_path))
        X = self._timeseries
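        # X.N1() is the channel count and X.N2() the timepoint count (see below)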
        if geom is not None:
            self._geom = geom
        elif geom_path:
            geom_path2 = ka.load_file(geom_path)
            self._geom = np.genfromtxt(geom_path2, delimiter=',')
        else:
            self._geom = np.zeros((X.N1(), 2))

        if self._geom.shape[0] != X.N1():
            # raise Exception(
            #    'Incompatible dimensions between geom.csv and timeseries file {} <> {}'.format(self._geom.shape[0], X.N1()))
            print(
                'WARNING: Incompatible dimensions between geom.csv and timeseries file {} <> {}'
                .format(self._geom.shape[0], X.N1()))
            self._geom = np.zeros((X.N1(), 2))

        self._hash = ka.get_object_hash(
            dict(timeseries=ka.get_file_hash(self._timeseries_path),
                 samplerate=self._samplerate,
                 geom=_json_serialize(self._geom)))

        self._num_channels = X.N1()
        self._num_timepoints = X.N2()
        for m in range(self._num_channels):
            self.set_channel_property(m, 'location', self._geom[m, :])
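This __init__ appears to belong to an .mda-format RecordingExtractor; the class name in the sketch below is assumed, since the excerpt shows only the constructor:

rx = MdaRecordingExtractor(   # assumed class name
    recording_directory='sha1dir://<hash>/my_recording',  # expects raw.mda, geom.csv, params.json inside
    download=True)            # realize raw.mda locally up front via ka.load_file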
Code example #4
    def make(self, key):
        # Same pattern as code example #2, but for analysis NWB files.
        print('Computing SHA-1 and storing in kachery...')
        analysis_file_abs_path = AnalysisNwbfile().get_abs_path(key['analysis_file_name'])
        with ka.config(use_hard_links=True):
            kachery_path = ka.store_file(analysis_file_abs_path)
            key['analysis_file_sha1'] = ka.get_file_hash(kachery_path)
        self.insert1(key)

    # TODO: load from kachery and fetch_nwb
def patch_recording_geom(recording, geom_fname):
    # Replace the geom.csv entry in a recording's kachery directory index.
    print(f'PATCHING geom for recording: {recording["name"]}')
    geom_info = ka.get_file_info(geom_fname)
    # ka.store_dir returns a sha1dir:// URI whose hash addresses the directory
    # index object; rewriting the scheme to sha1:// loads that index as JSON.
    directory = recording['directory']
    index_uri = ka.store_dir(directory).replace('sha1dir://', 'sha1://')
    obj = ka.load_object(index_uri)
    obj['files']['geom.csv'] = dict(size=geom_info['size'], sha1=geom_info['sha1'])
    patched_uri = ka.store_object(obj)
    recording['directory'] = 'sha1dir://' + ka.get_file_hash(patched_uri) + '.patched'
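The '.patched' suffix annotates the directory hash without changing the URI scheme; a helper such as trim_dir_annotation in code example #1 presumably strips annotations like this before hash lookups. A sketch of what such a helper might look like, assumed rather than taken from the original source:

def trim_dir_annotation(uri: str) -> str:
    # 'sha1dir://<hash>.patched/...' -> 'sha1dir://<hash>/...' (assumed behavior)
    scheme, _, rest = uri.partition('://')
    parts = rest.split('/')
    parts[0] = parts[0].split('.')[0]  # drop any '.annotation' suffix from the hash
    return scheme + '://' + '/'.join(parts)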
Code example #6
def main():
    test1()

    f = kp.load_feed('feed://' + os.environ['FEED_ID'])
    N1 = 10000
    N2 = 1000
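    # ~10-million-element arrays -> stored files large enough to carry a chunk manifest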
    a = kp.store_npy(np.meshgrid(np.arange(N1), np.arange(N2))[0])
    sf = f.get_subfeed('sf1')
    sf.append_message({'a': a, 'N1': N1, 'N2': N2})

    # test invalid manifest
    b = kp.store_npy(np.meshgrid(np.arange(N1 + 1), np.arange(N2))[0])
    invalid_manifest = kp.store_object({'invalid': True})
    b_invalid_manifest = b.split('?')[0] + '?manifest=' + ka.get_file_hash(invalid_manifest)
    sf.append_message({'b_invalid_manifest': b_invalid_manifest})
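A consumer would read these messages back from the subfeed and load the arrays; loading b_invalid_manifest should fail, which is the point of the test. A minimal sketch, assuming kp.load_npy is the counterpart of kp.store_npy:

sf = f.get_subfeed('sf1')
for m in sf.get_next_messages(wait_msec=100):
    if 'a' in m:
        a = kp.load_npy(m['a'])   # assumed loader; expected to succeed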
Code example #7
    def hash(self):
        # Identity hash for this sorting: the firings file hash plus the
        # sampling rate, hashed together as one object.
        return ka.get_object_hash(
            dict(firings=ka.get_file_hash(self._firings_path),
                 samplerate=self._sampling_frequency))
Code example #8
 def handle_message(self, msg):
     # Dispatch on message type: client registration, document actions, and
     # hither job creation / cancellation.
     type0 = msg.get('type')
     if type0 == 'reportClientInfo':
         print('reported client info:', msg)
         self._feed_uri = msg['clientInfo']['feedUri']
         self._workspace_name = msg['clientInfo']['workspaceName']
         self._readonly = msg['clientInfo']['readOnly']
         if not self._feed_uri:
             self._feed_uri = 'feed://' + self._default_feed_id
             # self._feed_uri = kp.create_feed(feed_name='labbox-ephys-default').get_uri()
         # assert self._feed_uri.startswith('sha1://'), 'For now, feedUri must start with sha1://'
         self._feed = kp.load_feed(self._feed_uri)
         for key in ['recordings', 'sortings']:
             self._subfeed_positions[key] = 0
             subfeed_name = dict(key=key,
                                 workspaceName=self._workspace_name)
             subfeed = self._feed.get_subfeed(subfeed_name)
             messages = subfeed.get_next_messages(wait_msec=10)
             for m in messages:
                 if 'action' in m:
                     self._send_message({
                         'type': 'action',
                         'action': m['action']
                     })
                 else:
                     print(f'WARNING: No action in message for {key}')
             self._subfeed_positions[key] += len(messages)
         self._send_message({'type': 'reportInitialLoadComplete'})
         if self._feed:
             qm = self._queued_document_action_messages
             self._queued_document_action_messages = []
             for m in qm:
                 self.handle_message(m)
     elif type0 == 'appendDocumentAction':
         if self._readonly:
             print('Cannot append document action. This is a readonly feed.')
             return
         if self._feed is None:
             self._queued_document_action_messages.append(msg)
         else:
             subfeed_name = dict(key=msg['key'],
                                 workspaceName=self._workspace_name)
             subfeed = self._feed.get_subfeed(subfeed_name)
             subfeed.append_message({'action': msg['action']})
     elif type0 == 'hitherCreateJob':
         functionName = msg['functionName']
         kwargs = msg['kwargs']
         client_job_id = msg['clientJobId']
         try:
             outer_job = hi.run(functionName,
                                **kwargs,
                                labbox=self._labbox_context)
         except Exception as err:
             self._send_message({
                 'type': 'hitherJobError',
                 'job_id': client_job_id,
                 'client_job_id': client_job_id,
                 'error_message': f'Error creating outer job: {str(err)}',
                 'runtime_info': None
             })
             return
         try:
             job_or_result = outer_job.wait()
         except Exception as err:
             self._send_message({
                 'type': 'hitherJobError',
                 'job_id': outer_job._job_id,
                 'client_job_id': client_job_id,
                 'error_message': str(err),
                 'runtime_info': outer_job.get_runtime_info()
             })
             return
         if hasattr(job_or_result, '_job_id'):
             job = job_or_result
             setattr(job, '_client_job_id', client_job_id)
             job_id = job._job_id
             self._jobs_by_id[job_id] = job
             print(f'======== Created hither job (2): {job_id} {functionName}')
             self._send_message({
                 'type': 'hitherJobCreated',
                 'job_id': job_id,
                 'client_job_id': client_job_id
             })
         else:
             result = job_or_result
             reply = {
                 'type': 'hitherJobFinished',
                 'client_job_id': client_job_id,
                 'job_id': client_job_id,
                 # 'result': _make_json_safe(result),
                 'result_sha1': ka.get_file_hash(ka.store_object(_make_json_safe(result))),
                 'runtime_info': outer_job.get_runtime_info()
             }
             self._send_message(reply)
     elif type0 == 'hitherCancelJob':
         job_id = msg['job_id']
         assert job_id, 'Missing job_id'
         assert job_id in self._jobs_by_id, f'No job with id: {job_id}'
         job = self._jobs_by_id[job_id]
         job.cancel()
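For reference, a sketch of the message shapes this handler consumes. The field names come from the code above; the values and the handler instance are illustrative:

handler.handle_message({
    'type': 'hitherCreateJob',
    'functionName': 'some_registered_function',
    'kwargs': {'x': 1},
    'clientJobId': 'client-job-0001',
})
handler.handle_message({'type': 'hitherCancelJob', 'job_id': 'job-0001'})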
Code example #9
 def iterate(self):
     # One polling pass: forward new feed messages to the client, then report
     # finished or errored hither jobs.
     subfeed_watches = {}
     if self._feed_uri is not None and self._feed_uri.startswith('feed://'):
         for key in ['recordings', 'sortings']:
             subfeed_name = dict(workspaceName=self._workspace_name,
                                 key=key)
             subfeed_watches[key] = dict(
                 feedId=self._feed._feed_id,  # fix this
                 subfeedName=subfeed_name,
                 position=self._subfeed_positions[key])
     for w in self._additional_subfeed_watches:
         subfeed_watches[w['watch_name']] = dict(
             feedId=w['feed_id'],
             subfeedHash=w['subfeed_hash'],
             position=self._subfeed_positions[w['watch_name']])
     if subfeed_watches:
         messages = kp.watch_for_new_messages(
             subfeed_watches=subfeed_watches, wait_msec=100)
         for key in messages.keys():
             if key in ['recordings', 'sortings']:
                 for m in messages[key]:
                     if 'action' in m:
                         self._send_message({
                             'type': 'action',
                             'action': m['action']
                         })
                     else:
                         print(f'WARNING: no action in feed message for {key}')
             else:
                 for m in messages[key]:
                     self._send_message({
                         'type': 'subfeedMessage',
                         'watchName': key,
                         'message': m
                     })
             self._subfeed_positions[key] += len(messages[key])
     hi.wait(0)
     job_ids = list(self._jobs_by_id.keys())
     for job_id in job_ids:
         job = self._jobs_by_id[job_id]
         status0 = job.get_status()
         if status0 == hi.JobStatus.FINISHED:
             print(f'======== Finished hither job: {job_id} {job.get_label()}')
             result = job.get_result()
             runtime_info = job.get_runtime_info()
             del self._jobs_by_id[job_id]
             msg = {
                 'type': 'hitherJobFinished',
                 'client_job_id': job._client_job_id,
                 'job_id': job_id,
                 # 'result': _make_json_safe(result),
                 'result_sha1': ka.get_file_hash(ka.store_object(_make_json_safe(result))),
                 'runtime_info': runtime_info
             }
             self._send_message(msg)
         elif status0 == hi.JobStatus.ERROR:
             exc = job.get_exception()
             runtime_info = job.get_runtime_info()
             del self._jobs_by_id[job_id]
             msg = {
                 'type': 'hitherJobError',
                 'job_id': job_id,
                 'client_job_id': job._client_job_id,
                 'error_message': str(exc),
                 'runtime_info': runtime_info
             }
             self._send_message(msg)
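iterate() is evidently designed to be polled; a minimal driving loop might look like the following, where the session instance and the outer loop are assumptions rather than part of the original:

while True:
    # iterate() already blocks briefly inside kp.watch_for_new_messages,
    # so a tight loop does not spin.
    session.iterate()   # 'session' is a hypothetical instance of this class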