def __init__(self, arg, download=False):
    """Initialize the wrapper from one of several recording descriptions.

    Accepts:
      - a string path (converted to ``dict(path=...)``),
      - an existing ``se.RecordingExtractor`` (wrapped directly),
      - a dict describing the recording: either ``recording`` + ``filters``,
        an ``nwb_path``, or a ``path`` ending in .mda / .nwb.json / .json,
        or a directory containing raw.mda.

    Raises Exception when the description cannot be resolved to an extractor.
    """
    super().__init__()
    self._hash = None
    if isinstance(arg, str):
        # Normalize a bare path string into the dict form handled below.
        arg = dict(path=arg)
    if isinstance(arg, se.RecordingExtractor):
        self._recording = arg
    else:
        self._recording = None
        # filters: wrap the inner recording recursively, then apply filters
        # and return early (no further path-based resolution).
        if ('recording' in arg) and ('filters' in arg):
            recording1 = AutoRecordingExtractor(arg['recording'])
            self._recording = self._apply_filters(recording1, arg['filters'])
            return
        if 'kachery_config' in arg:
            ka.set_config(**arg['kachery_config'])
        path = arg.get('path', '')
        if 'nwb_path' in arg:
            self._recording = NwbElectricalSeriesRecordingExtractor(
                path=path, nwb_path=arg['nwb_path'])
        elif path.endswith('.mda'):
            # Raw .mda timeseries: samplerate must be supplied by the caller.
            if 'samplerate' not in arg:
                raise Exception('Missing argument: samplerate')
            samplerate = arg['samplerate']
            self._recording = MdaRecordingExtractor(
                timeseries_path=path, samplerate=samplerate, download=download)
            hash0 = _sha1_of_object(
                dict(timeseries_sha1=ka.get_file_info(
                    path, algorithm='sha1')['sha1'], samplerate=samplerate))
            # NOTE(review): sets self.hash although __init__ initialized
            # self._hash — looks inconsistent; confirm which attribute the
            # rest of the class reads.
            setattr(self, 'hash', hash0)
        elif path.endswith('.nwb.json'):
            self._recording = NwbJsonRecordingExtractor(file_path=path)
            hash0 = ka.get_file_info(path)['sha1']
            # NOTE(review): same self.hash vs self._hash question as above.
            setattr(self, 'hash', hash0)
        elif path.endswith('.json') and (not path.endswith('.nwb.json')):
            # The extra .nwb.json exclusion is redundant (the previous elif
            # already consumed those paths) but kept for safety.
            obj = ka.load_object(path)
            if ('raw' in obj) and ('params' in obj) and ('geom' in obj):
                self._recording = MdaRecordingExtractor(
                    timeseries_path=obj['raw'],
                    samplerate=obj['params']['samplerate'],
                    geom=np.array(obj['geom']))
            else:
                raise Exception('Problem initializing recording extractor')
        elif ka.get_file_info(path + '/raw.mda'):
            # Directory containing an MDA recording (raw.mda present).
            self._recording = MdaRecordingExtractor(
                recording_directory=path, download=download)
        else:
            raise Exception('Unable to initialize recording extractor.')
    # Mirror channel properties from the wrapped recording onto self.
    self.copy_channel_properties(recording=self._recording)
def _read_header(path, verbose=True):
    """Read and parse an MDA file header from the first bytes of *path*.

    Loads at most 200 bytes via kachery, then parses: int32 dtype code,
    int32 bytes-per-entry (ignored here), int32 num_dims (negative means
    the dimensions are stored as int64), followed by the dimensions.

    Returns an ``MdaHeader`` on success, ``None`` on any parse failure
    (optionally printing the reason when *verbose*).  Raises Exception
    when the file itself cannot be found or its bytes cannot be loaded.
    """
    info0 = ka.get_file_info(path)
    if info0 is None:
        raise Exception(f'Unable to find file: {path}')
    bytes0 = ka.load_bytes(path, start=0, end=min(200, info0['size']))
    if bytes0 is None:
        # Diagnostic aid: retry info lookup against the default readonly
        # config before failing, so the printed info reflects that source.
        ka.set_config(fr='default_readonly')
        print(ka.get_file_info(path))
        raise Exception('Unable to load header bytes from {}'.format(path))
    try:
        # `with` replaces the four separate f.close() calls in the original.
        with io.BytesIO(bytes0) as f:
            dt_code = _read_int32(f)
            _ = _read_int32(f)  # num bytes per entry (recomputed elsewhere)
            num_dims = _read_int32(f)
            uses64bitdims = False
            if num_dims < 0:
                # Negative num_dims flags 64-bit dimension entries.
                uses64bitdims = True
                num_dims = -num_dims
            if (num_dims < 1) or (num_dims > 6):
                # allow single dimension as of 12/6/17
                if verbose:
                    print("Invalid number of dimensions: {}".format(num_dims))
                return None
            read_dim = _read_int64 if uses64bitdims else _read_int32
            dims = [read_dim(f) for _ in range(num_dims)]
            dt = _dt_from_dt_code(dt_code)
            if dt is None:
                if verbose:
                    print("Invalid data type code: {}".format(dt_code))
                return None
            H = MdaHeader(dt, dims)
            if uses64bitdims:
                H.uses64bitdims = True
                # 3 int32 fields + num_dims int64 dimension entries.
                H.header_size = 3 * 4 + H.num_dims * 8
            return H
    except Exception as e:
        # Deliberate boundary catch: any malformed header yields None
        # rather than propagating a parse error.
        if verbose:
            print(e)
        return None
def __init__(self, arg, samplerate=None):
    """Initialize the wrapper from a sorting description.

    Accepts an existing ``se.SortingExtractor`` (wrapped directly), a path
    string (converted to ``dict(path=..., samplerate=...)``), or a dict with
    a ``path`` key (and optional ``kachery_config``).

    Raises Exception when the description cannot be resolved.
    """
    super().__init__()
    self._hash = None
    if isinstance(arg, se.SortingExtractor):
        self._sorting = arg
        self.copy_unit_properties(sorting=self._sorting)
    else:
        self._sorting = None
        # isinstance() replaces the original type(...) == ... checks.
        if isinstance(arg, str):
            arg = dict(path=arg, samplerate=samplerate)
        if isinstance(arg, dict):
            if 'kachery_config' in arg:
                ka.set_config(**arg['kachery_config'])
            if 'path' in arg:
                path = arg['path']
                if ka.get_file_info(path):
                    # Realize (download if needed) the file locally.
                    file_path = ka.load_file(path)
                    if not file_path:
                        raise Exception(
                            'Unable to realize file: {}'.format(path))
                    self._init_from_file(file_path, original_path=path, kwargs=arg)
                else:
                    raise Exception('Not a file: {}'.format(path))
            else:
                raise Exception('Unable to initialize sorting extractor')
        else:
            raise Exception(
                'Unable to initialize sorting extractor (unexpected type)')
def patch_recording_geom(recording, geom_fname):
    """Swap the geom.csv entry of the recording's sha1dir for *geom_fname*.

    Loads the directory object behind ``recording['directory']``, points its
    ``geom.csv`` entry at the given geom file, stores the patched object, and
    rewrites ``recording['directory']`` to the new sha1dir URI (suffixed with
    ``.patched``).  Mutates *recording* in place.
    """
    print(f'PATCHING geom for recording: {recording["name"]}')
    geom_info = ka.get_file_info(geom_fname)
    dir_url = recording['directory']
    # Store the directory and convert its sha1dir:// URI into the sha1://
    # URI of the underlying directory object.
    obj_url = ka.store_dir(dir_url).replace('sha1dir://', 'sha1://')
    dir_obj = ka.load_object(obj_url)
    dir_obj['files']['geom.csv'] = dict(
        size=geom_info['size'],
        sha1=geom_info['sha1'],
    )
    patched_url = ka.store_object(dir_obj)
    recording['directory'] = 'sha1dir://' + ka.get_file_hash(patched_url) + '.patched'
def _read_chunk_1d(self, i, N):
    """Read N entries starting at entry index i as a 1-D numpy array.

    Byte offsets are computed from the parsed MDA header
    (``header_size`` + ``num_bytes_per_entry``).  On a load failure,
    prints a diagnostic including the file size when available, then
    re-raises the original exception.
    """
    start_byte = self._header.header_size + self._header.num_bytes_per_entry * i
    end_byte = start_byte + self._header.num_bytes_per_entry * N
    try:
        bytes0 = kp.load_bytes(self._path, start=int(start_byte), end=int(end_byte))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not intercepted just to print the diagnostic.
        info0 = ka.get_file_info(self._path)
        if info0 is None:
            print(f'Problem reading bytes {start_byte}-{end_byte} from file {self._path} (no info)')
        else:
            print(f'Problem reading bytes {start_byte}-{end_byte} from file {self._path} of size {info0["size"]}')
        raise
    return np.frombuffer(bytes0, dtype=self._header.dt, count=N)
def _try_mda_create_object(arg: Union[str, dict], samplerate=None) -> Union[None, dict]: if isinstance(arg, str): path = arg if not ka.get_file_info(path): return None return dict(sorting_format='mda', data=dict(firings=path, samplerate=samplerate)) if isinstance(arg, dict): if 'firings' in arg: return dict(recording_format='mda', data=dict(firings=arg['firings'], samplerate=arg.get('samplerate', None))) return None
def _download_files_in_item(x): if type(x) == str: if x.startswith('sha1://') or x.startswith('sha1dir://'): if not ka.get_file_info(x, fr=dict(url=None)): a = kp.load_file(x) assert a is not None, f'Unable to download file: {x}' return elif type(x) == dict: for _, val in x.items(): _download_files_in_item(val) return elif type(x) == list: for y in x: _download_files_in_item(y) return elif type(x) == tuple: for y in x: _download_files_in_item(y) return else: return
def _all_files_are_local_in_item(x): if type(x) == str: if x.startswith('sha1://') or x.startswith('sha1dir://'): if not ka.get_file_info(x, fr=dict(url=None)): return False return True elif type(x) == dict: for _, val in x.items(): if not _all_files_are_local_in_item(val): return False return True elif type(x) == list: for y in x: if not _all_files_are_local_in_item(y): return False return True elif type(x) == tuple: for y in x: if not _all_files_are_local_in_item(y): return False return True else: return True
def execute(_force_run=False, _container=None, **kwargs):
    """Run the wrapped hither function with caching and optional container.

    Closure over the registration scope: relies on `name`, `version`, `f`,
    `File`, `Result`, `Outputs`, `ConsoleCapture`, and the helper functions
    (_is_hash_url, _load_result, _deserialize_result, _serialize_result,
    _store_result, _handle_temporary_outputs, run_function_in_container)
    defined elsewhere in this module.

    Builds a hash_object from resolved inputs/outputs/parameters, returns a
    cached Result when available (unless _force_run), otherwise executes f
    directly or inside _container, then stores and returns the new Result.
    """
    import kachery as ka
    hash_object = dict(
        api_version='0.1.0',
        name=name,
        version=version,
        input_files=dict(),
        output_files=dict(),
        parameters=dict())
    resolved_kwargs = dict()
    hither_input_files = getattr(f, '_hither_input_files', [])
    hither_output_files = getattr(f, '_hither_output_files', [])
    hither_parameters = getattr(f, '_hither_parameters', [])

    # Let's make sure the input and output files are all coming in as File objects
    for input_file in hither_input_files:
        iname = input_file['name']
        if iname in kwargs:
            if type(kwargs[iname]) == str:
                kwargs[iname] = File(kwargs[iname])
    for output_file in hither_output_files:
        oname = output_file['name']
        if oname in kwargs:
            if type(kwargs[oname]) == str:
                kwargs[oname] = File(kwargs[oname])

    # Resolve input files: realize hash URIs locally and record their hashes.
    input_file_keys = []
    for input_file in hither_input_files:
        iname = input_file['name']
        if iname not in kwargs or kwargs[iname] is None:
            if input_file['required']:
                raise Exception(
                    'Missing required input file: {}'.format(iname))
        else:
            x = kwargs[iname]  # a hither File object
            if x._path is None:
                raise Exception(
                    'Input file has no path: {}'.format(iname))
            # we really want the path
            x2 = x._path
            if _is_hash_url(x2):
                # a hash url — download/realize to a local path
                y = ka.load_file(x2)
                if y is None:
                    raise Exception(
                        'Unable to load input file {}: {}'.format(iname, x))
                x2 = y
            info0 = ka.get_file_info(x2)
            if info0 is None:
                raise Exception(
                    'Unable to get info for input file {}: {}'.format(iname, x2))
            tmp0 = dict()
            for field0 in ['sha1', 'md5']:
                if field0 in info0:
                    tmp0[field0] = info0[field0]
            hash_object['input_files'][iname] = tmp0
            input_file_keys.append(iname)
            resolved_kwargs[iname] = x2

    # Resolve output files: plain local paths only (hash URIs are invalid).
    output_file_keys = []
    for output_file in hither_output_files:
        oname = output_file['name']
        if oname not in kwargs or kwargs[oname] is None:
            if output_file['required']:
                raise Exception(
                    'Missing required output file: {}'.format(oname))
        else:
            x = kwargs[oname]
            x2 = x._path
            if _is_hash_url(x2):
                raise Exception(
                    'Output file {} cannot be a hash URI: {}'.format(oname, x2))
            resolved_kwargs[oname] = x2
        if oname in resolved_kwargs:
            hash_object['output_files'][oname] = True
            output_file_keys.append(oname)

    # Resolve declared parameters (defaults for missing optional ones).
    for parameter in hither_parameters:
        pname = parameter['name']
        if pname not in kwargs or kwargs[pname] is None:
            if parameter['required']:
                raise Exception(
                    'Missing required parameter: {}'.format(pname))
            if 'default' in parameter:
                resolved_kwargs[pname] = parameter['default']
            # NOTE(review): a missing optional parameter with no default
            # leaves resolved_kwargs[pname] unset, making the next line
            # raise KeyError — present in the original as well; confirm
            # whether declared parameters always carry a default.
        else:
            resolved_kwargs[pname] = kwargs[pname]
        hash_object['parameters'][pname] = resolved_kwargs[pname]

    # Any remaining kwargs are treated as extra parameters.
    for k, v in kwargs.items():
        if k not in resolved_kwargs:
            hash_object['parameters'][k] = v
            resolved_kwargs[k] = v

    # Try the cache first unless forced to run.
    if not _force_run:
        result_serialized: Union[dict, None] = _load_result(
            hash_object=hash_object)
        if result_serialized is not None:
            result0 = _deserialize_result(result_serialized)
            if result0 is not None:
                # Copy cached outputs to the caller's requested paths.
                for output_file in hither_output_files:
                    oname = output_file['name']
                    if oname in resolved_kwargs:
                        shutil.copyfile(
                            getattr(result0.outputs, oname)._path,
                            resolved_kwargs[oname])
                _handle_temporary_outputs([
                    getattr(result0.outputs, oname)
                    for oname in output_file_keys
                ])
                # Replay the captured console output of the cached run.
                if result0.runtime_info['stdout']:
                    sys.stdout.write(result0.runtime_info['stdout'])
                if result0.runtime_info['stderr']:
                    sys.stderr.write(result0.runtime_info['stderr'])
                print(
                    '===== Hither: using cached result for {}'.format(name))
                return result0

    # Cache miss: actually run the function, capturing console output.
    with ConsoleCapture() as cc:
        if _container is None:
            returnval = f(**resolved_kwargs)
        else:
            # Allow named container aliases registered on the function.
            if hasattr(f, '_hither_containers'):
                if _container in getattr(f, '_hither_containers'):
                    _container = getattr(f, '_hither_containers')[_container]
            returnval = run_function_in_container(
                name=name,
                function=f,
                input_file_keys=input_file_keys,
                output_file_keys=output_file_keys,
                container=_container,
                keyword_args=resolved_kwargs)

    result = Result()
    result.outputs = Outputs()
    for oname in hash_object['output_files'].keys():
        # BUG FIX: the original read kwargs[k], reusing the stale loop
        # variable from the extra-parameters loop above instead of the
        # output name being processed here.
        setattr(result.outputs, oname, kwargs[oname])
        result._output_names.append(oname)
    result.runtime_info = cc.runtime_info()
    result.hash_object = hash_object
    result.retval = returnval
    _handle_temporary_outputs(
        [getattr(result.outputs, oname) for oname in output_file_keys])
    _store_result(serialized_result=_serialize_result(result))
    return result
def _prepare_job_to_run(job):
    """Resolve a queued job's inputs, outputs, and parameters in place.

    Reads the job dict produced at submission time, realizes hash-URI input
    files locally (recording their hashes into job['hash_object']), validates
    output file paths, and writes the resolved call arguments and bookkeeping
    lists back onto *job* (resolved_kwargs, input/output file keys and
    extensions).  Raises Exception on missing required files or unresolvable
    inputs.
    """
    import kachery as ka
    name = job['name']
    # version = job['version']
    f = job['f']
    kwargs = job['kwargs']
    _container = job['container']
    _cache = job['cache']
    _cache_failing = job['cache_failing']
    # _force_run = job['force_run']
    _gpu = job['gpu']
    _exception_on_fail = job['exception_on_fail']
    if _exception_on_fail is None:
        # Default behavior: failures raise.
        _exception_on_fail = True
    _job_handler = job['job_handler']
    hash_object = job['hash_object']
    result = job['result']
    resolved_input_files = dict()
    hither_input_files = getattr(f, '_hither_input_files', [])
    hither_output_files = getattr(f, '_hither_output_files', [])
    # hither_parameters = getattr(f, '_hither_parameters', [])
    _show_console = job['show_console']
    _show_cached_console = job['show_cached_console']
    _job_timeout = job['timeout']

    # Resolve input files: realize hash URIs locally (when kachery_resolve
    # is set for that input) and record sha1/md5 into hash_object.
    input_file_keys = []
    input_file_extensions = dict()
    for input_file in hither_input_files:
        iname = input_file['name']
        if iname not in kwargs or kwargs[iname] is None:
            if input_file['required']:
                raise Exception(
                    'Unexpected: missing required input file: "{}"'.format(
                        iname))
        else:
            x = kwargs[iname]  # a hither File object
            if x._path is None:
                raise Exception(
                    'Unexpected: input file has no path: {}'.format(iname))
            # we really want the path
            x2 = x._path
            if _is_hash_url(x2) and input_file['kachery_resolve']:
                # a hash url
                y = ka.load_file(x2)
                if y is None:
                    raise Exception('Unable to load input file {}: {}'.format(
                        iname, x))
                x2 = y
            info0 = ka.get_file_info(x2)
            if info0 is None:
                raise Exception(
                    'Unable to get info for input file {}: {}'.format(
                        iname, x2))
            tmp0 = dict()
            for field0 in ['sha1', 'md5']:
                if field0 in info0:
                    tmp0[field0] = info0[field0]
            hash_object['input_files'][iname] = tmp0
            input_file_keys.append(iname)
            # Extension taken from the original (pre-resolution) path.
            input_file_extensions[iname] = _file_extension(x._path)
            resolved_input_files[iname] = x2

    # Resolve output files: must be plain local paths, not hash URIs.
    resolved_output_files = dict()
    output_file_keys = []
    output_file_extensions = dict()
    for output_file in hither_output_files:
        oname = output_file['name']
        if oname not in kwargs or kwargs[oname] is None:
            if output_file['required']:
                raise Exception(
                    'Unexpected: missing required output file: {}'.format(
                        oname))
        else:
            x = kwargs[oname]
            x2 = x._path
            if _is_hash_url(x2):
                raise Exception(
                    'Output file {} cannot be a hash URI: {}'.format(
                        oname, x2))
            resolved_output_files[oname] = x2
            output_file_keys.append(oname)
            output_file_extensions[oname] = _file_extension(x._path)

    # Parameters were already resolved into hash_object at submission time.
    resolved_parameters = hash_object['parameters']

    # Merge inputs, outputs, and parameters into the final call kwargs.
    resolved_kwargs = dict()
    for k, v in resolved_input_files.items():
        resolved_kwargs[k] = v
    for k, v in resolved_output_files.items():
        resolved_kwargs[k] = v
    for k, v in resolved_parameters.items():
        resolved_kwargs[k] = v

    # Publish everything back onto the job dict for the runner to use.
    job['resolved_kwargs'] = resolved_kwargs
    job['input_file_keys'] = input_file_keys
    job['input_file_extensions'] = input_file_extensions
    job['output_file_keys'] = output_file_keys
    job['output_file_extensions'] = output_file_extensions