Example #1
    def __init__(self, arg, download=False):
        super().__init__()
        self._hash = None
        if isinstance(arg, str):
            arg = dict(path=arg)
        if isinstance(arg, se.RecordingExtractor):
            self._recording = arg
        else:
            self._recording = None

            # filters
            if ('recording' in arg) and ('filters' in arg):
                recording1 = AutoRecordingExtractor(arg['recording'])
                self._recording = self._apply_filters(recording1,
                                                      arg['filters'])
                return

            if 'kachery_config' in arg:
                ka.set_config(**arg['kachery_config'])
            path = arg.get('path', '')
            if 'nwb_path' in arg:
                self._recording = NwbElectricalSeriesRecordingExtractor(
                    path=path, nwb_path=arg['nwb_path'])
            elif path.endswith('.mda'):
                if 'samplerate' not in arg:
                    raise Exception('Missing argument: samplerate')
                samplerate = arg['samplerate']
                self._recording = MdaRecordingExtractor(timeseries_path=path,
                                                        samplerate=samplerate,
                                                        download=download)
                hash0 = _sha1_of_object(
                    dict(timeseries_sha1=ka.get_file_info(
                        path, algorithm='sha1')['sha1'],
                         samplerate=samplerate))
                self.hash = hash0
            elif path.endswith('.nwb.json'):
                self._recording = NwbJsonRecordingExtractor(file_path=path)
                hash0 = ka.get_file_info(path)['sha1']
                self.hash = hash0
            elif path.endswith('.json'):  # .nwb.json is handled above
                obj = ka.load_object(path)
                if ('raw' in obj) and ('params' in obj) and ('geom' in obj):
                    self._recording = MdaRecordingExtractor(
                        timeseries_path=obj['raw'],
                        samplerate=obj['params']['samplerate'],
                        geom=np.array(obj['geom']))
                else:
                    raise Exception('Problem initializing recording extractor')
            elif ka.get_file_info(path + '/raw.mda'):
                self._recording = MdaRecordingExtractor(
                    recording_directory=path, download=download)
            else:
                raise Exception('Unable to initialize recording extractor.')
        self.copy_channel_properties(recording=self._recording)
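The constructor above keys its hash on ka.get_file_info. For reference, a minimal sketch of that call in isolation (the sha1:// URI below is a placeholder, not a real file):

import kachery as ka

path = 'sha1://0123456789abcdef0123456789abcdef01234567/raw.mda'  # placeholder URI

info = ka.get_file_info(path, algorithm='sha1')
if info is None:
    # get_file_info returns None when the file is unknown to kachery
    raise Exception('Unable to find file: {}'.format(path))
print(info['sha1'], info['size'])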
Example #2
def _read_header(path, verbose=True):
    info0 = ka.get_file_info(path)
    if info0 is None:
        raise Exception(f'Unable to find file: {path}')
    bytes0 = ka.load_bytes(path, start=0, end=min(200, info0['size']))
    if bytes0 is None:
        ka.set_config(fr='default_readonly')
        print(ka.get_file_info(path))
        raise Exception('Unable to load header bytes from {}'.format(path))
    f = io.BytesIO(bytes0)
    try:
        dt_code = _read_int32(f)
        _ = _read_int32(f)  # num bytes per entry
        num_dims = _read_int32(f)
        uses64bitdims = False
        if (num_dims < 0):
            uses64bitdims = True
            num_dims = -num_dims
        if (num_dims < 1) or (num_dims > 6):  # allow single dimension as of 12/6/17
            if verbose:
                print("Invalid number of dimensions: {}".format(num_dims))
            f.close()
            return None
        dims = []
        dimprod = 1
        if uses64bitdims:
            for _ in range(0, num_dims):
                tmp0 = _read_int64(f)
                dimprod = dimprod * tmp0
                dims.append(tmp0)
        else:
            for _ in range(0, num_dims):
                tmp0 = _read_int32(f)
                dimprod = dimprod * tmp0
                dims.append(tmp0)
        dt = _dt_from_dt_code(dt_code)
        if dt is None:
            if verbose:
                print("Invalid data type code: {}".format(dt_code))
            f.close()
            return None
        H = MdaHeader(dt, dims)
        if (uses64bitdims):
            H.uses64bitdims = True
            H.header_size = 3 * 4 + H.num_dims * 8
        f.close()
        return H
    except Exception as e:  # catch *all* exceptions
        if verbose:
            print(e)
        f.close()
        return None
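A self-contained sketch of the byte-range read that _read_header relies on (placeholder URI; it assumes the header's int32 fields are little-endian, which is an assumption about the _read_int32 helper):

import struct
import kachery as ka

path = 'sha1://0123456789abcdef0123456789abcdef01234567/raw.mda'  # placeholder URI

info = ka.get_file_info(path)
assert info is not None, f'Unable to find file: {path}'

# Pull only the first bytes of the file, exactly as _read_header does
bytes0 = ka.load_bytes(path, start=0, end=min(200, info['size']))

# The fixed part of the header: data type code, bytes per entry, number of dims
dt_code, num_bytes_per_entry, num_dims = struct.unpack('<3i', bytes0[:12])
print(dt_code, num_bytes_per_entry, num_dims)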
Example #3
    def __init__(self, arg, samplerate=None):
        super().__init__()
        self._hash = None
        if isinstance(arg, se.SortingExtractor):
            self._sorting = arg
            self.copy_unit_properties(sorting=self._sorting)
        else:
            self._sorting = None
            if type(arg) == str:
                arg = dict(path=arg, samplerate=samplerate)
            if type(arg) == dict:
                if 'kachery_config' in arg:
                    ka.set_config(**arg['kachery_config'])
                if 'path' in arg:
                    path = arg['path']
                    if ka.get_file_info(path):
                        file_path = ka.load_file(path)
                        if not file_path:
                            raise Exception(
                                'Unable to realize file: {}'.format(path))
                        self._init_from_file(file_path,
                                             original_path=path,
                                             kwargs=arg)
                    else:
                        raise Exception('Not a file: {}'.format(path))
                else:
                    raise Exception('Unable to initialize sorting extractor')
            else:
                raise Exception(
                    'Unable to initialize sorting extractor (unexpected type)')
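The path branch above follows a common kachery pattern: check that the URI resolves, then realize it as a local file. A minimal standalone sketch (placeholder URI):

import kachery as ka

uri = 'sha1://0123456789abcdef0123456789abcdef01234567/firings.mda'  # placeholder URI

if ka.get_file_info(uri):
    # load_file downloads/realizes the file and returns a local path (falsy on failure)
    file_path = ka.load_file(uri)
    if not file_path:
        raise Exception('Unable to realize file: {}'.format(uri))
else:
    raise Exception('Not a file: {}'.format(uri))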
Example #4
def patch_recording_geom(recording, geom_fname):
    print(f'PATCHING geom for recording: {recording["name"]}')
    geom_info = ka.get_file_info(geom_fname)
    # A sha1dir:// URI addresses a JSON index object stored under the same hash,
    # so swapping the protocol lets us load and edit that index
    x = recording['directory']
    y = ka.store_dir(x).replace('sha1dir://', 'sha1://')
    obj = ka.load_object(y)
    # Point the geom.csv entry at the new geometry file, store the patched index,
    # and rebuild the directory URI from its hash
    obj['files']['geom.csv'] = dict(size=geom_info['size'],
                                    sha1=geom_info['sha1'])
    x2 = ka.store_object(obj)
    recording['directory'] = 'sha1dir://' + ka.get_file_hash(x2) + '.patched'
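A sketch of the load/patch/store round trip that patch_recording_geom performs on the directory index object (placeholder hash; it assumes the sha1dir:// index is addressable as a sha1:// object, which is what the .replace above exploits):

import kachery as ka

dir_uri = 'sha1dir://0123456789abcdef0123456789abcdef01234567.rec0'  # placeholder URI

# The directory index is a JSON object stored under the same hash
index_obj = ka.load_object(dir_uri.replace('sha1dir://', 'sha1://'))
assert index_obj is not None, f'Unable to load directory index: {dir_uri}'
print(list(index_obj['files'].keys()))

# ... edit index_obj['files'] here ...

# Store the patched index and rebuild a sha1dir:// URI from its hash
patched_uri = 'sha1dir://' + ka.get_file_hash(ka.store_object(index_obj))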
Example #5
    def _read_chunk_1d(self, i, N):
        start_byte = self._header.header_size + self._header.num_bytes_per_entry * i
        end_byte = start_byte + self._header.num_bytes_per_entry * N
        try:
            bytes0 = kp.load_bytes(self._path, start=int(start_byte), end=int(end_byte))
        except Exception:
            info0 = ka.get_file_info(self._path)
            if info0 is None:
                print(f'Problem reading bytes {start_byte}-{end_byte} from file {self._path} (no info)')
            else:
                print(f'Problem reading bytes {start_byte}-{end_byte} from file {self._path} of size {info0["size"]}')
            raise
        return np.frombuffer(bytes0, dtype=self._header.dt, count=N)
Example #6
def _try_mda_create_object(arg: Union[str, dict],
                           samplerate=None) -> Union[None, dict]:
    if isinstance(arg, str):
        path = arg
        if not ka.get_file_info(path):
            return None
        return dict(sorting_format='mda',
                    data=dict(firings=path, samplerate=samplerate))

    if isinstance(arg, dict):
        if 'firings' in arg:
            return dict(sorting_format='mda',
                        data=dict(firings=arg['firings'],
                                  samplerate=arg.get('samplerate', None)))

    return None
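A usage sketch for the helper above, showing the two accepted input forms (hypothetical firings path):

firings = 'sha1://0123456789abcdef0123456789abcdef01234567/firings.mda'  # placeholder URI

# String form: the path itself, with samplerate passed separately
obj1 = _try_mda_create_object(firings, samplerate=30000)

# Dict form: 'firings' plus an optional 'samplerate' key
obj2 = _try_mda_create_object(dict(firings=firings, samplerate=30000))

# Both return a dict like dict(sorting_format='mda', data=...), or None if the
# string path is not known to kachery / the dict has no 'firings' key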
Example #7
def _download_files_in_item(x):
    if type(x) == str:
        if x.startswith('sha1://') or x.startswith('sha1dir://'):
            if not ka.get_file_info(x, fr=dict(url=None)):
                a = kp.load_file(x)
                assert a is not None, f'Unable to download file: {x}'
        return
    elif type(x) == dict:
        for _, val in x.items():
            _download_files_in_item(val)
        return
    elif type(x) == list:
        for y in x:
            _download_files_in_item(y)
        return
    elif type(x) == tuple:
        for y in x:
            _download_files_in_item(y)
        return
    else:
        return
Example #8
def _all_files_are_local_in_item(x):
    if type(x) == str:
        if x.startswith('sha1://') or x.startswith('sha1dir://'):
            if not ka.get_file_info(x, fr=dict(url=None)):
                return False
        return True
    elif type(x) == dict:
        for _, val in x.items():
            if not _all_files_are_local_in_item(val):
                return False
        return True
    elif type(x) == list:
        for y in x:
            if not _all_files_are_local_in_item(y):
                return False
        return True
    elif type(x) == tuple:
        for y in x:
            if not _all_files_are_local_in_item(y):
                return False
        return True
    else:
        return True
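Examples #7 and #8 are typically used together: check whether everything in a nested job item is already local, and download it otherwise. A sketch with a hypothetical item (the URIs are placeholders):

item = dict(
    recording_directory='sha1dir://0123456789abcdef0123456789abcdef01234567.rec0',
    params=dict(detect_sign=-1),
    extra_files=['sha1://89abcdef0123456789abcdef0123456789abcdef/geom.csv'],
)

if not _all_files_are_local_in_item(item):
    # Walks the nested dict/list/tuple structure and downloads any sha1:// or
    # sha1dir:// strings that are not already available locally
    _download_files_in_item(item)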
Example #9
        def execute(_force_run=False, _container=None, **kwargs):
            import kachery as ka
            hash_object = dict(api_version='0.1.0',
                               name=name,
                               version=version,
                               input_files=dict(),
                               output_files=dict(),
                               parameters=dict())
            resolved_kwargs = dict()
            hither_input_files = getattr(f, '_hither_input_files', [])
            hither_output_files = getattr(f, '_hither_output_files', [])
            hither_parameters = getattr(f, '_hither_parameters', [])

            # Let's make sure the input and output files are all coming in as File objects
            for input_file in hither_input_files:
                iname = input_file['name']
                if iname in kwargs:
                    if type(kwargs[iname]) == str:
                        kwargs[iname] = File(kwargs[iname])
            for output_file in hither_output_files:
                oname = output_file['name']
                if oname in kwargs:
                    if type(kwargs[oname]) == str:
                        kwargs[oname] = File(kwargs[oname])

            input_file_keys = []
            for input_file in hither_input_files:
                iname = input_file['name']
                if iname not in kwargs or kwargs[iname] is None:
                    if input_file['required']:
                        raise Exception(
                            'Missing required input file: {}'.format(iname))
                else:
                    x = kwargs[iname]
                    # a hither File object
                    if x._path is None:
                        raise Exception(
                            'Input file has no path: {}'.format(iname))
                    # we really want the path
                    x2 = x._path
                    if _is_hash_url(x2):
                        # a hash url
                        y = ka.load_file(x2)
                        if y is None:
                            raise Exception(
                                'Unable to load input file {}: {}'.format(
                                    iname, x))
                        x2 = y
                    info0 = ka.get_file_info(x2)
                    if info0 is None:
                        raise Exception(
                            'Unable to get info for input file {}: {}'.format(
                                iname, x2))
                    tmp0 = dict()
                    for field0 in ['sha1', 'md5']:
                        if field0 in info0:
                            tmp0[field0] = info0[field0]
                    hash_object['input_files'][iname] = tmp0
                    input_file_keys.append(iname)
                    resolved_kwargs[iname] = x2

            output_file_keys = []
            for output_file in hither_output_files:
                oname = output_file['name']
                if oname not in kwargs or kwargs[oname] is None:
                    if output_file['required']:
                        raise Exception(
                            'Missing required output file: {}'.format(oname))
                else:
                    x = kwargs[oname]
                    x2 = x._path
                    if _is_hash_url(x2):
                        raise Exception(
                            'Output file {} cannot be a hash URI: {}'.format(
                                oname, x2))
                    resolved_kwargs[oname] = x2
                    hash_object['output_files'][oname] = True
                    output_file_keys.append(oname)

            for parameter in hither_parameters:
                pname = parameter['name']
                if pname not in kwargs or kwargs[pname] is None:
                    if parameter['required']:
                        raise Exception(
                            'Missing required parameter: {}'.format(pname))
                    if 'default' in parameter:
                        resolved_kwargs[pname] = parameter['default']
                else:
                    resolved_kwargs[pname] = kwargs[pname]
                hash_object['parameters'][pname] = resolved_kwargs[pname]

            for k, v in kwargs.items():
                if k not in resolved_kwargs:
                    hash_object['parameters'][k] = v
                    resolved_kwargs[k] = v

            if not _force_run:
                result_serialized: Union[dict, None] = _load_result(
                    hash_object=hash_object)
                if result_serialized is not None:
                    result0 = _deserialize_result(result_serialized)
                    if result0 is not None:
                        for output_file in hither_output_files:
                            oname = output_file['name']
                            if oname in resolved_kwargs:
                                shutil.copyfile(
                                    getattr(result0.outputs, oname)._path,
                                    resolved_kwargs[oname])
                        _handle_temporary_outputs([
                            getattr(result0.outputs, oname)
                            for oname in output_file_keys
                        ])
                        if result0.runtime_info['stdout']:
                            sys.stdout.write(result0.runtime_info['stdout'])
                        if result0.runtime_info['stderr']:
                            sys.stderr.write(result0.runtime_info['stderr'])
                        print(
                            '===== Hither: using cached result for {}'.format(
                                name))
                        return result0

            with ConsoleCapture() as cc:
                if _container is None:
                    returnval = f(**resolved_kwargs)
                else:
                    if hasattr(f, '_hither_containers'):
                        if _container in getattr(f, '_hither_containers'):
                            _container = getattr(
                                f, '_hither_containers')[_container]
                    returnval = run_function_in_container(
                        name=name,
                        function=f,
                        input_file_keys=input_file_keys,
                        output_file_keys=output_file_keys,
                        container=_container,
                        keyword_args=resolved_kwargs)

            result = Result()
            result.outputs = Outputs()
            for oname in hash_object['output_files'].keys():
                setattr(result.outputs, oname, kwargs[oname])
                result._output_names.append(oname)
            result.runtime_info = cc.runtime_info()
            result.hash_object = hash_object
            result.retval = returnval
            _handle_temporary_outputs(
                [getattr(result.outputs, oname) for oname in output_file_keys])
            _store_result(serialized_result=_serialize_result(result))
            return result
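The cache lookup above is keyed on hash_object. The helper _sha1_of_object used in Example #1 is not shown in these snippets; a plausible stand-in (an assumption, not the library's actual implementation) that hashes a canonical JSON encoding:

import hashlib
import json

def _sha1_of_object(obj):
    # Hypothetical stand-in: sha1 of a deterministic JSON encoding of the object
    txt = json.dumps(obj, sort_keys=True, separators=(',', ':'))
    return hashlib.sha1(txt.encode('utf-8')).hexdigest()

hash_object = dict(api_version='0.1.0',
                   name='my_function',
                   version='0.1.0',
                   input_files=dict(raw=dict(sha1='0123456789abcdef0123456789abcdef01234567')),
                   output_files=dict(firings_out=True),
                   parameters=dict(detect_sign=-1))
print(_sha1_of_object(hash_object))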
Example #10
def _prepare_job_to_run(job):
    import kachery as ka

    name = job['name']
    # version = job['version']
    f = job['f']
    kwargs = job['kwargs']

    _container = job['container']
    _cache = job['cache']
    _cache_failing = job['cache_failing']
    # _force_run = job['force_run']
    _gpu = job['gpu']
    _exception_on_fail = job['exception_on_fail']
    if _exception_on_fail is None:
        _exception_on_fail = True
    _job_handler = job['job_handler']
    hash_object = job['hash_object']
    result = job['result']
    resolved_input_files = dict()
    hither_input_files = getattr(f, '_hither_input_files', [])
    hither_output_files = getattr(f, '_hither_output_files', [])
    # hither_parameters = getattr(f, '_hither_parameters', [])
    _show_console = job['show_console']
    _show_cached_console = job['show_cached_console']
    _job_timeout = job['timeout']

    input_file_keys = []
    input_file_extensions = dict()
    for input_file in hither_input_files:
        iname = input_file['name']
        if iname not in kwargs or kwargs[iname] is None:
            if input_file['required']:
                raise Exception(
                    'Unexpected: missing required input file: "{}"'.format(
                        iname))
        else:
            x = kwargs[iname]
            # a hither File object
            if x._path is None:
                raise Exception(
                    'Unexpected: input file has no path: {}'.format(iname))
            # we really want the path
            x2 = x._path
            if _is_hash_url(x2) and input_file['kachery_resolve']:
                # a hash url
                y = ka.load_file(x2)
                if y is None:
                    raise Exception('Unable to load input file {}: {}'.format(
                        iname, x))
                x2 = y
            info0 = ka.get_file_info(x2)
            if info0 is None:
                raise Exception(
                    'Unable to get info for input file {}: {}'.format(
                        iname, x2))
            tmp0 = dict()
            for field0 in ['sha1', 'md5']:
                if field0 in info0:
                    tmp0[field0] = info0[field0]
            hash_object['input_files'][iname] = tmp0
            input_file_keys.append(iname)
            input_file_extensions[iname] = _file_extension(x._path)
            resolved_input_files[iname] = x2

    resolved_output_files = dict()
    output_file_keys = []
    output_file_extensions = dict()
    for output_file in hither_output_files:
        oname = output_file['name']
        if oname not in kwargs or kwargs[oname] is None:
            if output_file['required']:
                raise Exception(
                    'Unexpected: missing required output file: {}'.format(
                        oname))
        else:
            x = kwargs[oname]
            x2 = x._path
            if _is_hash_url(x2):
                raise Exception(
                    'Output file {} cannot be a hash URI: {}'.format(
                        oname, x2))
            resolved_output_files[oname] = x2
            output_file_keys.append(oname)
            output_file_extensions[oname] = _file_extension(x._path)

    resolved_parameters = hash_object['parameters']

    resolved_kwargs = dict()
    for k, v in resolved_input_files.items():
        resolved_kwargs[k] = v
    for k, v in resolved_output_files.items():
        resolved_kwargs[k] = v
    for k, v in resolved_parameters.items():
        resolved_kwargs[k] = v

    job['resolved_kwargs'] = resolved_kwargs
    job['input_file_keys'] = input_file_keys
    job['input_file_extensions'] = input_file_extensions
    job['output_file_keys'] = output_file_keys
    job['output_file_extensions'] = output_file_extensions
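The helper _file_extension is not shown in these snippets; a plausible stand-in (an assumption), together with the dict-merge equivalent of the three copy loops at the end of _prepare_job_to_run:

import os

def _file_extension(path: str) -> str:
    # Hypothetical stand-in: returns '.mda', '.json', '.csv', ...
    return os.path.splitext(path)[1]

# The three loops that build resolved_kwargs amount to a single dict merge
resolved_input_files = dict(recording='/tmp/raw.mda')
resolved_output_files = dict(firings_out='/tmp/firings_out.mda')
resolved_parameters = dict(detect_sign=-1)
resolved_kwargs = {**resolved_input_files, **resolved_output_files, **resolved_parameters}
print(_file_extension(resolved_kwargs['firings_out']))  # -> '.mda'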