Example #1
 def _load(self, eid, dataset_types=None, dclass_output=False, dry_run=False, cache_dir=None,
           download_only=False, clobber=False, offline=False, keep_uuid=False):
     """
     From a Session ID and dataset types, queries Alyx database, downloads the data
     from Globus, and loads into numpy array. Single session only
     """
     # if the input is a UUID, prepend the sessions URL prefix to it
     cache_dir = self._get_cache_dir(cache_dir)
     if is_uuid_string(eid):
         eid = '/sessions/' + eid
     eid_str = eid[-36:]
     # get session json information as a dictionary from the alyx API
     try:
         ses = self.alyx.get('/sessions/' + eid_str)
     except requests.HTTPError:
         raise requests.HTTPError('Session ' + eid_str + ' does not exist')
     # ses = ses[0]
     # if no dataset_type is provided:
     # a) force the output to be a dictionary that provides context to the data
     # b) download all types that have a data url specified within the alf folder
     dataset_types = [dataset_types] if isinstance(dataset_types, str) else dataset_types
     if not dataset_types or dataset_types == ['__all__']:
         dclass_output = True
     dc = SessionDataInfo.from_session_details(ses, dataset_types=dataset_types, eid=eid_str)
     # loop over each dataset and download if necessary
     for ind in range(len(dc)):
         if dc.url[ind] and not dry_run:
             relpath = PurePath(dc.url[ind].replace(self._par.HTTP_DATA_SERVER, '.')).parents[0]
             cache_dir_file = PurePath(cache_dir, relpath)
             Path(cache_dir_file).mkdir(parents=True, exist_ok=True)
             dc.local_path[ind] = self._download_file(
                 dc.url[ind], str(cache_dir_file), clobber=clobber, offline=offline,
                 keep_uuid=keep_uuid, file_size=dc.file_size[ind], hash=dc.hash[ind])
     # load the files content in variables if requested
     if not download_only:
         for ind, fil in enumerate(dc.local_path):
             dc.data[ind] = load_file_content(fil)
     # parse output arguments
     if dclass_output:
         return dc
     # if required, parse the output as a list that matches dataset_types requested
     list_out = []
     for dt in dataset_types:
         if dt not in dc.dataset_type:
             _logger.warning('dataset ' + dt + ' not found for session: ' + eid_str)
             list_out.append(None)
             continue
         for i, x in enumerate(dc.dataset_type):
             if dt == x:
                 if dc.data[i] is not None:
                     list_out.append(dc.data[i])
                 else:
                     list_out.append(dc.local_path[i])
     return list_out
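
A minimal usage sketch for the _load helper above, assuming the legacy client exposes it through a public load wrapper (as used in Example #4). The import path, the session UUID and the dataset types are placeholders, not taken from the example.

# Usage sketch; assumes a public load() wrapper forwards to _load.
# The import path, the session UUID and the dataset types are placeholders.
from oneibl.one import ONE

one = ONE()
eid = 'cf264653-2deb-44cb-aa84-89b82507028a'  # placeholder session UUID
# dclass_output=True returns the SessionDataInfo dataclass whose url,
# local_path and data fields are aligned by index
dc = one.load(eid, dataset_types=['spikes.times', 'spikes.clusters'],
              dclass_output=True)
# download_only=True skips loading file contents; the returned list then
# holds the local file paths in the order the dataset types were requested
paths = one.load(eid, dataset_types=['spikes.times'], download_only=True)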
Example #2
    def load_dataset(self,
                     eid: Union[str, Path, UUID],
                     dataset: str,
                     collection: Optional[str] = None,
                     download_only: bool = False) -> Any:
        """
        Load a single dataset from a Session ID and a dataset type.

        :param eid: Experiment session identifier; may be a UUID, URL, experiment reference string,
        details dict or Path
        :param dataset: The ALF dataset to load.  Supports asterisks as wildcards.
        :param collection:  The collection to which the object belongs, e.g. 'alf/probe01'.
        For IBL this is the relative path of the file from the session root.
        Supports asterisks as wildcards.
        :param download_only: When true the data are downloaded and the file path is returned
        :return: dataset or a Path object if download_only is true

        Examples:
            intervals = one.load_dataset(eid, '_ibl_trials.intervals.npy')
            intervals = one.load_dataset(eid, '*trials.intervals*')
            filepath = one.load_dataset(eid, '_ibl_trials.intervals.npy', download_only=True)
            spikes = one.load_dataset(eid, 'spikes.times.npy', collection='alf/probe01')
        """
        search_str = 'name__regex,' + dataset.replace('.', r'\.').replace(
            '*', '.*')
        if collection:
            search_str += ',collection__regex,' + collection.replace('*', '.*')
        results = self.alyx.rest('datasets',
                                 'list',
                                 session=eid,
                                 django=search_str,
                                 exists=True)

        # Get filenames of returned ALF files
        collection_set = {x['collection'] for x in results}
        if len(collection_set) > 1:
            raise ALFMultipleCollectionsFound(
                'Matching dataset belongs to multiple collections:' +
                ', '.join(collection_set))
        if len(results) > 1:
            raise ALFMultipleObjectsFound(
                'The following matching datasets were found: ' +
                ', '.join(x['name'] for x in results))
        if len(results) == 0:
            raise ALFObjectNotFound(f'Dataset "{dataset}" not found on Alyx')

        filename = self.download_dataset(results[0])
        assert filename is not None, 'failed to download dataset'

        return filename if download_only else alfio.load_file_content(filename)
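
The docstring above already lists the main call patterns; the sketch below adds handling of the exceptions load_dataset raises. The import paths and the eid are assumptions; only load_dataset and the ALF exception names come from the example itself.

# Sketch: calling load_dataset and handling its error cases.
# The import paths and the eid below are assumptions.
from one.api import ONE
from one.alf.exceptions import ALFObjectNotFound, ALFMultipleObjectsFound

one = ONE()
eid = 'cf264653-2deb-44cb-aa84-89b82507028a'  # placeholder session UUID
try:
    spike_times = one.load_dataset(eid, '*spikes.times*', collection='alf/probe01')
except ALFObjectNotFound:
    spike_times = None  # no matching dataset registered for this session
except ALFMultipleObjectsFound:
    raise  # the wildcard matched several datasets; tighten the pattern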
Example #3
 def _load(self,
           eid,
           dataset_types=None,
           dclass_output=False,
           download_only=False,
           offline=False,
           **kwargs):
     """
     From a Session ID and dataset types, queries Alyx database, downloads the data
     from Globus, and loads into numpy array. Single session only
     """
     if alfio.is_uuid_string(eid):
         eid = '/sessions/' + eid
     eid_str = eid[-36:]
     # if no dataset_type is provided:
     # a) force the output to be a dictionary that provides context to the data
     # b) download all types that have a data url specified within the alf folder
     dataset_types = [dataset_types] if isinstance(dataset_types,
                                                   str) else dataset_types
     if not dataset_types or dataset_types == ['__all__']:
         dclass_output = True
     if offline:
         dc = self._make_dataclass_offline(eid_str, dataset_types, **kwargs)
     else:
         dc = self._make_dataclass(eid_str, dataset_types, **kwargs)
     # load the files content in variables if requested
     if not download_only:
         for ind, fil in enumerate(dc.local_path):
             dc.data[ind] = alfio.load_file_content(fil)
     # parse output arguments
     if dclass_output:
         return dc
     # if required, parse the output as a list that matches dataset_types requested
     list_out = []
     for dt in dataset_types:
         if dt not in dc.dataset_type:
             _logger.warning('dataset ' + dt + ' not found for session: ' +
                             eid_str)
             list_out.append(None)
             continue
         for i, x in enumerate(dc.dataset_type):
             if dt == x:
                 if dc.data[i] is not None:
                     list_out.append(dc.data[i])
                 else:
                     list_out.append(dc.local_path[i])
     return list_out
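
A sketch of how the offline branch of this variant might be reached from the public API. The load wrapper, the import path and the eid are assumptions; the offline keyword and the list-shaped return come from the code above.

# Sketch of exercising the offline/online branch of _load above.
# The public load() wrapper, the import path and the eid are assumptions.
from oneibl.one import ONE

one = ONE()
eid = 'cf264653-2deb-44cb-aa84-89b82507028a'  # placeholder session UUID
# offline=True builds the dataclass from the local cache via
# _make_dataclass_offline; otherwise Alyx is queried via _make_dataclass
wheel_pos, wheel_times = one.load(
    eid, dataset_types=['wheel.position', 'wheel.timestamps'], offline=True)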
Example #4
 def load_data(self, download=False):
     """
     Load wheel, trial and camera timestamp data
     :return: None; populates self.data.wheel, self.data.trials and self.data.camera_times
     """
     if download:
         self.data.wheel = self.one.load_object(self.eid, 'wheel')
         self.data.trials = self.one.load_object(self.eid, 'trials')
         cam = self.one.load(self.eid, ['camera.times'], dclass_output=True)
         self.data.camera_times = {
             vidio.label_from_path(url): ts
             for ts, url in zip(cam.data, cam.url)
         }
     else:
         alf_path = self.session_path / 'alf'
         self.data.wheel = alfio.load_object(alf_path, 'wheel')
         self.data.trials = alfio.load_object(alf_path, 'trials')
         self.data.camera_times = {
             vidio.label_from_path(x): alfio.load_file_content(x)
             for x in alf_path.glob('*Camera.times*')
         }
     assert all(x is not None for x in self.data.values())
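
The download=False branch above can be reproduced outside the class; the sketch below restates that local-loading pattern with a placeholder session path. The import locations of alfio and vidio are assumptions about where load_object and label_from_path live.

# Standalone sketch of the download=False branch above.
# The session path and the alfio/vidio import locations are assumptions.
from pathlib import Path
import alf.io as alfio                 # assumed module behind the alfio alias
from ibllib.io import video as vidio   # assumed module behind the vidio alias

alf_path = Path('/data/subject/2020-01-01/001') / 'alf'  # placeholder session path
wheel = alfio.load_object(alf_path, 'wheel')
trials = alfio.load_object(alf_path, 'trials')
camera_times = {vidio.label_from_path(p): alfio.load_file_content(p)
                for p in alf_path.glob('*Camera.times*')}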
Example #5
 def _load(self,
           eid,
           dataset_types=None,
           dclass_output=False,
           dry_run=False,
           cache_dir=None,
           download_only=False,
           clobber=False,
           offline=False,
           keep_uuid=False):
     """
     From a Session ID and dataset types, queries Alyx database, downloads the data
     from Globus, and loads into numpy array. Single session only
     """
     # if the input is a UUID, prepend the sessions URL prefix to it
     cache_dir = self._get_cache_dir(cache_dir)
     if is_uuid_string(eid):
         eid = '/sessions/' + eid
     eid_str = eid[-36:]
     # get session json information as a dictionary from the alyx API
     try:
         ses = self.alyx.get('/sessions/' + eid_str)
     except requests.HTTPError:
         raise requests.HTTPError('Session ' + eid_str + ' does not exist')
     # ses = ses[0]
     # if no dataset_type is provided:
     # a) force the output to be a dictionary that provides context to the data
     # b) download all types that have a data url specified within the alf folder
     dataset_types = [dataset_types] if isinstance(dataset_types,
                                                   str) else dataset_types
     if not dataset_types or dataset_types == ['__all__']:
         dclass_output = True
     # this performs the filtering
     dc = SessionDataInfo.from_session_details(ses,
                                               dataset_types=dataset_types,
                                               eid=eid_str)
     # loop over each dataset and download if necessary
     with concurrent.futures.ThreadPoolExecutor(
             max_workers=NTHREADS) as executor:
         futures = []
         for ind in range(len(dc)):
             if dc.url[ind] is None or dry_run:
                 futures.append(None)
             else:
                 futures.append(
                     executor.submit(self.download_dataset,
                                     dc.url[ind],
                                     cache_dir=cache_dir,
                                     clobber=clobber,
                                     offline=offline,
                                     keep_uuid=keep_uuid,
                                     file_size=dc.file_size[ind],
                                     hash=dc.hash[ind]))
         concurrent.futures.wait(
             list(filter(lambda x: x is not None, futures)))
         for ind, future in enumerate(futures):
             if future is None:
                 continue
             dc.local_path[ind] = future.result()
     # load the files content in variables if requested
     if not download_only:
         for ind, fil in enumerate(dc.local_path):
             dc.data[ind] = load_file_content(fil)
     # parse output arguments
     if dclass_output:
         return dc
     # if required, parse the output as a list that matches dataset_types requested
     list_out = []
     for dt in dataset_types:
         if dt not in dc.dataset_type:
             _logger.warning('dataset ' + dt + ' not found for session: ' +
                             eid_str)
             list_out.append(None)
             continue
         for i, x in enumerate(dc.dataset_type):
             if dt == x:
                 if dc.data[i] is not None:
                     list_out.append(dc.data[i])
                 else:
                     list_out.append(dc.local_path[i])
     return list_out
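
The main change in this variant relative to Example #1 is the threaded download loop. The sketch below isolates that index-aligned thread-pool pattern with a stand-in download function; NTHREADS, the URLs and fake_download are placeholders for illustration.

# Isolated sketch of the index-aligned thread-pool pattern used above.
# NTHREADS, the URLs and fake_download are placeholders.
import concurrent.futures

NTHREADS = 4
urls = ['https://example.org/a.npy', None, 'https://example.org/b.npy']

def fake_download(url):
    # stand-in for self.download_dataset(url, ...)
    return '/local/cache/' + url.rsplit('/', 1)[-1]

local_paths = [None] * len(urls)
with concurrent.futures.ThreadPoolExecutor(max_workers=NTHREADS) as executor:
    # keep a None placeholder for skipped entries so futures stay aligned
    # with the original indices, exactly as in _load above
    futures = [None if u is None else executor.submit(fake_download, u)
               for u in urls]
    concurrent.futures.wait([f for f in futures if f is not None])
    for ind, future in enumerate(futures):
        if future is not None:
            local_paths[ind] = future.result()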