def from_pandas(df, cache_dir):
    """
    Build a SessionDataInfo from a dataframe holding one dataset record per row.

    :param df: dataframe of dataset records; the columns read here are dataset_type,
     id_0, id_1, eid_0, eid_1, hash and file_size, plus the fields used to build the
     local path (lab, project, subject, start_time, number, collection, name)
    :param cache_dir: root folder under which the local file paths are built
    :return: SessionDataInfo, left empty when the dataframe has no elements
    """
    if df.size == 0:
        return SessionDataInfo()

    def _local_path(rec):
        # layout in use:
        #   <cache_dir>/<lab>/public/projects/<project>/ALF/<subject>/<date>/<number>/<collection>/<name>
        # alternative layout (disabled):
        #   <cache_dir>/<lab>/Subjects/<subject>/<date>/<number>/<collection>/<name>
        parts = (
            rec['lab'], 'public', 'projects', rec['project'], 'ALF', rec['subject'],
            rec['start_time'].isoformat()[:10],  # keep only the YYYY-MM-DD part
            str(rec['number']).zfill(3),  # session number zero-padded to 3 digits
            rec['collection'], rec['name'],
        )
        return Path(cache_dir).joinpath(*parts)

    n_rows = df.shape[0]
    dataset_ids = list(parquet.np2str(df[['id_0', 'id_1']]))
    local_paths = df.apply(_local_path, axis=1).to_list()
    session_ids = list(parquet.np2str(df[['eid_0', 'eid_1']]))
    return SessionDataInfo(
        dataset_type=df.dataset_type.to_list(),
        dataset_id=dataset_ids,
        local_path=local_paths,
        eid=session_ids,
        # url and data are not known at this stage: one None placeholder per record
        url=[None] * n_rows,
        data=[None] * n_rows,
        hash=df.hash.to_list(),
        file_size=df.file_size.to_list(),
    )
def test_uuids_conversions(self):
    """Check np2str on a single pair of integers and on two stacked copies of that pair."""
    expected = 'a3df91c8-52a6-4afa-957b-3479a7d0897c'
    single = np.array([-411333541468446813, 8973933150224022421])
    stacked = np.tile(single, [2, 1])
    # a stack of uuids converts to several strings, each equal to the expected one
    every_row_matches = all(converted == expected for converted in np2str(stacked))
    self.assertTrue(every_row_matches)
    # a single uuid converts to a single string
    single_matches = np2str(single) == expected
    self.assertTrue(single_matches)
def test_uuids_conversions(self):
    """
    Check the uuid conversions: np2str on a single pair of integers and on two stacked
    copies of that pair, then str2np on a list of uuid strings containing a None entry.
    """
    str_uuid = 'a3df91c8-52a6-4afa-957b-3479a7d0897c'
    one_np_uuid = np.array([-411333541468446813, 8973933150224022421])
    two_np_uuid = np.tile(one_np_uuid, [2, 1])
    # array gives a list
    self.assertTrue(all(converted == str_uuid for converted in np2str(two_np_uuid)))
    # single uuid gives a string
    self.assertEqual(np2str(one_np_uuid), str_uuid)
    # list uuids with some None entries
    uuid_list = [
        'bc74f49f33ec0f7545ebc03f0490bdf6',
        'c5779e6d02ae6d1d6772df40a1a94243',
        None,
        '643371c81724378d34e04a60ef8769f4',
    ]
    # the row of the None entry (index 2) is expected to be all zeros; a unittest
    # assertion is used rather than a bare assert, which is removed under `python -O`
    self.assertTrue(np.all(str2np(uuid_list)[2, :] == 0))
def eid_from_path(self, path_obj):
    """
    From a local path, gets the experiment id

    :param path_obj: local path or list of local paths
    :return: eid or list of eids; None when the path holds no session path, when the
     cache is empty or when no cached session matches
    """
    # If path_obj is a list recurse through it and return a list
    if isinstance(path_obj, list):
        return [self.eid_from_path(Path(p)) for p in path_obj]
    # else ensure the path ends with mouse, date, number
    session_path = alfio.get_session_path(Path(path_obj))
    # if path does not have a date and a number, or cache is empty return None
    if session_path is None or self._cache.size == 0:
        return None
    subject = session_path.parts[-3]
    date_str = session_path.parts[-2]
    number = int(session_path.parts[-1])
    cache = self._cache
    # every comparison is parenthesised on its own: `&` binds tighter than `==`, so an
    # unparenthesised `a & b & col == n` would be evaluated as `(a & b & col) == n`
    is_match = (
        (cache['subject'] == subject)
        & cache['start_time'].apply(lambda x: x.isoformat()[:10] == date_str)
        & (cache['number'] == number)
    )
    ind = np.where(is_match.to_numpy())[0]
    if ind.size > 0:
        # fetch eid from cache, keeping the first matching record
        return parquet.np2str(cache[['eid_0', 'eid_1']].iloc[ind[0]])
    return None
def eid_from_path(self, path_obj, use_cache=True):
    """
    From a local path, gets the experiment id

    :param path_obj: local path or list of local paths
    :param use_cache: if set to False, skips the cache lookup and goes straight to
     self.search (which hits the database); applies to every element of a list input
    :return: eid or list of eids; None when the path holds no session path or when
     nothing matches
    """
    # If path_obj is a list recurse through it and return a list
    if isinstance(path_obj, list):
        # forward use_cache so that list and single-path inputs behave the same way
        return [self.eid_from_path(Path(p), use_cache=use_cache) for p in path_obj]
    # else ensure the path ends with mouse, date, number
    session_path = alfio.get_session_path(Path(path_obj))
    # if path does not have a date and a number return None
    if session_path is None:
        return None
    subject = session_path.parts[-3]
    date_str = session_path.parts[-2]
    number = session_path.parts[-1]
    # try the cached info to possibly avoid hitting database
    if use_cache and self._cache.size > 0:
        cache = self._cache
        # every comparison is parenthesised on its own: `&` binds tighter than `==`, so
        # an unparenthesised `a & b & col == n` would be evaluated as `(a & b & col) == n`
        is_match = (
            (cache['subject'] == subject)
            & cache['start_time'].apply(lambda x: x.isoformat()[:10] == date_str)
            & (cache['number'] == int(number))
        )
        ind = np.where(is_match.to_numpy())[0]
        if ind.size > 0:
            # keep the first matching record
            return parquet.np2str(cache[['eid_0', 'eid_1']].iloc[ind[0]])
    # if not search for subj, date, number XXX: hits the DB
    found = self.search(subjects=subject, date_range=date_str, number=number)
    # Return the uuid if any
    return found[0] if found else None