def __init__(self, path_format: str):
    """
    A class for pattern-filtered exploration of file paths.

    Sets ``_path_format`` (the format as given), ``_prefix`` (the root directory, passed
    through ``ensure_slash_suffix``), ``_path_match_re`` (the regex built by
    ``match_re_for_fstring``) and ``_key_filt`` (a predicate telling whether a key matches
    that regex).

    :param path_format: The f-string format that the fullpath keys of the obj source should have.
        Often, just the root directory whose FILES contain the (full_filepath, content) data
        Also common is to use path_format='{rootdir}/{relative_path}.EXT' to impose a specific extension EXT
    """
    self._path_format = path_format  # not intended for use, but keeping in case, for now

    if '{' not in path_format:
        rootdir = ensure_slash_suffix(path_format)
        # if the path_format is equal to the _prefix (i.e. there's no {} formatting)
        # ... append a formatting element so that the matcher can match all subfiles.
        path_pattern = path_format + '{}'
    else:
        # The literal text before the first '{' determines the root directory.
        # Raw string: the previous non-raw '[^\{]*' relied on an invalid escape sequence.
        literal_prefix = re.match(r'[^{]*', path_format).group(0)
        rootdir = ensure_slash_suffix(os.path.dirname(literal_prefix))
        path_pattern = path_format

    self._prefix = rootdir
    self._path_match_re = match_re_for_fstring(path_pattern)

    def _key_filt(k):
        # Reads self._path_match_re at call time, so a later reassignment of it is honoured.
        return bool(self._path_match_re.match(k))

    self._key_filt = _key_filt
def path_match_regex_from_path_format(path_format):
    """
    Build the path-matching regex for ``path_format`` via ``match_re_for_fstring``.

    :param path_format: An f-string style path format, or a plain path with no ``{}`` fields.
    :return: Whatever ``match_re_for_fstring`` returns for the (possibly extended) format.
    """
    has_format_fields = '{' in path_format
    # A format without any {} field is a bare prefix: tack on an anonymous field
    # so that the matcher can match all subfiles under it.
    pattern = path_format if has_format_fields else path_format + '{}'
    return match_re_for_fstring(pattern)
def __init__(
    self,
    session_dir,
    time_units_per_sec=DFLT_TIME_UNITS_PER_SEC,
    csv_timestamp_time_units_per_sec=int(1e3),
    rel_path_format='{session:13d}/t/tags{csv_timestamp:13d}.csv',
    sr=None,
    **kwargs
):
    """
    A store for sessions folders.

    The full path format handed to the parent class is ``session_dir`` joined with
    ``rel_path_format``; the same format is used to build ``self._path_match_re``.

    :param session_dir: The sessions directory, which contains sessions subfolders.
    :param time_units_per_sec: The timestamp unit to use for bt and tt, in num of units per second.
        For example, if milliseconds, time_units_per_sec=1000, if microseconds, time_units_per_sec=1000000.
    :param csv_timestamp_time_units_per_sec: "time sample rate" The timestamp unit the csv filename uses.
    :param rel_path_format: The pathformat under the sessions_dir.
        Is used to filter the files as well as match information encoded in filename (such as csv_timestamp)
    :param sr: Stored unchanged on ``self.sr`` (presumably a sample rate -- TODO confirm).
    :param kwargs: Passed on to the super class __init__
    """
    full_path_format = os.path.join(session_dir, rel_path_format)
    super().__init__(full_path_format, **kwargs)

    # Assigned after the parent initializer, so this is the value left on the instance.
    self._path_match_re = match_re_for_fstring(full_path_format)

    self.sr = sr
    self.time_units_per_sec = time_units_per_sec
    self.csv_timestamp_time_units_per_sec = csv_timestamp_time_units_per_sec
def __init__(self, path_format: str, contents_of_file: Callable[[str], Any] = dflt_contents_of_file):
    """
    Set up the root directory, path-matching regex and file reader for this store.

    :param path_format: The f-string format that the fullpath keys of the obj source should have.
        Often, just the root directory whose FILES contain the (full_filepath, content) data
        Also common is to use path_format='{rootdir}/{relative_path}.EXT' to impose a specific extension EXT
    :param contents_of_file: The function that returns the python object stored at a given key (path)
    """
    if '{' not in path_format:
        # No {} formatting: path_format IS the root directory.
        self._rootdir = path_format
        # ... add a formatting element so that the matcher can match all subfiles.
        path_format += '{}'
    else:
        # The root directory is the directory part of the literal text before the first '{'.
        # Raw string: the previous non-raw '[^\{]*' relied on an invalid escape sequence.
        literal_prefix = re.match(r'[^{]*', path_format).group(0)
        self._rootdir = os.path.dirname(literal_prefix)

    self._path_match_re = match_re_for_fstring(path_format)
    self._path_format = path_format  # note: includes the appended '{}' when one was added
    self._contents_of_file = contents_of_file
def __init__(self, channel_data_dir, sr=None, **kwargs):
    """
    Set up a store over the block files of a channel data directory.

    The sample rate is read from the ``sr`` entry of ``config.json`` in
    ``channel_data_dir`` when that file is present and has a non-null ``sr``;
    otherwise the ``sr`` argument is used.

    :param channel_data_dir: The channel root directory.
    :param sr: Fallback sample rate, used when ``config.json`` does not provide one.
    :param kwargs: Passed on to the super class __init__
    :raises AssertionError: If no sample rate could be determined from either source.
    """
    # figuring out the sample rate
    channel_store = LocalPathStore(channel_data_dir)
    if 'config.json' in channel_store:
        config = json.loads(channel_store['config.json'])
        sr_value = config.get('sr')
        if sr_value is None:
            sr_value = sr
        # Only convert when there is a value: int(None) would raise a TypeError and
        # pre-empt the explanatory assertion below.
        self.sr = int(sr_value) if sr_value is not None else None
    else:
        self.sr = sr
    assert (
        self.sr is not None
    ), "I couldn't figure out the sample rate. Should be in a config.json file or given explicitly as argument"

    # Here we make our path inclusion condition more specific, in order to avoid including any files that
    # are not "block" files that are compliant with the folder structure.
    # Namely, we'll only take filepaths whose structure is CHANNEL_ROOT/s/SESSION/b/BLOCK
    # and further, constrain valid SESSION and BLOCK names to be exactly 16 digits.
    # By tagging the session and block segments in the path_format, we're also enabling the construction of
    # a regular expression that can be used to extract information from our keys.
    # See the get_session_and_block for how it can be used.
    path_format = os.path.join(channel_data_dir, 's/{session:16d}/b/{block:16d}')
    self._path_match_re = match_re_for_fstring(path_format)
    super().__init__(path_format, **kwargs)