def build(self):
    """Apply the configured in-place edits, then link files into the install dir.

    ``self.options.files`` maps a source path (relative to ``self.builddir``,
    optionally a glob pattern) to a destination path relative to
    ``self.installdir``.  ``self.options.edit`` maps a file (relative to
    ``self.builddir``) to a ``{regex: replacement}`` mapping that is applied
    to every line of that file before linking.

    Raises:
        EnvironmentError: if a glob pattern in ``files`` matches nothing.
    """
    super().build()

    files = self.options.files
    editables = self.options.edit

    globs = {f: files[f] for f in files if glob.has_magic(f)}
    filepaths = {
        os.path.join(self.builddir, f): files[f]
        for f in files
        if not glob.has_magic(f)
    }

    for edit in editables:
        fpath = os.path.join(self.builddir, edit)
        if not os.path.exists(fpath):
            continue
        for regex in editables[edit]:
            # Compile once per expression instead of once per line.
            compiled = re.compile(regex)
            replacement = editables[edit][regex]
            # The context manager restores stdout and closes the file even
            # when a substitution raises half-way through the rewrite.
            with fileinput.input([fpath], inplace=True) as stream:
                for line in stream:
                    print(compiled.sub(replacement, line.rstrip()))

    for src in globs:
        paths = glob.glob(os.path.join(self.builddir, src))
        if not paths:
            raise EnvironmentError('no matches for {!r}'.format(src))
        for path in paths:
            # glob() already returns paths prefixed with builddir; joining
            # again would double the prefix for a relative builddir.
            filepaths[path] = globs[src]

    for src in sorted(filepaths):
        dst = os.path.join(self.installdir, filepaths[src].lstrip('/'))
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        _recursively_link(src, dst, self.installdir)
def get_filenames(path=os.curdir):
    r"""
    Return an iterator over the file names found for `path`.

    Environment variables and a leading "~" are expanded first, and the
    platform's alternative path separator (if any) is normalised to the
    primary one.

    * Without wildcards (or when `path` ends with a separator) the
      *contents* of the directory are listed.
    * With wildcards the names *matching the pattern* are returned
      instead; append the path separator to a pattern to get the contents
      of the matching directories.

    Examples (Windows notation):

        r'Python27'   => content of the "Python27" folder
        r'Py*'        => names starting with "Py"
        r'Py*\\'      => contents of directories starting with "Py"
        r'Py*\*.txt'  => all text files in all folders starting with "Py"

    Bare file names are yielded unless the directory part itself contains
    a wildcard, in which case the matched paths are yielded.  Nothing is
    yielded when no name matches.
    """
    pattern = os.path.expandvars(path)
    pattern = os.path.expanduser(pattern)
    if os.altsep is not None:
        pattern = pattern.replace(os.altsep, os.sep)

    list_contents = pattern.endswith(os.sep) or not glob.has_magic(pattern)
    if list_contents:
        pattern = os.path.join(pattern, '*')

    hits = glob.iglob(pattern)
    if glob.has_magic(os.path.dirname(pattern)):
        # Several directories may be involved: keep the leading path.
        return (hit.rstrip(os.sep) for hit in hits)
    return (os.path.basename(hit.rstrip(os.sep)) for hit in hits)
def find_data_files(source_path, target_path, patterns):
    """
    Collects the data files below the source path that match any of
    the given glob patterns and groups them by the directory they map
    to below the target path.

    @type source_path: String
    @param source_path: The root path searched for data files.
    @type target_path: String
    @param target_path: The root path the data files are mapped into.
    @type patterns: List
    @param patterns: The glob patterns (relative to the source path)
    used to select the files.
    @rtype: List
    @return: The sorted list of (target directory, file names) tuples.
    """

    # neither root may itself be a glob pattern
    if any(glob.has_magic(root) for root in (source_path, target_path)):
        raise ValueError("magic not allowed in source and target")

    # target directory -> list of matching source files
    grouped = {}

    for pattern in patterns:
        for match in glob.glob(os.path.join(source_path, pattern)):
            # directories and other non regular entries are ignored
            if not os.path.isfile(match):
                continue

            # mirrors the location relative to the source root
            # below the target root
            relative = os.path.relpath(match, source_path)
            target_dir = os.path.dirname(os.path.join(target_path, relative))
            grouped.setdefault(target_dir, []).append(match)

    return sorted(grouped.items())
def _get_sub_patterns(self, pattern): """Extract sub-patterns from the leading path of `pattern`. The right-most path component is successively peeled off until there are no patterns left. """ if pattern in self._paths["sub_patterns"]: return self._paths["sub_patterns"][pattern] head, tail = op.split(pattern) if not tail: # Pattern ended with a separator. Take the first directory as the # base. head, tail = op.split(head) sub_patterns = [] seen_magic = glob.has_magic(tail) while head: new_head, tail = op.split(head) if seen_magic and not glob.has_magic(head): break elif not seen_magic and glob.has_magic(tail): seen_magic = True if seen_magic: sub_patterns.append(head + op.sep) head = new_head self._paths["sub_patterns"][pattern] = sub_patterns return sub_patterns
def iglob(self, pathname):
    """
    Yield the paths matching the shell-style pattern `pathname`.

    A pattern without wildcards is yielded only when
    ``self.ssh.lpath_exists`` reports that it exists.  Otherwise the
    directory part is expanded first (recursively if it contains
    wildcards itself) and the last component is matched inside every
    resulting directory via ``self.glob1`` / ``self.glob0``.
    """
    if not glob.has_magic(pathname):
        if self.ssh.lpath_exists(pathname):
            yield pathname
        return

    parent, leaf = posixpath.split(pathname)
    if not parent:
        # Bare pattern: match relative to the current directory.
        for match in self.glob1(posixpath.curdir, leaf):
            yield match
        return

    parents = self.iglob(parent) if glob.has_magic(parent) else [parent]
    match_in_dir = self.glob1 if glob.has_magic(leaf) else self.glob0
    for folder in parents:
        for match in match_in_dir(folder, leaf):
            yield posixpath.join(folder, match)
def find_data_files(source, target, patterns):
    """
    Find data files and return them in a ``data_files`` compatible format.

    source   -- root of the source data tree ('' or '.' for the current
                directory).
    target   -- root of the target data tree ('' or '.' for the
                distribution directory).
    patterns -- sequence of glob patterns, relative to `source`, selecting
                the files to copy.

    Returns a sorted list of ``(target_directory, [source_files])`` tuples.
    Raises ValueError when `source` or `target` contains glob wildcards.

    Modified slightly from http://www.py2exe.org/index.cgi/data_files
    """
    if glob.has_magic(source) or glob.has_magic(target):
        raise ValueError("Magic not allowed in src, target")

    by_target_dir = defaultdict(list)
    for pattern in patterns:
        matches = glob.glob(os.path.join(source, pattern))
        # Only regular files are collected; directories are skipped.
        for found in filter(os.path.isfile, matches):
            relative = os.path.relpath(found, source)
            target_dir = os.path.dirname(os.path.join(target, relative))
            by_target_dir[target_dir].append(found)
    return sorted(by_target_dir.items())
def path_glob(command, cmdargs, cwd="."):
    """Expand glob patterns in `cmdargs` relative to the directory `cwd`.

    :param command: Command string; when it contains no wildcard characters
        `cmdargs` is returned untouched.
    :param cmdargs: List of command arguments.
    :param cwd: Directory in which the patterns are evaluated.
    :return: `cmdargs` itself, or a new list with every pattern replaced by
        its matches.  A pattern without any match is kept verbatim.
    """
    import glob
    if not glob.has_magic(command):
        return cmdargs

    assert os.path.isdir(cwd)
    # Remember the working directory *before* entering the try block so the
    # finally clause can never hit an unbound name and mask the real error.
    current_cwd = os.getcwd()
    try:
        os.chdir(cwd)
        new_cmdargs = []
        for cmdarg in cmdargs:
            if not glob.has_magic(cmdarg):
                new_cmdargs.append(cmdarg)
                continue

            more_args = glob.glob(cmdarg)
            if more_args:
                new_cmdargs.extend(more_args)
            else:
                # -- BAD-CASE: Require at least one match.
                # Otherwise, restore original arg.
                new_cmdargs.append(cmdarg)
        cmdargs = new_cmdargs
    finally:
        os.chdir(current_cwd)
    return cmdargs
def find_data_files(source, target, patterns):
    """
    Locates the specified data-files and returns the matches in a
    data_files compatible format.  Directories matched by a pattern are
    descended into, so their contents are collected as well.

    source is the root of the source data tree.
        Use '' or '.' for current directory.
    target is the root of the target data tree.
        Use '' or '.' for the distribution directory.
    patterns is an iterable of glob-patterns for the files you want to copy.
        It is not modified.

    Returns a sorted list of ``(target_directory, [source_files])`` tuples.
    Raises ValueError when `source` or `target` contains glob wildcards.
    """
    from collections import deque

    if glob.has_magic(source) or glob.has_magic(target):
        raise ValueError("Magic not allowed in src, target")

    ret = {}
    # Work on a private queue: the caller's sequence must stay untouched,
    # and popleft() is O(1) where list.pop(0) is O(n).
    pending = deque(patterns)
    while pending:
        pattern = os.path.join(source, pending.popleft())
        for filename in glob.glob(pattern):
            if os.path.isfile(filename):
                targetpath = os.path.join(
                    target, os.path.relpath(filename, source))
                path = os.path.dirname(targetpath)
                ret.setdefault(path, []).append(filename)
            if os.path.isdir(filename):
                # Queue the directory's children for a later pass.
                pending.append(
                    os.path.join(os.path.relpath(filename, source), "*"))
    return sorted(ret.items())
def find_data_files(source, target, patterns):
    """Map files below `source` matching `patterns` into the `target` tree.

    Returns a sorted list of ``(target_directory, [source_files])`` tuples
    (the shape expected by ``data_files``).  Only regular files are
    collected.  Raises ValueError when `source` or `target` contains glob
    wildcards.
    """
    if glob.has_magic(source) or glob.has_magic(target):
        raise ValueError("Magic not allowed in src, target")

    collected = {}
    for relative_pattern in patterns:
        full_pattern = os.path.join(source, relative_pattern)
        regular_files = [
            hit for hit in glob.glob(full_pattern) if os.path.isfile(hit)
        ]
        for hit in regular_files:
            mirrored = os.path.join(target, os.path.relpath(hit, source))
            collected.setdefault(os.path.dirname(mirrored), []).append(hit)
    return sorted(collected.items())
def universal_copy(src, dst):
    """
    Copy the files or directories named by `src` to `dst`.

    A directory is copied *into* `dst` when `dst` is an existing
    directory and *as* `dst` otherwise; anything else is copied with
    ``shutil.copy2``.

    :param src: source to copy -- may be a glob, file path or a directory path
    :type src: str
    :param dst: destination to copy to
    :type dst: str
    """
    # A glob expands to every match; anything else is taken literally.
    sources = glob.glob(src) if glob.has_magic(src) else [src]

    for item in sources:
        if not os.path.isdir(item):
            shutil.copy2(item, dst)
        elif os.path.isdir(dst):
            # Existing destination directory: nest the source under its
            # own name.
            trimmed = item.rstrip("/")
            leaf = trimmed.rsplit("/", 1)[-1]
            shutil.copytree(trimmed, join_paths(dst, leaf))
        else:
            shutil.copytree(item, dst)
def filename_matches(self, text, line):
    """return matching filenames unless text contains wildcard characters

    `text` is the word being completed and `line` the input line up to
    that word.  When `line` ends with an IRAF environment variable
    (``name$``) or with a path separator, the trailing directory
    expression is expanded with ``iraf.Expand`` and used as the directory
    to search; otherwise the search directory is empty.  The result of
    ``self._dir_matches`` is returned, or ``[]`` when `text` contains
    glob wildcards.
    """
    if glob.has_magic(text):
        return []

    # look for IRAF virtual filenames
    #XXX This might be simplified if '$' and '/' were added to the set
    #XXX of characters permitted in words.  Can't do that now, as
    #XXX far as I can tell, but Python 1.6 should allow it.

    #XXX Need to improve this for filenames that include characters
    #XXX not included in the spanned text.  E.g. .csh<TAB> does not
    #XXX work because the '.' is not part of the name, and filenames
    #XXX with embedded '-' or '+' do not work.

    # endswith() instead of line[-1]: an empty line must not raise.
    if line.endswith('$'):
        # preceded by IRAF environment variable
        m = re.search(r'\w*\$$', line)
        dirname = iraf.Expand(m.group())
    elif line.endswith(os.sep):
        # filename is preceded by path separator
        # match filenames with letters, numbers, $, ~, ., -, +, and
        # directory separator.  '-' is escaped and the separator passed
        # through re.escape() so the class stays valid when os.sep is a
        # backslash; ',' is listed because the former "+-/" range happened
        # to include it.
        m = re.search(r'[\w.~$+,\-%s]*$' % re.escape(os.sep), line)
        dirname = iraf.Expand(m.group())
    else:
        dirname = ''
    return self._dir_matches(text, dirname)
def execute(self, args):
    """Copy remote files from a cluster node to a local path.

    `args` is ``[cluster_tag, remote_path..., local_path]``.  The node is
    taken from ``self.opts.node``; for "master" the prefixed name
    ``<cluster_tag>-master`` is tried as a fallback.  Remote paths without
    wildcards must exist on the node.
    """
    if len(args) < 3:
        self.parser.error("please specify a cluster, remote file or " +
                          "directory, and a local destination path")
    ctag, rpaths, lpath = args[0], args[1:-1], args[-1]
    cl = self.cm.get_cluster(ctag, load_receipt=False)

    try:
        node = cl.get_node(self.opts.node)
    except exception.InstanceDoesNotExist as ide:
        if self.opts.node != "master":
            # an explicit node name was provided
            raise
        # The master may be named "<clustername>-master"
        # (i.e. dns_prefix = True in the config), so check that too.
        try:
            node = cl.get_node('%s-%s' % (ctag, self.opts.node))
        except exception.InstanceDoesNotExist:
            # master is just not there: raise the original error
            log.debug("Neither master nor %s-%s exist." %
                      (ctag, self.opts.node))
            raise ide

    if self.opts.user:
        node.ssh.switch_user(self.opts.user)

    for rpath in rpaths:
        # Wildcard patterns are resolved later by the transfer itself.
        if glob.has_magic(rpath):
            continue
        if not node.ssh.path_exists(rpath):
            raise exception.BaseException(
                "Remote file or directory does not exist: %s" % rpath)
    node.ssh.get(rpaths, lpath)
def extract_features(recording_files, nr_ceps=12):
    """Return cepstral features for the given recordings.

    NOTE(review): the function currently short-circuits.  It prints
    "skipping features" and returns ``Ceps()(range(100))`` immediately, so
    neither argument is used and everything below the first ``return`` is
    unreachable.  Confirm whether this stub is still wanted.

    The unreachable implementation concatenates the samples of all
    recordings (`recording_files` may be a glob pattern) and runs them
    through a ``Ceps`` extractor configured with the constants below.
    """
    print("skipping features")
    return Ceps()(range(100))
    # ---- unreachable from here on (see the docstring) ----
    nr_utt_in_ubm = 300
    win_length_ms = 25  # The window length of the cepstral analysis in milliseconds
    win_shift_ms = 10  # The window shift of the cepstral analysis in milliseconds
    nr_filters = 24  # NOTSURE The number of filter bands
    nr_ceps = nr_ceps  # The number of cepstral coefficients
    f_min = 0.  # NOTSURE The minimal frequency of the filter bank
    f_max = 4000.  # NOTSURE The maximal frequency of the filter bank
    delta_win = 2  # NOTSURE The integer delta value used for computing the first and second order derivatives
    pre_emphasis_coef = 0.97  # NOTSURE The coefficient used for the pre-emphasis
    dct_norm = True  # NOTSURE A factor by which the cepstral coefficients are multiplied
    mel_scale = True  # Tell whether cepstral features are extracted on a linear (LFCC) or Mel (MFCC) scale

    # TODO add feature wrapping

    if glob.has_magic(recording_files):
        recording_files = glob.glob(recording_files)
    # NOTE(review): .pop() requires a list -- a plain (non-glob) string
    # would fail here -- and it removes an element from a list passed in
    # by the caller.
    rate, ubm_wav = wavfile.read(recording_files.pop())
    for recording_file in recording_files:
        rate, signal = wavfile.read(recording_file)
        ubm_wav = np.append(ubm_wav, signal)

    c = Ceps(rate, win_length_ms, win_shift_ms, nr_filters, nr_ceps, f_min, f_max, delta_win, pre_emphasis_coef,
             mel_scale, dct_norm)
    ubm_wav = np.cast['float'](ubm_wav)  # vector should be in **float**
    mfcc = c(ubm_wav)
    return mfcc
def root_glob(pathname):
    """Return the paths matching `pathname`, listed through ROOT's TSystem.

    The directory part is expanded recursively when it contains wildcards;
    the last component is matched case-sensitively against the entries of
    every resulting directory.  Directories that cannot be opened
    contribute nothing.
    """
    # The last component is the one expected to carry the wild-card.
    parent, leaf = os.path.split(pathname)
    parents = root_glob(parent) if gl.has_magic(parent) else [parent]

    matches = []
    for folder in parents:
        # `TSystem` wraps up the calls needed to query xrootd.
        folder = gSystem.ExpandPathName(folder)
        handle = gSystem.OpenDirectory(folder)
        if not handle:
            continue

        matches.extend(
            os.path.join(folder, entry)
            for entry in __directory_iter(handle)
            if entry not in (".", "..") and fnmatch.fnmatchcase(entry, leaf)
        )
        try:
            gSystem.FreeDirectory(handle)
        except TypeError:
            pass
    return matches
def get(self, remotepaths, localpath=''):
    """
    Copy one or more files from the remote host to the local host.

    Wildcard patterns in `remotepaths` are expanded with ``self.glob`` and
    appended after the literal paths.  Every resulting path must exist
    remotely; the transfer is recursive as soon as one of them is a
    directory.  `localpath` defaults to the current working directory.
    """
    remotepaths = self._make_list(remotepaths)
    localpath = localpath or os.getcwd()

    patterns = [rpath for rpath in remotepaths if glob.has_magic(rpath)]
    resolved = [rpath for rpath in remotepaths if not glob.has_magic(rpath)]
    # Expand all patterns first, then append their matches in order.
    expansions = [self.glob(pattern) for pattern in patterns]
    for expansion in expansions:
        resolved.extend(expansion)

    for rpath in resolved:
        if not self.path_exists(rpath):
            raise exception.BaseException(
                "Remote file or directory does not exist: %s" % rpath)

    recursive = any(self.isdir(rpath) for rpath in resolved)
    self.scp.get(resolved, localpath, recursive=recursive)
def resolve_contents(self, env):
    """Return the contents with glob patterns replaced by actual filenames.

    String items containing wildcards are expanded relative to
    ``env.abspath``; the matches are stored relative to the same base as
    the original item.  Everything else is passed through unchanged.  The
    result is cached on the instance.
    """
    # TODO: We cache the values, which in theory is problematic, since
    # due to changes in the env object, the result of the globbing may
    # change. Not to mention that a different env object may be passed
    # in. We should find a fix for this.
    if not getattr(self, '_resolved_contents', None):
        resolved = []
        for item in self.contents:
            is_pattern = isinstance(item, basestring) and glob.has_magic(item)
            if not is_pattern:
                # Non-patterns are kept verbatim: an invalid filename
                # only raises an error at a later point in time.
                # TODO: This is possibly a good place to check for a
                # file's existence though; currently, when in debug
                # mode, no error would be raised at all, and simply a
                # broken url sent to the browser.
                resolved.append(item)
                continue
            pattern = env.abspath(item)
            # Strip whatever prefix abspath() put in front of the item.
            prefix_len = len(pattern) - len(item)
            resolved.extend(hit[prefix_len:] for hit in glob.glob(pattern))
        self._resolved_contents = resolved
    return self._resolved_contents
def _glib_regex(pathname_pattern): """ Helper for taking pathname patterns and converting them into Python regexes with Unix pathname matching behavior. :param pathname_pattern: String pathname :return: Compiled Regex """ # First escape '.' pathname_pattern.replace('.', '\.') if has_magic(pathname_pattern): # Replace unspecific '*' and '?' regex appropriate specifiers ('.') for special_char in ('*', '?'): split_pattern = pathname_pattern.split(special_char) new_split_pattern = [] # For each section, if there is no regex appropriate closure, add a generic catch. for bucket in split_pattern: if bucket: # If previous character is not regex closure and is not end of string, then add char... if bucket[-1] != ']' and split_pattern.index(bucket) != len(split_pattern) - 1: bucket += '.' elif split_pattern.index(bucket) == 0: # If match char was beginning of string, add regex char... bucket += '.' new_split_pattern.append(bucket) # Rejoin on special characters pathname_pattern = special_char.join(new_split_pattern) return re.compile(pathname_pattern)
def globargv(argv):
    """Return ``argv[1:]`` with glob patterns after the second item expanded.

    ``argv[0]`` (the program) is dropped and ``argv[1]`` is kept as is.
    Every later argument containing wildcards is replaced by its matches;
    a pattern without matches, like any plain argument, is kept verbatim.
    """
    if len(argv) > 2:
        import glob
        expanded = []
        # An explicit loop: a lazy map() used for its side effects never
        # runs on Python 3, which silently dropped all these arguments.
        for arg in argv[2:]:
            matches = glob.glob(arg) if glob.has_magic(arg) else []
            expanded.extend(matches or [arg])
        argv = argv[0:2] + expanded
    return argv[1:]
def _check_matches(patterns, paths): if not patterns and not paths: # Matched to the end. return True if (not patterns and paths) or (patterns and not paths): return False pattern = patterns[0] path = paths[0] if not glob.has_magic(pattern): if pattern != path: return False elif pattern == '**': if len(patterns) == 1: return True # if ** is the last one it matches anything to the right. for i in xrange(len(paths)): # Recursively check the remaining patterns as the # current pattern could match any number of paths. if _check_matches(patterns[1:], paths[i:]): return True elif not fnmatch.fnmatch(path, pattern): # Current part doesn't match. return False return _check_matches(patterns[1:], paths[1:])
def main():
    """Convert an image, or a numbered series of images, to DICOM.

    A single input file is converted straight to the output path.
    Otherwise the input is a glob pattern or a directory: the files are
    ordered by the last run of digits in their path and written to the
    output directory as ``0000.dcm``, ``0001.dcm``, ...
    """
    options = parse_cmd_line()
    input_file = options.input
    output = options.output
    image_type = options.type

    converter = Img2Dcm(options.patient, options.institution,
                        options.modality, options.serie, options.spacing)

    if os.path.isfile(input_file):
        converter.img2dcm(input_file, output, image_type, 0)
        return

    digits = re.compile(r'\d+')
    if glob.has_magic(input_file):
        files = glob.glob(input_file)
    else:
        # Directory input: only entries with a number in their path count.
        candidates = glob.glob(os.path.join(input_file, '*'))
        files = [name for name in candidates if digits.findall(name)]
    files.sort(key=lambda name: digits.findall(name)[-1])

    if not os.path.exists(output):
        os.makedirs(output)

    for image_number, name in enumerate(files):
        output_file = os.path.join(output, "%04d.dcm" % image_number)
        converter.img2dcm(name, output_file, image_type, image_number)
def match_files(paths):
    """Pair up the files named by the two entries of `paths`.

    Each entry may be a wildcard pattern, a directory or a single file.
    The two resulting file lists are sorted and zipped; surplus files in
    the longer list are reported with a warning and dropped.  The process
    exits with status 2 when an entry matches nothing.

    Returns the result of ``zip()`` over the two file lists.
    """
    filelists = []

    for path in paths:
        if glob.has_magic(path):
            files = [os.path.abspath(f) for f in glob.glob(path)]
            if not files:
                log.error(
                    'Wildcard pattern %r did not match any files.' % path)
                sys.exit(2)
            filelists.append(files)
        elif os.path.isdir(path):
            # os.listdir() returns bare names: anchor them to the listed
            # directory, not to the current working directory.
            filelists.append([os.path.abspath(os.path.join(path, f))
                              for f in os.listdir(path)])
        elif os.path.isfile(path):
            filelists.append([path])
        else:
            log.error(
                '%r is not an existing file, directory, or wildcard pattern; '
                'see `fitsdiff --help` for more usage help.' % path)
            sys.exit(2)

    filelists[0].sort()
    filelists[1].sort()

    for a, b in [(0, 1), (1, 0)]:
        if len(filelists[a]) > len(filelists[b]):
            for extra in filelists[a][len(filelists[b]):]:
                log.warning('%r has no match in %r' % (extra, paths[b]))
            filelists[a] = filelists[a][:len(filelists[b])]
            break

    return zip(*filelists)
def xrootd_iglob(pathname, raise_error):
    """Generate the xrootd paths matching the wild-card expression.

    The directory part is expanded recursively when it contains wildcards;
    each resulting directory is then listed through an xrootd
    ``FileSystem`` query and its entries are matched case-sensitively
    against the last component.  A failed listing raises RuntimeError when
    `raise_error` is true and is skipped otherwise.
    """
    parent, leaf = os.path.split(pathname)
    if gl.has_magic(parent):
        parents = list(xrootd_iglob(parent, raise_error))
    else:
        parents = [parent]

    for folder in parents:
        host, path = split_url(folder)
        query = FileSystem(host)
        if not query:
            raise RuntimeError("Cannot prepare xrootd query")

        status, dirlist = query.dirlist(path)
        if status.error:
            if raise_error:
                raise RuntimeError(
                    "'{!s}' for path '{}'".format(status, folder))
            continue

        for entry in dirlist.dirlist:
            name = entry.name
            if name in (".", "..") or not fnmatch.fnmatchcase(name, leaf):
                continue
            yield os.path.join(folder, name)
def _entry_module(self, *e): modules = [self._module_entry.get()] if glob.has_magic(modules[0]): modules = glob.glob(modules[0]) for name in modules: self.add_module(name, check=1) self._module_entry.delete(0, 'end')
def get(self, remotepaths, localpath=''):
    """
    Copies one or more files from the remote host to the local host.

    Wildcard patterns in `remotepaths` are expanded with ``self.glob`` and
    appended after the literal paths.  Every resulting path must exist
    remotely; the transfer is recursive as soon as one of them is a
    directory.  `localpath` defaults to the current working directory.

    Raises ``exception.BaseException`` for a missing remote path and
    ``exception.SCPException`` when the transfer itself fails.
    """
    remotepaths = self._make_list(remotepaths)
    localpath = localpath or os.getcwd()
    globs = []
    noglobs = []
    for rpath in remotepaths:
        if glob.has_magic(rpath):
            globs.append(rpath)
        else:
            noglobs.append(rpath)
    globresults = [self.glob(g) for g in globs]
    remotepaths = noglobs
    for globresult in globresults:
        remotepaths.extend(globresult)
    for rpath in remotepaths:
        if not self.path_exists(rpath):
            raise exception.BaseException(
                "Remote file or directory does not exist: %s" % rpath)
    recursive = any(self.isdir(rpath) for rpath in remotepaths)
    try:
        self.scp.get(remotepaths, local_path=localpath,
                     recursive=recursive)
    # "except X as e" is valid on Python 2.6+ and Python 3; the former
    # "except X, e" spelling is a syntax error on Python 3.
    except Exception as e:
        log.debug(source="sshutils",
                  msg="get failed: remotepaths=%s, localpath=%s" %
                  (str(remotepaths), localpath))
        raise exception.SCPException(str(e))
def parse(text, here=None):
    """
    Parse contents of a features list file as text.

    Empty lines and lines starting with ``#`` are skipped.  Relative
    entries are resolved against `here` (when given), every entry is
    normalized, and entries containing wildcards are expanded to the
    matching paths.

    :param text: Contents of a features list(file).
    :param here: Current working directory to use (optional).
    :return: List of FileLocation objects
    """
    locations = []
    for line in text.splitlines():
        filename = line.strip()
        if not filename:
            continue    # SKIP: Over empty line(s).
        elif filename.startswith('#'):
            continue    # SKIP: Over comment line(s).

        if here and not os.path.isabs(filename):
            # Join the *stripped* name: using the raw line would carry
            # its surrounding whitespace into the path.
            filename = os.path.join(here, filename)
        filename = os.path.normpath(filename)
        if glob.has_magic(filename):
            # -- WITH WILDCARDS:
            for filename2 in glob.iglob(filename):
                location = FileLocationParser.parse(filename2)
                locations.append(location)
        else:
            location = FileLocationParser.parse(filename)
            locations.append(location)
    return locations
def _exclude(x): exclm = re.search("^exclude:(.+)$", x) if exclm is not None: exclm = exclm.group(1) if not hasattr(glob, "has_magic") or glob.has_magic(exclm): return glob.glob(exclm) return [exclm] return []
def _expand(x): if re.search("^.+:", x) is not None: findm = re.search(r"^find:(.*?):(.*$)$", x) if findm is not None: return list(_find(findm.group(1), findm.group(2))) return [] if not hasattr(glob, "has_magic") or glob.has_magic(x): return glob.glob(x) return [x]
def clean_files(*args):
    """Delete the given files and directory trees.

    The arguments are normalised with ``_args_to_list``.  Wildcard
    patterns are expanded and their matches cleaned recursively; regular
    files are removed, directories are removed with their contents, and
    anything else is ignored.
    """
    import glob
    for path in _args_to_list(args):
        if glob.has_magic(path):
            clean_files(glob.glob(path))
            continue
        if os.path.isfile(path):
            os.remove(path)
        elif os.path.isdir(path):
            shutil.rmtree(path)
def do_command(argv):
    """Spawn `argv` as a subprocess and wait for it to finish.

    Arguments after the program name that contain wildcards are replaced
    by their matches (a pattern without matches is dropped).
    """
    newargv = argv[:1]
    for arg in argv[1:]:
        if glob.has_magic(arg):
            newargv.extend(glob.glob(arg))
        else:
            newargv.append(arg)
    # "async" became a reserved word in Python 3.7, so writing it as a
    # literal keyword argument is a syntax error there.  Passing the
    # options through a dict hands over exactly the same keywords.
    spawn_options = {
        "logfile": None,
        "env": None,
        "callback": _child_exited,
        "persistent": 0,
        "merge": 1,
        "async": 0,
    }
    proc = PM.spawnprocess(SubProcess, newargv, **spawn_options)
    proc.wait()
def _generic_reader(pathname_or_url=None, callback_func=None, **kwargs):
    """Read one or more sources with `callback_func` and return the result.

    `pathname_or_url` is dispatched on its kind:

    * not a string: treated as a file-like object and handed to
      `callback_func` directly; on ``TypeError`` its content is copied
      into a temporary file whose name is passed instead;
    * bytes starting with ``<``: treated as an XML string and wrapped in a
      ``BytesIO``;
    * contains ``://`` within its first 10 characters: treated as a URL
      and downloaded into a temporary file first;
    * otherwise: a file name or glob pattern.  All matches are read in
      sorted order and merged into the first result via ``extend()``.

    Extra keyword arguments are forwarded to `callback_func`.

    Raises ``Exception`` when a glob pattern matches nothing and
    ``IOError`` when a plain path is not an existing file.
    """
    if not isinstance(pathname_or_url, (str, native_str)):
        # not a string - we assume a file-like object
        try:
            # first try reading directly
            generic = callback_func(pathname_or_url, **kwargs)
        except TypeError:
            # if this fails, create a temporary file which is read directly
            # from the file system
            pathname_or_url.seek(0)
            with NamedTemporaryFile() as fh:
                fh.write(pathname_or_url.read())
                generic = callback_func(fh.name, **kwargs)
        return generic
    elif isinstance(pathname_or_url, bytes) and \
            pathname_or_url.strip().startswith(b'<'):
        # XML string
        return callback_func(io.BytesIO(pathname_or_url), **kwargs)
    elif "://" in pathname_or_url[:10]:
        # URL
        # extract extension if any
        suffix = \
            os.path.basename(pathname_or_url).partition('.')[2] or '.tmp'
        with NamedTemporaryFile(suffix=sanitize_filename(suffix)) as fh:
            download_to_file(url=pathname_or_url, filename_or_buffer=fh)
            generic = callback_func(fh.name, **kwargs)
        return generic
    else:
        pathname = pathname_or_url
        # File name(s)
        pathnames = sorted(glob.glob(pathname))
        if not pathnames:
            # try to give more specific information why the stream is empty
            if glob.has_magic(pathname) and not glob.glob(pathname):
                raise Exception("No file matching file pattern: %s" % pathname)
            elif not glob.has_magic(pathname) and not os.path.isfile(pathname):
                raise IOError(2, "No such file or directory", pathname)

        # NOTE(review): if neither branch above raises while `pathnames`
        # is empty, the index below fails with IndexError.
        generic = callback_func(pathnames[0], **kwargs)
        if len(pathnames) > 1:
            for filename in pathnames[1:]:
                generic.extend(callback_func(filename, **kwargs))
        return generic
def globfix(files):
    """Expand wildcards in `files` on Windows; return `files` elsewhere.

    On "win32" a new list is built in which every entry containing
    wildcards is replaced by its matches.  On any other platform the
    argument is returned unchanged.
    """
    if sys.platform != "win32":
        return files

    expanded = []
    for name in files:
        expanded.extend(glob.glob(name) if glob.has_magic(name) else [name])
    return expanded
def add_file(self, file_path: str) -> None:
    """Load the entries of a JSON file, or of every file matching a glob.

    A path containing wildcards is expanded recursively (``**`` is
    supported) and each match is added in turn.  Otherwise the file is
    parsed as JSON and every element is passed to ``self.add_entry``.
    """
    if not has_magic(file_path):
        with open(file_path, "r") as file_handle:
            for entry in json.load(file_handle):
                self.add_entry(entry)
        return

    for match in glob(file_path, recursive=True):
        self.add_file(match)
def GetUnknownIncompatibleDirectories():
    """Gets a list of third-party directories which use licenses incompatible
    with Android which are not present in the known_issues.py file.
    This is used by the AOSP bot.

    Exclude entries containing a glob expression are reduced to their
    directory part; entries whose directory part still contains a glob
    expression are reported and skipped.

    Returns:
        A list of directories.
    """
    incompatible_directories = frozenset(GetIncompatibleDirectories())
    known_incompatible = []
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for path, exclude_list in known_issues.KNOWN_INCOMPATIBLE.items():
        for exclude in exclude_list:
            if glob.has_magic(exclude):
                exclude_dirname = os.path.dirname(exclude)
                if glob.has_magic(exclude_dirname):
                    print('Exclude path %s contains an unexpected glob '
                          'expression, skipping.' % exclude)
                    # Actually skip it, as the message says: a path that
                    # still contains wildcards names no real directory.
                    continue
                exclude = exclude_dirname
            known_incompatible.append(
                os.path.normpath(os.path.join(path, exclude)))
    known_incompatible = frozenset(known_incompatible)
    return incompatible_directories.difference(known_incompatible)
def glob(pathname):
    """Glob locally first and fall back to xrootd for unmatched patterns.

    Returns the local matches when there are any.  A path without
    wildcards that matched nothing is returned as a single-element list;
    a pattern that matched nothing is handed to ``xrootd_glob``.
    """
    # Let normal python glob try first
    local_hits = gl.glob(pathname)
    if local_hits:
        return local_hits

    if gl.has_magic(pathname):
        # Nothing found locally: try xrootd instead
        return xrootd_glob(pathname)

    # No wildcard to expand: hand the name back untouched
    return [pathname]
def glob(pathname):
    """Glob locally first and fall back to ROOT for unmatched patterns.

    Returns the local matches when there are any.  A path without
    wildcards that matched nothing is returned as a single-element list;
    a pattern that matched nothing is handed to ``root_glob``.
    """
    # Let normal python glob try first
    local_hits = gl.glob(pathname)
    if local_hits:
        return local_hits

    if gl.has_magic(pathname):
        # Nothing found locally: use ROOT's remote system querying
        return root_glob(pathname)

    # No wildcard to expand: hand the name back untouched
    return [pathname]
def glob(self, path, **kwargs):
    """
    Find files by glob-matching.

    If the path ends with '/' and does not contain "*", it is essentially
    the same as ``ls(path)``, returning only files.

    We support ``"**"``, ``"?"`` and ``"[..]"``.

    kwargs are passed to ``find``, which produces the candidate paths
    that the pattern is matched against.

    Returns a sorted list of the matching paths.  A path without any
    wildcard yields ``[path]`` when it exists and ``[]`` otherwise.
    """
    import re
    from glob import has_magic

    # Remember the trailing slash before the protocol is stripped.
    ends = path.endswith("/")
    path = self._strip_protocol(path)
    # Position of the first wildcard character of each kind
    # (len(path) when that kind is absent).
    indstar = path.find("*") if path.find("*") >= 0 else len(path)
    indques = path.find("?") if path.find("?") >= 0 else len(path)
    indbrace = path.find("[") if path.find("[") >= 0 else len(path)

    ind = min(indstar, indques, indbrace)

    if not has_magic(path):
        root = path
        depth = 1
        if ends:
            path += "/*"
        elif self.exists(path):
            return [path]
        else:
            return []  # glob of non-existent returns empty
    elif "/" in path[:ind]:
        # Search below the last directory that precedes the first wildcard.
        ind2 = path[:ind].rindex("/")
        root = path[:ind2 + 1]
        # "**" is searched to a fixed depth of 20; otherwise one level per
        # remaining path component.
        depth = 20 if "**" in path else path[ind2 + 1:].count("/") + 1
    else:
        root = ""
        depth = 20 if "**" in path else 1
    allpaths = self.find(root, maxdepth=depth, withdirs=True, **kwargs)
    # Translate the glob into an anchored regular expression.  The order
    # of the replacements matters: backslashes are escaped first so the
    # escapes added afterwards are not escaped again.
    pattern = ("^" + (path.replace(
        "\\", r"\\").replace(".", r"\.").replace("+", r"\+").replace(
            "//", "/").replace("(", r"\(").replace(")", r"\)").replace(
                "|", r"\|").rstrip("/").replace("?", ".")) + "$")
    # "**" may cross directory boundaries while a single "*" may not; the
    # placeholder keeps "**" from being rewritten by the single-star rule.
    pattern = re.sub("[*]{2}", "=PLACEHOLDER=", pattern)
    pattern = re.sub("[*]", "[^/]*", pattern)
    pattern = re.compile(pattern.replace("=PLACEHOLDER=", ".*"))
    out = {
        p
        for p in allpaths
        if pattern.match(p.replace("//", "/").rstrip("/"))
    }
    return list(sorted(out))
def get_stat_dir(self, rawline, datacr):
    """Return an iterator object that yields a list of files
    matching a dirname pattern non-recursively in a form
    suitable for STAT command.

     - (str) rawline: the raw string passed by client as command
       argument.

    Without wildcards the plain directory listing is returned.  Wildcards
    are accepted in the last path component only; a pattern in the
    directory part yields a one-line error message instead.
    """
    ftppath = self.ftpnorm(rawline)
    if not glob.has_magic(ftppath):
        return self.get_list_dir(self.ftp2fs(rawline, datacr))

    parent, leaf = os.path.split(ftppath)
    if glob.has_magic(parent):
        return iter(['Directory recursion not supported.\r\n'])

    fs_parent = self.ftp2fs(parent, datacr)
    listing = self.glob1(fs_parent, leaf)
    if listing:
        listing.sort()
    return self.format_list(fs_parent, listing)
def _list_part_files(path, filesystem=None): if filesystem is None: isdir = os.path.isdir glob_iter = glob.glob path_sep = os.path.sep else: isdir = filesystem.isdir glob_iter = filesystem.glob is_local = type(filesystem).__name__.lower().find('local') >= 0 path_sep = os.path.sep if is_local else '/' if not glob.has_magic(path) and isdir(path): path = path.rstrip(path_sep) + f'{path_sep}*.parquet' if glob.has_magic(path): files = list(glob_iter(path)) files.sort() else: files = [path] return files
def iglob(pathname, yield_even_not_exists=False):
    """Yield the paths matching `pathname` (environment variables expanded).

    A pathname without wildcards is yielded when it exists -- or
    unconditionally when `yield_even_not_exists` is true; for a pathname
    ending with a separator the directory part is yielded if it is a
    directory.  With wildcards, the directory part is expanded first and
    the last component is matched in every resulting directory; a last
    component of exactly ``**`` is delegated to ``_iglobstar``.
    """
    pathname = os.path.expandvars(pathname)
    parent, leaf = os.path.split(pathname)
    assert parent != pathname, pathname

    if not glob.has_magic(pathname):
        if yield_even_not_exists:
            yield pathname
        elif not leaf:
            # Trailing separator: only the directory itself can match.
            if os.path.isdir(parent):
                yield parent
        elif os.path.lexists(pathname):
            yield pathname
        return

    if parent == "":
        parent = os.curdir
    # A set drops directories reached through more than one expansion.
    parents = set(iglob(parent)) if glob.has_magic(parent) else [parent]

    if leaf == "**":
        expand = _iglobstar
    elif glob.has_magic(leaf):
        expand = _glob1
    else:
        expand = glob.glob0

    for folder in parents:
        for name in expand(folder, leaf):
            # _iglobstar already yields complete paths.
            yield name if expand is _iglobstar else os.path.join(folder, name)
def resolve_contents(self, env=None, force=False):
    """Convert bundle contents into something that can be easily
    processed.

    - Glob patterns are resolved
    - Validate all the source paths to complain about
      missing files early.
    - Third party extensions get to hook into this to
      provide a basic virtualized filesystem.

    The return value is a list of 2-tuples (relpath, abspath).
    The first element is the path that is assumed to be relative
    to the ``Environment.directory`` value. We need it to construct
    urls to the source files.
    The second element is the absolute path to the actual location
    of the file. Depending on the magic a third party extension
    does, this may be somewhere completely different.

    URLs and nested Bundles are returned as a 2-tuple where
    both items are the same.

    Set ``force`` to ignore any cache, and always re-resolve
    glob patterns.

    Raises ``BundleError`` when a plain source path cannot be normalized.
    """
    env = self._get_env(env)

    # TODO: We cache the values, which in theory is problematic, since
    # due to changes in the env object, the result of the globbing may
    # change. Not to mention that a different env object may be passed
    # in. We should find a fix for this.
    if getattr(self, '_resolved_contents', None) is None or force:
        l = []
        for item in self.contents:
            if isinstance(item, Bundle):
                l.append((item, item))
            else:
                if is_url(item):
                    # Is a URL
                    l.append((item, item))
                elif isinstance(item, basestring) and has_magic(item):
                    # Is globbed pattern
                    path = env.abspath(item)
                    for f in glob.glob(path):
                        l.append((f[len(path) - len(item):], f))
                else:
                    # Is just a normal path; Send it through
                    # _normalize_source_path().
                    try:
                        l.append((item, env._normalize_source_path(item)))
                    # "as" form: valid on Python 2.6+ and Python 3.
                    except IOError as e:
                        raise BundleError(e)
        self._resolved_contents = l
    # The docstring promises the resolved list; without this the call
    # evaluated to None.
    return self._resolved_contents
def globargs(args=None):
    """Return `args` with every glob pattern replaced by its matches.

    `args` defaults to ``sys.argv[1:]``.  Plain file names are kept as
    they are; a pattern that matches nothing contributes nothing.
    """
    if args is None:
        args = sys.argv[1:]

    expanded = []
    for arg in args:
        # Glob pattern -> its matches; regular filename -> itself
        expanded += glob.glob(arg) if glob.has_magic(arg) else [arg]
    return expanded
def build(self):
    """Link the configured files from the build dir into the install dir.

    ``self.options.files`` maps a source path (relative to
    ``self.builddir``, optionally a glob pattern) to a destination path
    relative to ``self.installdir``.

    Raises:
        EnvironmentError: if a glob pattern matches nothing.
    """
    super().build()

    files = self.options.files
    globs = {f: files[f] for f in files if glob.has_magic(f)}
    filepaths = {
        os.path.join(self.builddir, f): files[f]
        for f in files
        if not glob.has_magic(f)
    }

    for src in globs:
        paths = glob.glob(os.path.join(self.builddir, src))
        if not paths:
            raise EnvironmentError('no matches for {!r}'.format(src))
        for path in paths:
            # glob() already returns paths prefixed with builddir; joining
            # again would double the prefix for a relative builddir.
            filepaths[path] = globs[src]

    for src in sorted(filepaths):
        dst = os.path.join(self.installdir, filepaths[src].lstrip('/'))
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        _recursively_link(src, dst, self.installdir)
def consider_single_directory(self, directory, item):
    """Resolve ``item`` within ``directory``, glob or non-glob style.

    A glob expression returns the (possibly empty) list produced by
    ``self.glob``; a plain name returns the joined path and raises
    ``IOError`` when that path does not exist.

    Primarily to be called from subclasses rather than overridden.
    """
    expr = path.join(directory, item)
    if not has_magic(expr):
        if not path.exists(expr):
            raise IOError("'%s' does not exist" % expr)
        return expr

    # Note: No error if glob returns an empty list
    return list(self.glob(directory, item))
def __init__(self, pattern, case_sensitive, inclusive):
    """Set up a matcher for a plain (non-glob) path pattern.

    The pattern is normalized with ``norm_path`` (and ``norm_case`` when
    matching is case insensitive), given a leading ``os.sep`` and passed
    on to the parent initializer. ``seps_count`` records how many
    separators ``self.pattern`` contains.
    """
    normalized = self.norm_path(pattern, ispattern=True)
    assert not glob.has_magic(normalized)

    if not case_sensitive:
        normalized = self.norm_case(normalized)

    # Ensure we match the full name of the first part to match in the path
    if normalized[0] != os.sep:
        normalized = os.sep + normalized

    super().__init__(normalized, case_sensitive, inclusive)
    self.seps_count = self.pattern.count(os.sep)
def gen_dump_files(dump_files: Union[str, Iterable[str]]) -> Iterator[str]:
    """Yield dump file names, resolving glob patterns where present.

    A single string is treated as a one-element list. Matches of a glob
    pattern are yielded sorted by ``basename_sortkey``; names without glob
    magic are yielded unchanged.
    """
    candidates = [dump_files] if isinstance(dump_files, str) else dump_files

    for candidate in candidates:
        if not glob.has_magic(candidate):
            # Plain name: hand it back as is.
            yield candidate
            continue
        # Glob pattern: resolve it.
        yield from sorted(glob.glob(candidate), key=basename_sortkey)
def find_files(source, target, patterns):
    """Locate data files and return them in a data_files compatible format.

    source is the root of the source data tree.
        Use '' or '.' for current directory.
    target is the root of the target data tree.
        Use '' or '.' for the distribution directory.
    patterns is a sequence of glob-patterns for the files you want to copy.

    Returns a sorted list of ``(target_directory, [source_files])`` pairs.
    Raises ``ValueError`` if ``source`` or ``target`` contains glob magic.
    """
    if glob.has_magic(source) or glob.has_magic(target):
        raise ValueError("Magic not allowed in src, target")

    grouped = {}
    for pattern in patterns:
        for match in glob.glob(os.path.join(source, pattern)):
            if not os.path.isfile(match):
                continue
            # Mirror the file's position below ``source`` under ``target``.
            destination = os.path.join(
                target, os.path.relpath(match, source))
            grouped.setdefault(os.path.dirname(destination), []).append(match)
    return sorted(grouped.items())
def my_glob(pathname):
    """Like glob.glob, but doesn't filter out invalid file names.

    A pathname without glob magic is yielded as is, so that the caller can
    catch a bad file name explicitly and avoid user confusion.
    """
    if not glob.has_magic(pathname):
        yield pathname
        return

    dirname, basename = os.path.split(pathname)

    if not dirname:
        # Pattern without a directory part: match in the current directory.
        for name in glob.glob1(os.curdir, basename):
            yield name
        return

    # The directory part may itself be a pattern; expand it recursively.
    parents = my_glob(dirname) if glob.has_magic(dirname) else [dirname]
    matcher = glob.glob1 if glob.has_magic(basename) else glob.glob0

    for parent in parents:
        for name in matcher(parent, basename):
            yield os.path.join(parent, name)
def globargv(argv):
    """Expand glob characters, environment variables and user shorthand.

    Every argument after ``argv[0]`` is expanded. Return a new list in
    which arguments that could be expanded are replaced by their
    expansion, and those that could not are added as-is. An ``argv`` with
    fewer than two items is returned unchanged.
    """
    if len(argv) <= 1:
        return argv

    expanded = [argv[0]]
    for raw in argv[1:]:
        arg = os.path.expandvars(os.path.expanduser(raw))
        matches = glob.glob(arg) if glob.has_magic(arg) else []
        # A pattern without matches falls back to the argument itself.
        expanded.extend(matches or [arg])
    return expanded
def match_files(paths):
    """Pair up the files named by the two entries of ``paths``.

    Each entry may be a file, a directory or a wildcard pattern. If both
    are existing files, ``[paths]`` is returned directly. Otherwise the
    file names found on both sides are intersected and a list of
    ``(first_path, second_path)`` tuples is returned for the names the two
    sides have in common, ordered by file name so the result is
    reproducible between runs.

    Names present on only one side are reported with ``log.warning``
    (unless that side is a directory). Unusable input is reported with
    ``log.error`` followed by ``sys.exit(2)``.
    """
    if os.path.isfile(paths[0]) and os.path.isfile(paths[1]):
        # shortcut if both paths are files
        return [paths]

    dirnames = [None, None]
    filelists = [None, None]

    for i, path in enumerate(paths):
        if glob.has_magic(path):
            files = [os.path.split(f) for f in glob.glob(path)]
            if not files:
                log.error('Wildcard pattern %r did not match any files.', path)
                sys.exit(2)

            dirs, files = list(zip(*files))
            if len(set(dirs)) > 1:
                log.error(
                    'Wildcard pattern %r should match only one '
                    'directory.', path)
                sys.exit(2)

            dirnames[i] = set(dirs).pop()
            filelists[i] = sorted(files)
        elif os.path.isdir(path):
            dirnames[i] = path
            filelists[i] = [
                f for f in sorted(os.listdir(path))
                if os.path.isfile(os.path.join(path, f))
            ]
        elif os.path.isfile(path):
            dirnames[i] = os.path.dirname(path)
            filelists[i] = [os.path.basename(path)]
        else:
            log.error(
                '%r is not an existing file, directory, or wildcard '
                'pattern; see `fitsdiff --help` for more usage help.', path)
            sys.exit(2)

        dirnames[i] = os.path.abspath(dirnames[i])

    filematch = set(filelists[0]) & set(filelists[1])

    for a, b in [(0, 1), (1, 0)]:
        if len(filelists[a]) > len(filematch) and not os.path.isdir(paths[a]):
            for extra in sorted(set(filelists[a]) - filematch):
                log.warning('%r has no match in %r', extra, dirnames[b])

    # Iterating the bare set would make the order of the pairs depend on
    # string hashing; sort for a stable result.
    return [(os.path.join(dirnames[0], f), os.path.join(dirnames[1], f))
            for f in sorted(filematch)]
def search_for_source(self, item):
    """Locate ``item``, going through the staticfiles finders if enabled.

    When ``self.use_staticfiles`` is false the lookup is delegated to
    ``Resolver.search_for_source``. Otherwise a glob expression yields a
    list from ``self.glob_staticfiles`` and a plain name yields whatever
    ``finders.find`` returns. Raises ``IOError`` if nothing was found.
    """
    if not self.use_staticfiles:
        return Resolver.search_for_source(self, item)

    # Use the staticfiles finders to determine the absolute path
    if finders:
        if has_magic(item):
            return list(self.glob_staticfiles(item))
        found = finders.find(item)
        if found is not None:
            return found

    raise IOError(
        "'%s' not found (using staticfiles finders)" % item)
def validate(source, source_type=None, **options):
    """Validate resource

    API      | Usage
    -------- | --------
    Public   | `from frictionless import validate`

    Parameters:
        source (dict|str): a data source
        source_type (str): source type - inquiry, package, resource, schema or table
        **options (dict): options for the underlying function

    Returns:
        Report: validation report
    """
    module = import_module("frictionless.validate")

    # Normalize source
    # NOTE: move to lower-levels
    if isinstance(source, Path):
        source = str(source)

    # Detect source type
    # NOTE: move to helpers
    if not source_type:
        # A non-empty list whose first item is a string becomes a package.
        if source and isinstance(source, list) and isinstance(source[0], str):
            pkg = Package(
                basepath=options.pop("basepath", None),
                trusted=options.pop("trusted", False),
            )
            pkg.infer(source)
            source = pkg
            source_type = "package"

        # A glob pattern or a directory is inferred as a package as well.
        if isinstance(source, str):
            infer_pattern = None
            if glob.has_magic(source):
                infer_pattern = source
            elif os.path.isdir(source):
                infer_pattern = f"{source}/*"
            if infer_pattern is not None:
                pkg = Package()
                pkg.infer(infer_pattern)
                source = pkg
                source_type = "package"

        if not source_type:
            source_type = helpers.detect_source_type(source)

    # Validate source
    handler = getattr(module, "validate_%s" % source_type)
    return handler(source, **options)
def main():
    """Command line entry point: validate an swc reconstruction.

    The files to check come either from the ``dir`` argument (a list of
    names and glob patterns holding at most one ``.swc`` and at most one
    ``.marker`` file) or from the ``swc`` and ``marker`` arguments. The swc
    file is read with strict validation and the outcome is stored in a
    ``Report``.

    Exits with status 1 when no input was given or the directory content
    is unusable.
    """
    args = vars(parse_arguments(sys.argv[1:]))

    # NOTE: single-argument print(...) calls and ``except ... as`` behave
    # the same on Python 2 and Python 3.
    if args['dir'] is None and args['swc'] is None and args['marker'] is None:
        print("You need to provide one of the following arguments: directory, swc file, marker file")
        sys.exit(1)

    reconstruction_files = []
    swc_file = None
    marker_file = None

    if args['dir']:
        for file_name in args['dir']:
            if glob.has_magic(file_name):
                reconstruction_files += glob.glob(file_name)
            else:
                reconstruction_files.append(file_name)

        swc_files = [f for f in reconstruction_files if f.endswith('.swc')]
        marker_files = [
            f for f in reconstruction_files if f.endswith('.marker')
        ]

        if len(swc_files) > 1 or len(marker_files) > 1:
            print("You cannot choose a directory with more than one swc or marker file")
            sys.exit(1)

        if len(swc_files) == 0:
            print("No swc file in the directory. No swc validation was done.")
            sys.exit(1)

        # A marker file is optional: indexing marker_files[0]
        # unconditionally raised IndexError for a directory that only
        # holds an swc file.
        if marker_files:
            matching_morphology_name = marker_files[0].replace(
                '.marker', '.swc')
            if matching_morphology_name != swc_files[0]:
                print("No matching .swc file found. No marker validation was done for:\n %s \n\n" % marker_files[0])
                sys.exit(1)
            marker_file = marker_files[0]

        swc_file = swc_files[0]
    else:
        swc_file = args['swc']
        marker_file = args['marker']

    report = Report()

    try:
        swc.read_swc(swc_file, strict_validation=True)
        report.add_swc_results(swc_file, [])
    except InvalidMorphology as im:
        report.add_swc_results(swc_file, im.validation_errors)
def load_data(data_path, *, reset_index=False, reader_mapping=None, **kwargs):
    """Load ``data_path`` with a reader chosen from its file format.

    The format is the file extension of ``data_path``; for a glob pattern
    or a directory it is the extension of the first match that has a
    reader. ``reader_mapping`` maps an extension (without the dot) to a
    callable ``reader(data_path, **kwargs)`` and defaults to a set of
    pandas readers. With ``reset_index`` the index of the result is reset
    in place before it is returned.

    Raises ``ValueError`` if the path does not exist or no reader is
    registered for the detected format.
    """
    import os.path as path
    import glob

    if reader_mapping is None:
        csv_reader = partial(pd.read_csv, low_memory=False)
        text_reader = partial(pd.read_csv, low_memory=False)
        reader_mapping = {
            'csv': csv_reader,
            'txt': text_reader,
            'parquet': pd.read_parquet,
            'par': pd.read_parquet,
            'json': pd.read_json,
            'pkl': pd.read_pickle,
            'pickle': pd.read_pickle,
        }

    known_formats = reader_mapping.keys()

    def get_file_format(file_path):
        # Extension without its leading dot.
        return path.splitext(file_path)[-1].lstrip('.')

    def get_file_format_by_glob(data_pattern):
        # First matching file whose extension has a reader decides.
        candidates = (
            get_file_format(match)
            for match in glob.glob(data_pattern, recursive=True)
        )
        return next((c for c in candidates if c in known_formats), None)

    if glob.has_magic(data_path):
        fmt = get_file_format_by_glob(data_path)
    elif not path.exists(data_path):
        raise ValueError(f'Not found path {data_path}')
    elif path.isdir(data_path):
        if data_path.endswith(path.sep):
            path_pattern = f'{data_path}*'
        else:
            path_pattern = f'{data_path}{path.sep}*'
        fmt = get_file_format_by_glob(path_pattern)
    else:
        fmt = get_file_format(data_path)

    if fmt not in known_formats:
        raise ValueError(f'Not supported data format{fmt}')

    reader = reader_mapping[fmt]
    df = reader(data_path, **kwargs)

    if reset_index:
        df.reset_index(drop=True, inplace=True)

    return df
def expand(self, path_list):
    """Expand a list of paths using glob magic.

    :param list[str] path_list: A list of paths which may contain magic
    :rtype: list[str]
    :returns: A list of paths without magic
    """
    expanded = []
    for entry in path_list:
        if not glob.has_magic(entry):
            expanded.append(entry)
        else:
            expanded.extend(glob.iglob(entry))
    return expanded
def __init__(self, path='.gitignore'):
    """Read ignore rules from the file at ``path``.

    Lines starting with ``#`` and lines that are empty after stripping are
    skipped. Every other line is stripped of surrounding whitespace and
    trailing slashes, then stored in ``self.patterns`` if it contains glob
    magic and in ``self.names`` otherwise. Both attributes are tuples and
    stay empty when the file does not exist.
    """
    self.names = self.patterns = ()

    ignore_file = Path(path)
    if not ignore_file.exists():
        return

    # read .gitignore patterns
    names, patterns = [], []
    for raw_line in ignore_file.read_text().splitlines():
        if raw_line.startswith('#'):
            continue
        entry = raw_line.strip().rstrip('/')
        if not entry:
            continue
        bucket = patterns if glob.has_magic(entry) else names
        bucket.append(entry)

    self.names = tuple(names)
    self.patterns = tuple(patterns)
def expand_args(args):
    """Expand the wildcards in an argv-style list.

    Under Windows, cmd does not convert ``*.tif`` into a list of files, so
    the expansion is done here.

    :param list args: list of files or wildcards
    :return: list of actual args
    """
    expanded = []
    for entry in args:
        if not glob.has_magic(entry):
            expanded.append(entry)
            continue
        expanded.extend(glob.glob(entry))
    return expanded
def consider_single_directory(self, directory, item):
    """Search for ``item`` within ``directory``.

    Is able to resolve glob instructions: a glob expression returns the
    list produced by ``self.glob``, a plain name returns the joined path.
    Raises ``IOError`` if a plain path does not exist.

    Subclasses can call this when they have narrowed down the location of
    a bundle item to a single directory.
    """
    full_path = path.join(directory, item)

    if has_magic(full_path):
        # Note: No error if glob returns an empty list
        matches = self.glob(directory, item)
        return list(matches)

    if path.exists(full_path):
        return full_path

    raise IOError("'%s' does not exist" % full_path)
def _resolve_include(self, source_filename, include_spec, fileconfig, visited):
    """Read and parse the file(s) named by an include specification.

    ``include_spec`` is stripped and resolved relative to the directory of
    ``source_filename``; it may be a glob pattern. Every match is read with
    ``_read_config_file`` and passed to ``_parse_config``, in sorted order.

    Returns the sorted list of included file names. Raises ``error`` if a
    specification without wildcards matches no file.
    """
    base_dir = os.path.dirname(source_filename)
    include_glob = os.path.join(base_dir, include_spec.strip())
    include_filenames = sorted(glob.glob(include_glob))

    # Empty set is OK if wildcard but not for direct file reference
    if not (include_filenames or glob.has_magic(include_glob)):
        raise error("Include file '%s' does not exist" % (include_glob,))

    for include_filename in include_filenames:
        include_data = self._read_config_file(include_filename)
        self._parse_config(include_data, include_filename, fileconfig, visited)
    return include_filenames
def _glob(self, pattern):
    # type: (Text) -> List[Text]
    """Return the paths matching ``pattern``.

    A trailing ``/.`` is reduced to ``/``. A pattern without glob magic
    matches itself if it exists (or, when it ends in a slash, if its
    directory part is a directory). Otherwise the directory part is
    expanded recursively and the last component is matched inside every
    resulting directory via ``_glob1`` (magic) or ``_glob0`` (literal).
    """
    if pattern.endswith("/."):
        pattern = pattern[:-1]
    dirname, basename = pattern.rsplit('/', 1)

    if not glob.has_magic(pattern):
        if basename:
            found = self.exists(pattern)
        else:
            # Patterns ending in slash should match only directories
            found = self.isdir(dirname)
        return [pattern] if found else []

    if not dirname:
        return self._glob1(basename)

    parents = self._glob(dirname)
    matcher = self._glob1 if glob.has_magic(basename) else self._glob0

    results = []
    for parent in parents:
        results.extend(matcher(basename, parent))
    return results
def extract(source, *, source_type=None, process=None, stream=False, **options):
    """Extract resource rows

    API      | Usage
    -------- | --------
    Public   | `from frictionless import extract`

    Parameters:
        source (dict|str): data source
        source_type (str): source type - package, resource or table
        process? (func): a row processor function
        stream? (bool): return a row stream(s) instead of loading into memory
        **options (dict): options for the underlying function

    Returns:
        Row[]|{path: Row[]}: rows in a form depending on the source type
    """
    module = import_module("frictionless.extract")

    # Normalize source
    # NOTE: move to lower-levels
    if isinstance(source, Path):
        source = str(source)

    # Detect source type
    # NOTE: move to helpers
    if not source_type:
        if not callable(source):
            # glob.has_magic() and os.path.isdir() raise TypeError for
            # sources such as a descriptor dict, so they are only applied
            # to path-like text.
            is_text = isinstance(source, (str, bytes))
            if hasattr(source, "read"):
                # NOTE(review): this value is replaced by the
                # detect_source_type() call below - confirm that is
                # intended.
                source_type = "resource"
            elif isinstance(source, list) or (is_text and glob.has_magic(source)):
                package = Package()
                package.infer(source)
                source = package
            elif is_text and os.path.isdir(source):
                package = Package()
                package.infer(f"{source}/*")
                source = package
        source_type = helpers.detect_source_type(source)

    # Extract source
    extract = getattr(module, "extract_%s" % source_type)
    return extract(source, process=process, stream=stream, **options)