def get_as_list(self, *argstrs):
    """Return the current values of the given argument strings, in order.

    Accepts either multiple argument-string positional arguments or a
    single tuple/list of argument strings.

    Raises InvalidArgumentError if no argument strings are given or if
    any of them is unknown to this object.
    """
    if not argstrs:
        raise cerr.InvalidArgumentError(
            "One or more argument strings must be provided")
    if len(argstrs) == 1 and type(argstrs[0]) in (tuple, list):
        # A single collection argument stands in for the whole vararg list.
        argstrs = argstrs[0]
    unknown_argstrs = set(argstrs) - set(self.argstr2varstr)
    if unknown_argstrs:
        raise cerr.InvalidArgumentError(
            "This {} object does not have the following argument strings: {}"
            .format(type(self).__name__, list(unknown_argstrs)))
    return [self.vars_dict[self.argstr2varstr[a]] for a in argstrs]
def _jobscript_condopt_eval(self, condopt_expr, out_type):
    """Expand '%'-prefixed placeholders in a jobscript conditional-option
    expression with this object's script-argument values, then convert.

    `out_type` must be the builtin `str` (substitute literal argument
    values and return the resulting string) or the builtin `eval`
    (substitute dictionary-lookup expressions and evaluate the result as
    a Python expression within this function's scope).

    Raises InvalidArgumentError if `out_type` is neither `str` nor `eval`.
    """
    if out_type not in (str, eval):
        raise cerr.InvalidArgumentError(
            "`out_type` must be either str or eval")
    # NOTE: the eval path produces expressions of the form
    # "vars_dict['<varstr>']", so this local MUST keep the name `vars_dict`
    # for eval(condopt_expr) to resolve it.
    vars_dict = self.vars_dict
    # Longest variable names first, so a substitution for a short name
    # cannot partially consume a longer placeholder that contains it.
    for varstr in sorted(vars_dict.keys(), key=len, reverse=True):
        # Accepted placeholder spellings: the variable name, its argument
        # string, and the argument string without leading dashes, each in
        # original, lower, and upper case.
        possible_substr = {
            '%' + s for s in [
                varstr, self.varstr2argstr[varstr],
                self.varstr2argstr[varstr].lstrip('-')
            ]
        }
        possible_substr = possible_substr.union(
            {s.lower() for s in possible_substr},
            {s.upper() for s in possible_substr})
        for substr in possible_substr:
            if substr in condopt_expr:
                replstr = str(
                    vars_dict[varstr]
                ) if out_type is str else "vars_dict['{}']".format(varstr)
                condopt_expr = condopt_expr.replace(substr, replstr)
                # Only the first matching spelling for this variable is
                # substituted before moving on to the next variable.
                break
    return out_type(condopt_expr)
def __init__(self, report_item=None, initial_file_total=0, initial_folder_total=0,
             initial_file_estimate=0, initial_folder_estimate=0, track_estimates=True):
    """Initialize file/folder counters for tracking a directory walk.

    `report_item`, when given, must be one of WALK_TRACK_CHOICES.
    When `track_estimates` is True, per-depth bookkeeping lists are
    created (seeded with the root folder at depth 0) so that running
    count estimates can be maintained during the walk.
    """
    if report_item is not None and report_item not in WALK_TRACK_CHOICES:
        raise cerr.InvalidArgumentError(
            "`report_item` argument must be one of {}, "
            "but was {}".format(WALK_TRACK_CHOICES, report_item))
    self.report_item = report_item
    self.track_estimates = track_estimates
    self.total_file_count = initial_file_total
    self.total_folder_count = initial_folder_total
    self.total_file_estimate = initial_file_estimate
    self.total_folder_estimate = initial_folder_estimate
    if not track_estimates:
        return
    # Depth-indexed bookkeeping, seeded with the root folder at depth 0.
    self.max_depth_found = 0
    self.nentries_at_depth = [1]
    self.nfolders_at_depth = [1]
    self.nfiles_at_depth = [0]
    self.nfolders_estimate_at_depth = [1]
    self.nfiles_estimate_at_depth = [0]
def _walk(
        srcdir, dstdir=None, mindepth=None, maxdepth=float('inf'), outdepth=None, dmatch_maxdepth=None,
        fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
        dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
        fsub=None, dsub=None,
        copy_method=None, copy_overwrite_files=None, copy_overwrite_dirs=None, copy_overwrite_dmatch=None,
        sync_tree=False, transplant_tree=False, collapse_tree=False,
        copy_dryrun=None, copy_quiet=None, copy_debug=None,
        allow_dir_op=None, mkdir_upon_file_copy=False, allow_nonstd_shprogs=False, copy_shcmd_fmtstr=None,
        list_function=None, rematch_function=None, resub_function=None, rematch_partial=False
):
    """Core walk generator: validate top-level arguments, build a
    WalkObject with the full option set, and delegate to its walk().

    When `mindepth` is 0, first yields the source directory itself as a
    (parent-dir, [src-dir-name], []) triple, mirroring os.walk's shape.
    """
    if not os.path.isdir(srcdir):
        raise cerr.InvalidArgumentError("`srcdir` directory does not exist: {}".format(srcdir))
    # Copy-related options are meaningless without a destination.
    if dstdir is None and (copy_method or copy_quiet or copy_dryrun or copy_shcmd_fmtstr):
        raise cerr.InvalidArgumentError("`dstdir` must be provided to use file copy options")
    walker = WalkObject(
        mindepth, maxdepth, outdepth, dmatch_maxdepth,
        fmatch, fmatch_re, fexcl, fexcl_re,
        dmatch, dmatch_re, dexcl, dexcl_re,
        fsub, dsub,
        copy_method, copy_overwrite_files, copy_overwrite_dirs, copy_overwrite_dmatch,
        sync_tree, transplant_tree, collapse_tree,
        copy_dryrun, copy_quiet, copy_debug,
        allow_dir_op, mkdir_upon_file_copy, allow_nonstd_shprogs, copy_shcmd_fmtstr,
        list_function, rematch_function, resub_function, rematch_partial
    )
    if mindepth == 0:
        # Depth 0: report the source directory as a child of its parent.
        yield os.path.dirname(srcdir), [os.path.basename(srcdir)], []
    for walk_result in walker.walk(srcdir, dstdir):
        yield walk_result
def copy_tree(
        srcdir, dstdir, copy_method='copy',
        sync_tree=False, transplant_tree=False, collapse_tree=False,
        overwrite_files=False, overwrite_dirs=False, overwrite_dmatch=False,
        mindepth=None, maxdepth=float('inf'), dmatch_maxdepth=None,
        fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
        dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
        fsub=None, dsub=None,
        vreturn=None, vyield=None, print_findings=False,
        dryrun=False, quiet=False, debug=False,
        allow_dir_op=None, mkdir_upon_file_copy=False, allow_nonstd_shprogs=False, copy_shcmd_fmtstr=None,
        list_function=None, rematch_function=None, resub_function=None, rematch_partial=False
):
    """Copy the `srcdir` directory tree to `dstdir`.

    Thin wrapper around find() that requires a destination and a copy
    method, and forwards all matching/filtering/copy options.  Returns
    whatever find() returns for the given `vreturn`/`vyield` settings.

    Raises InvalidArgumentError if `dstdir` or `copy_method` is None.
    """
    if dstdir is None:
        raise cerr.InvalidArgumentError("`dstdir` cannot be None")
    if copy_method is None:
        raise cerr.InvalidArgumentError("`copy_method` cannot be None")
    # BUGFIX: the positional call previously omitted find()'s `outdepth`
    # parameter, shifting every subsequent argument into the wrong slot
    # (e.g. `dmatch_maxdepth` was received as `outdepth`).  `outdepth` is
    # passed explicitly as None; `sync_tree`/`transplant_tree` control the
    # output depth instead.  The result of find() is now also returned.
    return find(
        srcdir, dstdir, vreturn, vyield, print_findings,
        mindepth, maxdepth, None, dmatch_maxdepth,
        fmatch, fmatch_re, fexcl, fexcl_re,
        dmatch, dmatch_re, dexcl, dexcl_re,
        fsub, dsub,
        copy_method, overwrite_files, overwrite_dirs, overwrite_dmatch,
        sync_tree, transplant_tree, collapse_tree,
        dryrun, quiet, debug,
        allow_dir_op, mkdir_upon_file_copy, allow_nonstd_shprogs, copy_shcmd_fmtstr,
        list_function, rematch_function, resub_function, rematch_partial
    )
def datetime_to_datestring(dt):
    """Format a date/datetime object as a zero-padded 'YYYY-MM-DD' string.

    Raises InvalidArgumentError if `dt` is not one of the accepted
    date/datetime types.
    """
    if type(dt) in datetime_date_types:
        return "{:04}-{:02}-{:02}".format(dt.year, dt.month, dt.day)
    raise cerr.InvalidArgumentError(
        "`dt` type must be one of {}, but was {}".format(
            datetime_date_types, type(dt)
        )
    )
def datetime_to_timestring(dt):
    """Format a datetime object as a zero-padded 'HH:MM:SS' string.

    Raises InvalidArgumentError if `dt` is not a datetime.datetime.
    """
    if type(dt) is datetime.datetime:
        return "{:02}:{:02}:{:02}".format(dt.hour, dt.minute, dt.second)
    raise cerr.InvalidArgumentError(
        "`dt` type must be {}, but was {}".format(
            datetime.datetime, type(dt)
        )
    )
def set(self, argstrs, newval=None):
    """Set the value of one or more argument strings.

    Two calling forms:
      * `argstrs` and `newval` are parallel tuples/lists of equal length:
        each value is converted with the argument's declared type and
        stored pairwise.
      * otherwise: every argument string in `argstrs` (a single string or
        a collection) is set to `newval`.  When `newval` is None, only
        boolean-style arguments may be set (to their 'enabled' value);
        any other argument type raises InvalidArgumentError.

    Afterward, the cached command string is refreshed (fully when only
    positional argument strings changed, otherwise just the base).
    """
    if type(argstrs) in (tuple, list) and type(newval) in (
            tuple, list) and len(argstrs) == len(newval):
        # Pairwise form: each argstr gets its own value, converted with
        # the argument's declared type.
        argstr_list = argstrs
        for argstr_i, newval_i in list(zip(argstrs, newval)):
            if argstr_i not in self.argstr2varstr:
                raise cerr.InvalidArgumentError(
                    "This {} object has no '{}' argument string".format(
                        type(self).__name__, argstr_i))
            self.vars_dict[self.argstr2varstr[
                argstr_i]] = self.argstr2argtype[argstr_i](newval_i)
    else:
        # Uniform form: all argstrs receive the same `newval`.
        argstr_list = argstrs if type(argstrs) in (tuple, list) else [argstrs]
        for argstr in argstr_list:
            if argstr not in self.argstr2varstr:
                raise cerr.InvalidArgumentError(
                    "This {} object has no '{}' argument string".format(
                        type(self).__name__, argstr))
            if newval is None:
                # With no explicit value, infer the 'enabled' value for
                # boolean-style arguments from their argparse action.
                action = self.varstr2action[self.argstr2varstr[argstr]]
                acttype = type(action)
                if acttype is argparse._StoreAction and 'function argtype_bool_plus' in str(
                        action.type):
                    newval = True
                elif acttype in (argparse._StoreTrueAction, argparse._StoreFalseAction):
                    newval = (acttype is argparse._StoreTrueAction)
                else:
                    raise cerr.InvalidArgumentError(
                        "Setting non-boolean argument string '{}' requires "
                        "a non-None `newval` value".format(argstr))
            # NOTE(review): once inferred, `newval` persists for the
            # remaining argstrs in this loop -- looks intentional for
            # all-boolean batches; confirm.
            self.vars_dict[self.argstr2varstr[argstr]] = newval
    if set(argstr_list).issubset(set(self.pos_argstr)):
        self._update_cmd()
    else:
        self._update_cmd_base()
def unset(self, *argstrs):
    """Reset the given argument strings to their 'unset' values.

    Boolean-style arguments revert to their disabled value (False for
    store_true/bool-plus arguments, True for store_false); all other
    arguments revert to None.  Accepts multiple argument strings or a
    single tuple/list of them.  Afterward, the cached command string is
    refreshed.

    Raises InvalidArgumentError if no argument strings are provided.
    """
    if not argstrs:
        raise cerr.InvalidArgumentError(
            "One or more argument strings must be provided")
    if len(argstrs) == 1 and type(argstrs[0]) in (tuple, list):
        # A single collection argument stands in for the whole vararg list.
        argstrs = argstrs[0]
    for argstr in argstrs:
        varstr = self.argstr2varstr[argstr]
        action = self.varstr2action[varstr]
        acttype = type(action)
        if acttype is argparse._StoreAction and 'function argtype_bool_plus' in str(
                action.type):
            cleared_value = False
        elif acttype in (argparse._StoreTrueAction, argparse._StoreFalseAction):
            # store_false arguments are 'unset' when True.
            cleared_value = (acttype is argparse._StoreFalseAction)
        else:
            cleared_value = None
        self.vars_dict[varstr] = cleared_value
    if set(argstrs).issubset(set(self.pos_argstr)):
        self._update_cmd()
    else:
        self._update_cmd_base()
def datetime_to_timestamp(dt):
    """Format a datetime as 'YYYYMMDD_HHMMSS' or a date as 'YYYYMMDD'.

    Raises InvalidArgumentError for any other type.
    """
    if type(dt) is datetime.datetime:
        return "{:04}{:02}{:02}_{:02}{:02}{:02}".format(
            dt.year, dt.month, dt.day,
            dt.hour, dt.minute, dt.second
        )
    if type(dt) is datetime.date:
        return "{:04}{:02}{:02}".format(dt.year, dt.month, dt.day)
    raise cerr.InvalidArgumentError(
        "`dt` type must be one of {}, but was {}".format(
            datetime_date_types, type(dt)
        ))
def get_jobsubmit_cmd(self, scheduler,
                      jobscript=None, jobname=None,
                      time_hr=None, time_min=None, time_sec=None,
                      memory_gb=None, node=None, email=None, envvars=None):
    """Build a job-submission command line for the given scheduler.

    Supports PBS (`qsub`) and SLURM (`sbatch`).  Optional job name,
    walltime (`time_hr`/`time_min`/`time_sec` are summed), memory, node,
    email notification, and environment variables are appended as
    scheduler-specific options.  Lines in `jobscript` beginning with
    '#CONDOPT_PBS' / '#CONDOPT_SBATCH' are parsed as conditional options
    ("<options> [IF <conditional> [ELSE <options>]]") and appended.

    Returns the assembled command string, ending with the quoted
    `jobscript` path.

    Raises InvalidArgumentError on malformed conditional-option syntax.
    """
    cmd = None
    cmd_envvars = None
    jobscript_optkey = None

    # Collapse the separate time arguments into one H:MM:SS string.
    total_sec = 0
    if time_hr is not None:
        total_sec += time_hr * 3600
    if time_min is not None:
        total_sec += time_min * 60
    if time_sec is not None:
        total_sec += time_sec
    if total_sec == 0:
        time_hms = None
    else:
        m, s = divmod(total_sec, 60)
        h, m = divmod(m, 60)
        time_hms = '{:d}:{:02d}:{:02d}'.format(h, m, s)

    if envvars is not None:
        # Sequences become positional names p0, p1, ...; dicts keep names.
        if type(envvars) in (tuple, list):
            cmd_envvars = ','.join([
                'p{}="{}"'.format(i, self._escape_comma(a))
                for i, a in enumerate(envvars)
            ])
        elif type(envvars) == dict:
            cmd_envvars = ','.join([
                '{}="{}"'.format(var_name, self._escape_comma(var_val))
                for var_name, var_val in envvars.items()
            ])

    if scheduler == SCHED_PBS:
        # BUGFIX: previously the "-l" resource list was emitted only when
        # BOTH walltime and memory were given (a lone `node` was ignored)
        # and skipped middle items produced double commas; a missing list
        # comma also merged the "-v"/"-m ae" items via implicit string
        # concatenation, dropping "-m ae" whenever env vars were passed.
        resource_items = [item for item in [
            "nodes={}".format(node) if node is not None else '',
            "walltime={}".format(time_hms) if time_hms is not None else '',
            "mem={}gb".format(memory_gb) if memory_gb is not None else ''
        ] if item != '']
        cmd = ' '.join([
            'qsub',
            "-N {}".format(jobname) if jobname is not None else '',
            "-l {}".format(','.join(resource_items)) if resource_items else '',
            "-v {}".format(cmd_envvars) if cmd_envvars is not None else '',
            "-m ae" if email else ''
        ])
        jobscript_optkey = '#PBS'

    elif scheduler == SCHED_SLURM:
        # BUGFIX: a missing list comma merged the "-v"/"--mail-type" items
        # (dropping mail notification whenever env vars were passed), and
        # the None placeholder for "--mail-user" made ' '.join(...) raise
        # TypeError when `email` was truthy but not a str.
        # NOTE(review): sbatch treats "-v" as verbose; environment
        # variables are normally passed with "--export" -- preserved
        # as-is, confirm intended.
        cmd = ' '.join([
            'sbatch',
            "--job-name {}".format(jobname) if jobname is not None else '',
            "--time {}".format(time_hms) if time_hms is not None else '',
            "--mem {}G".format(memory_gb) if memory_gb is not None else '',
            "-v {}".format(cmd_envvars) if cmd_envvars is not None else '',
            "--mail-type FAIL,END" if email else '',
            "--mail-user {}".format(email) if type(email) is str else ''
        ])
        jobscript_optkey = '#SBATCH'

    if jobscript_optkey is not None:
        jobscript_condoptkey = jobscript_optkey.replace('#', '#CONDOPT_')

        jobscript_condopts = []
        with open(jobscript) as job_script_fp:
            for line_num, line in enumerate(job_script_fp, 1):
                if line.lstrip().startswith(jobscript_condoptkey):
                    # Parse "<options> [IF <conditional> [ELSE <options>]]".
                    cond_ifval = None
                    cond_cond = None
                    cond_elseval = None
                    cond_remain = line.replace(jobscript_condoptkey, '').strip()
                    cond_parts = [s.strip() for s in cond_remain.split(' ELSE ')]
                    if len(cond_parts) == 2:
                        cond_remain, cond_elseval = cond_parts
                    cond_parts = [s.strip() for s in cond_remain.split(' IF ')]
                    if len(cond_parts) == 2:
                        cond_ifval, cond_cond = cond_parts
                    try:
                        condopt_add = None
                        if cond_ifval is not None and cond_cond is not None:
                            if self._jobscript_condopt_eval(cond_cond, eval):
                                condopt_add = self._jobscript_condopt_eval(cond_ifval, str)
                            elif cond_elseval is not None:
                                condopt_add = self._jobscript_condopt_eval(cond_elseval, str)
                        elif cond_elseval is not None:
                            # ELSE without a matching IF is malformed.
                            raise SyntaxError
                        elif cond_remain.startswith('import') or cond_remain.startswith('from'):
                            # NOTE(review): executes arbitrary code from the
                            # jobscript -- acceptable only for trusted
                            # jobscripts.
                            exec(cond_remain)
                        else:
                            condopt_add = self._jobscript_condopt_eval(cond_remain, str)

                        if condopt_add is not None:
                            jobscript_condopts.append(condopt_add)

                    except SyntaxError:
                        raise cerr.InvalidArgumentError(' '.join([
                            "Invalid syntax in jobscript conditional option:",
                            "\n File '{}', line {}: '{}'".format(
                                jobscript, line_num, line.rstrip()),
                            "\nProper conditional option syntax is as follows:",
                            "'{} <options> [IF <conditional> [ELSE <options>]]'"
                            .format(jobscript_condoptkey)
                        ]))

        if jobscript_condopts:
            cmd = r'{} {}'.format(cmd, ' '.join(jobscript_condopts))

    cmd = r'{} "{}"'.format(cmd, jobscript)

    return cmd
def __init__(self, mindepth=None, maxdepth=float('inf'), outdepth=None, dmatch_maxdepth=None,
             fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
             dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
             fsub=None, dsub=None,
             copy_method=None, copy_overwrite_files=None, copy_overwrite_dirs=None, copy_overwrite_dmatch=None,
             sync_tree=False, transplant_tree=False, collapse_tree=False,
             copy_dryrun=None, copy_quiet=None, copy_debug=None,
             allow_dir_op=None, mkdir_upon_file_copy=False, allow_nonstd_shprogs=False, copy_shcmd_fmtstr=None,
             list_function=None, rematch_function=None, resub_function=None, rematch_partial=False
             ):
    """Validate, normalize, and store all settings for a directory walk
    with optional file/dir matching, exclusion, name substitution, and
    copying.

    Depth arguments must be >= 0.  Match/exclude arguments accept a
    single pattern or a tuple/list of patterns: fmatch/fexcl/dmatch/dexcl
    are glob patterns (translated to regexes here), while the *_re
    variants are regex patterns or precompiled regex objects.
    fsub/dsub take (pattern, repl_str) pairs for renaming.

    Raises InvalidArgumentError on any invalid or mutually exclusive
    combination of arguments.
    """
    # --- Validate depths and mutually exclusive options ---
    if any([depth < 0 for depth in [mindepth, maxdepth, outdepth, dmatch_maxdepth] if depth is not None]):
        raise cerr.InvalidArgumentError("depth arguments must be >= 0")
    if outdepth is not None:
        if mindepth is not None and outdepth > mindepth:
            raise cerr.InvalidArgumentError("`outdepth` valid range: 0 <= `outdepth` <= `mindepth`")
        if sync_tree or transplant_tree:
            raise cerr.InvalidArgumentError("`outdepth` and (`sync_tree` or `transplant_tree`) "
                                            "arguments are incompatible")
    if copy_method and copy_shcmd_fmtstr:
        raise cerr.InvalidArgumentError("`copy_method` and `copy_shcmd_fmtstr` arguments are mutually exclusive")
    if copy_shcmd_fmtstr is not None:
        # A custom shell command format string stands in for the copy method.
        copy_method = copy_shcmd_fmtstr
        copy_method_is_fmtstr = True
    else:
        copy_method_is_fmtstr = False
    if copy_quiet and copy_dryrun:
        raise cerr.InvalidArgumentError("`copy_quiet` and `copy_dryrun` arguments are mutually exclusive")
    if any([item is not None for item in [fmatch, fmatch_re, fexcl, fexcl_re, fsub]]):
        # File-level filters force file-by-file operation.
        if allow_dir_op is True:
            raise cerr.InvalidArgumentError("`allow_dir_op` cannot be True when file pattern arguments are provided")
        allow_dir_op = False
    if list_function is not None and list_function not in WALK_LIST_FUNCTION_AVAIL:
        raise cerr.InvalidArgumentError("`list_function` must be either os.listdir or os.scandir")

    # --- Derive default depths ---
    if mindepth is None:
        if outdepth is not None:
            mindepth = outdepth
        else:
            mindepth = 0
    if outdepth is None:
        if sync_tree or mindepth == 0:
            outdepth = 1
        elif transplant_tree:
            outdepth = 0
        else:
            outdepth = mindepth
    if dmatch_maxdepth is None:
        # -1 disables dmatch-depth limiting when no copying is configured.
        dmatch_maxdepth = float('inf') if copy_method is not None else -1
        dmatch_maxdepth_specified = False
    else:
        dmatch_maxdepth_specified = True

    # --- Apply default helper functions, remembering which were given ---
    # NOTE(review): `resub_function_given` and `list_function_given` are
    # computed but not used within this method -- presumably consumed
    # elsewhere or vestigial; confirm.
    list_function_given, rematch_function_given, resub_function_given = [
        item is not None for item in [
            list_function, rematch_function, resub_function
        ]
    ]
    if list_function is None:
        list_function = WALK_LIST_FUNCTION_DEFAULT
    if rematch_function is None:
        rematch_function = WALK_REMATCH_PARTIAL_FUNCTION_DEFAULT if rematch_partial else WALK_REMATCH_FUNCTION_DEFAULT
    if resub_function is None:
        resub_function = WALK_RESUB_FUNCTION_DEFAULT

    # --- Normalize all pattern arguments to lists ---
    fmatch, fmatch_re, fexcl, fexcl_re, \
    dmatch, dmatch_re, dexcl, dexcl_re, \
    fsub, dsub = [
        item if (item is None or type(item) is list) else (list(item) if type(item) is tuple else [item])
        for item in [
            fmatch, fmatch_re, fexcl, fexcl_re,
            dmatch, dmatch_re, dexcl, dexcl_re,
            fsub, dsub
        ]
    ]
    # A single (pattern, repl) pair becomes a one-element list of pairs.
    fsub, dsub = [
        item if (item is None or type(item[0]) in (list, tuple)) else [item]
        for item in [
            fsub, dsub
        ]
    ]

    # --- Split resub arguments into pattern and replacement sequences ---
    fsub_patt = None
    dsub_patt = None
    try:
        if fsub is not None:
            fsub_patt, fsub_repl = list(zip(*fsub))
            fsub_patt = list(fsub_patt)
            if len(fsub_patt) != len(fsub_repl):
                raise ValueError
        if dsub is not None:
            dsub_patt, dsub_repl = list(zip(*dsub))
            dsub_patt = list(dsub_patt)
            if len(dsub_patt) != len(dsub_repl):
                raise ValueError
    except ValueError:
        raise cerr.InvalidArgumentError("resub arguments must be provided in (pattern, repl_str) groups")

    # --- Compile all patterns in place within their source lists ---
    pattern_coll = [
        patt_list for patt_list in [
            fmatch, fmatch_re, fexcl, fexcl_re,
            dmatch, dmatch_re, dexcl, dexcl_re,
            fsub_patt, dsub_patt
        ] if patt_list is not None
    ]
    for patt_list in pattern_coll:
        for i, pattern in enumerate(patt_list):
            if patt_list in [fmatch, fexcl, dmatch, dexcl]:
                # Glob-style arguments are translated to regex source.
                pattern = fnmatch_module.translate(pattern)
            re_pattern = re.compile(pattern) if type(pattern) is str else pattern
            try:
                # Accessing .pattern verifies this is a compiled regex;
                # the AttributeError path rejects anything else.
                re_pattern_str = re_pattern.pattern
            except AttributeError:
                traceback.print_exc()
                raise cerr.InvalidArgumentError("regex match/sub argument is invalid")
            if (not rematch_function_given and rematch_function is re.match
                    and patt_list in [fmatch, dmatch]
                    and not pattern.endswith('$') and not rematch_partial):
                # Anchor match patterns so re.match behaves like fullmatch.
                if type(pattern) is str:
                    re_pattern = re.compile(pattern+'$')
                else:
                    # NOTE(review): this warning contains a '{}' placeholder
                    # but no .format() call -- looks like a missing
                    # .format(re_pattern_str); confirm and fix separately.
                    warning("`re.fullmatch` function is not supported, so `re.match` will be used instead "
                            "and argument regex match pattern '{}' may hit on a partial match")
            patt_list[i] = re_pattern

    # --- Merge glob-derived and regex pattern lists ---
    fname_rematch = []
    for patt_list in [fmatch, fmatch_re]:
        if patt_list is not None:
            fname_rematch.extend(patt_list)
    fname_reexcl = []
    for patt_list in [fexcl, fexcl_re]:
        if patt_list is not None:
            fname_reexcl.extend(patt_list)
    dname_rematch = []
    for patt_list in [dmatch, dmatch_re]:
        if patt_list is not None:
            dname_rematch.extend(patt_list)
    dname_reexcl = []
    for patt_list in [dexcl, dexcl_re]:
        if patt_list is not None:
            dname_reexcl.extend(patt_list)
    fname_resub = list(zip(fsub_patt, fsub_repl)) if fsub is not None else None
    dname_resub = list(zip(dsub_patt, dsub_repl)) if dsub is not None else None

    # --- Resolve the copy method to a configured CopyMethod instance ---
    if copy_method is not None:
        if type(copy_method) is psu_cm.CopyMethod:
            # Copy so set_options below does not mutate the caller's object.
            copy_method = copy.copy(copy_method)
        elif type(copy_method) is str:
            if copy_method in psu_cm.COPY_METHOD_DICT:
                copy_method = psu_cm.COPY_METHOD_DICT[copy_method]
            else:
                # Unrecognized name: treat the string as a custom shell command.
                copy_method = psu_cm.CopyMethod(copy_method, copy_shcmd_is_fmtstr=copy_method_is_fmtstr)
                if copy_method.copy_shprog not in psu_cm.COPY_METHOD_SHPROGS and not allow_nonstd_shprogs:
                    raise cerr.InvalidArgumentError("`copy_method` shell program '{}' is nonstandard and not allowed".format(copy_method.copy_shprog))
        else:
            copy_method = psu_cm.CopyMethod(copy_method)
        copy_method.set_options(
            check_srcpath_exists=False,
            copy_makedirs=False,
            copy_overwrite_files=copy_overwrite_files,
            copy_overwrite_dirs=copy_overwrite_dirs,
            copy_dryrun=copy_dryrun,
            copy_verbose=(None if copy_quiet is None else (not copy_quiet)),
            copy_debug=copy_debug
        )
        # Symlink/move methods (and dir-overwrite modes) may operate on
        # whole directories at once.
        if allow_dir_op is None and (copy_method.action_verb.upper() in ('SYMLINKING', 'MOVING')
                                     or copy_overwrite_dirs or copy_overwrite_dmatch):
            allow_dir_op = True
        if copy_overwrite_dmatch is None:
            copy_overwrite_dmatch = False

    # --- Store normalized state ---
    self.srcdir = None
    self.dstdir = None
    self.mindepth = mindepth
    self.maxdepth = maxdepth
    self.outdepth = outdepth
    self.outdepth_inst = outdepth
    self.dmatch_maxdepth = dmatch_maxdepth
    self.dmatch_maxdepth_specified = dmatch_maxdepth_specified
    self.fname_rematch = fname_rematch
    self.fname_reexcl = fname_reexcl
    self.dname_rematch = dname_rematch
    self.dname_reexcl = dname_reexcl
    self.fname_resub = fname_resub
    self.dname_resub = dname_resub
    self.copy_method = copy_method
    self.copy_method_inst = None if copy_method is None else copy.copy(self.copy_method)
    self.copy_overwrite_dmatch = copy_overwrite_dmatch
    self.collapse_tree = collapse_tree
    self.collapse_tree_inst = collapse_tree
    self.allow_dir_op = allow_dir_op
    self.mkdir_upon_file_copy = mkdir_upon_file_copy
    self.list_function = list_function
    self.rematch_function = rematch_function
    self.resub_function = resub_function
    # Per-walk progress-tracking state, reset by walk().
    self.tftc = None
    self.tqdm = None
    self.track_progress = True
    self.track_initialize_total = True
    self.track_count_only = False
    self.track_update_total = True
def walk_simple(srcdir, mindepth=1, maxdepth=float('inf'),
                track_item=None, track_initialize_total=True,
                list_function=WALK_LIST_FUNCTION_DEFAULT):
    """Generator yielding (rootdir, dnames, fnames) triples for `srcdir`,
    like os.walk, restricted to depths [mindepth, maxdepth].

    When `track_item` is given (one of WALK_TRACK_CHOICES), a tqdm
    progress bar is shown; with `track_initialize_total` True, a first
    counting pass establishes the item total before the reported pass.
    With `mindepth` 0, the source directory itself is yielded first as a
    child of its parent directory.

    Raises InvalidArgumentError on a missing source directory, negative
    depths, or an unusable `track_item` setting.
    """
    if not os.path.isdir(srcdir):
        raise cerr.InvalidArgumentError("`srcdir` directory does not exist: {}".format(srcdir))
    if mindepth < 0 or maxdepth < 0:
        raise cerr.InvalidArgumentError("depth arguments must be >= 0")
    if track_item is not None:
        if not imported_tqdm:
            raise cerr.InvalidArgumentError("Python package 'tqdm' must be available to use `track_item` option")
        if track_item not in WALK_TRACK_CHOICES:
            raise cerr.InvalidArgumentError("`track_item` argument must be one of {}, "
                                            "but was {}".format(WALK_TRACK_CHOICES, track_item))
        track_item_unit = WALK_TRACK_ITEM_UNIT_DICT[track_item]
    else:
        track_item_unit = None
    if maxdepth == 0:
        # Nothing below the root will be visited; a counting pass is moot.
        track_initialize_total = False

    srcdir = os.path.abspath(srcdir)

    if track_item is not None:
        if track_initialize_total:
            print("First counting {}s to process in directory: {}".format(
                track_item_unit, srcdir
            ))
        my_tftc = TrackFileTreeCount(track_item)
        my_tqdm = tqdm(total=0, unit=track_item_unit, disable=False)
        if track_initialize_total:
            # Counting pass: walk once (exhausting the generator) purely to
            # establish the item total, then rebuild the tracker/bar for
            # the real reported pass.
            if my_tqdm is not None:
                my_tqdm.update(0)
            exhaust(
                _walk_simple(srcdir, 1, mindepth, maxdepth, list_function,
                             my_tftc, my_tqdm, update_total_count=True)
            )
            item_total, item_est = my_tftc.get_item_count_estimate()
            if my_tqdm is not None:
                my_tqdm.close()
            print("Now processing {}s in directory: {}".format(
                track_item_unit, srcdir
            ))
            my_tqdm = tqdm(total=item_total, unit=track_item_unit, disable=False)
            my_tftc = TrackFileTreeCount(
                track_item,
                initial_file_estimate=my_tftc.total_file_estimate,
                initial_folder_estimate=my_tftc.total_folder_estimate,
                track_estimates=False
            )
            my_tqdm.update(0)
    else:
        my_tftc = None
        my_tqdm = None

    if mindepth == 0:
        # Depth 0: report the source directory as a child of its parent.
        if my_tqdm is not None and track_item in (WALK_TRACK_DIRS, WALK_TRACK_BOTH):
            my_tqdm.total += 1
            my_tqdm.update(0)
        updir = os.path.dirname(srcdir)
        srcdname = os.path.basename(srcdir)
        yield updir, [srcdname], []
        if my_tqdm is not None and track_item in (WALK_TRACK_DIRS, WALK_TRACK_BOTH):
            my_tqdm.update(1)

    for x in _walk_simple(srcdir, 1, mindepth, maxdepth, list_function,
                          my_tftc, my_tqdm, (not track_initialize_total)):
        yield x

    if my_tqdm is not None:
        my_tqdm.close()
def find(
        srcdir, dstdir=None, vreturn=None, vyield=None, print_findings=False,
        mindepth=None, maxdepth=float('inf'), outdepth=None, dmatch_maxdepth=None,
        fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
        dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
        fsub=None, dsub=None,
        copy_method=None, copy_overwrite_files=None, copy_overwrite_dirs=None, copy_overwrite_dmatch=None,
        sync_tree=False, transplant_tree=False, collapse_tree=False,
        copy_dryrun=None, copy_quiet=None, copy_debug=None,
        allow_dir_op=None, mkdir_upon_file_copy=False, allow_nonstd_shprogs=False, copy_shcmd_fmtstr=None,
        list_function=None, rematch_function=None, resub_function=None, rematch_partial=False
):
    """Search `srcdir` with _walk() and collect matched paths.

    Exactly one of `vreturn` (collect and return results) or `vyield`
    (return a generator of results) must be in effect; each is a single
    FIND_RETURN_* choice or a collection of distinct choices.  When
    neither is given, a `vreturn` default is inferred from which filter
    arguments were provided.  With `print_findings`, each found path is
    written to stdout as it is encountered.

    Raises InvalidArgumentError on conflicting or invalid
    `vreturn`/`vyield` settings.
    """
    if vreturn is None and vyield is None:
        # Infer the result type from which filter arguments were supplied.
        ffilter = ([arg is not None and len(arg) != 0 for arg in [fmatch, fmatch_re, fexcl, fexcl_re, fsub]].count(True) > 0)
        dfilter = ([arg is not None and len(arg) != 0 for arg in [dmatch, dmatch_re, dexcl, dexcl_re, dsub]].count(True) > 0)
        if ffilter and dfilter:
            vreturn = FIND_RETURN_MIX
        elif ffilter:
            vreturn = FIND_RETURN_FILES
        elif dfilter:
            vreturn = FIND_RETURN_DIRS
        else:
            vreturn = FIND_RETURN_MIX

    return_items = [item_list for item_list in [vreturn, vyield] if item_list is not None]
    if len(return_items) != 1:
        raise cerr.InvalidArgumentError("One and only one of (`vreturn`, `vyield`) arguments must be provided")
    if type(return_items[0]) in (tuple, list):
        return_items = list(return_items[0])
    for item in return_items:
        if item not in FIND_RETURN_CHOICES:
            raise cerr.InvalidArgumentError("`vreturn`/`vyield` string arguments must be one of {}, "
                                            "but argument was {}".format(FIND_RETURN_CHOICES, return_items))
    # BUGFIX: the old duplicate check (`1 <= len(set(return_items)) <= 2`)
    # never caught actual duplicates and wrongly rejected three distinct
    # items; compare set size to list length instead.
    if len(set(return_items)) != len(return_items):
        raise cerr.InvalidArgumentError("`vreturn`/`vyield` argument contains duplicate items")

    return_mix = (FIND_RETURN_MIX in return_items)
    return_mix_only = (return_items == [FIND_RETURN_MIX])

    dirs_all = []
    files_all = []
    mix_all = []

    def _find_iter():
        # Walk, build full paths for the requested result types, optionally
        # print them, and either accumulate (vreturn) or yield (vyield).
        for rootdir, dnames, fnames in _walk(
                srcdir, dstdir,
                mindepth, maxdepth, outdepth, dmatch_maxdepth,
                fmatch, fmatch_re, fexcl, fexcl_re,
                dmatch, dmatch_re, dexcl, dexcl_re,
                fsub, dsub,
                copy_method, copy_overwrite_files, copy_overwrite_dirs, copy_overwrite_dmatch,
                sync_tree, transplant_tree, collapse_tree,
                copy_dryrun, copy_quiet, copy_debug,
                allow_dir_op, mkdir_upon_file_copy, allow_nonstd_shprogs, copy_shcmd_fmtstr,
                list_function, rematch_function, resub_function, rematch_partial
        ):
            dirs = [os.path.join(rootdir, dn) for dn in dnames] if (FIND_RETURN_DIRS in return_items or return_mix) else None
            files = [os.path.join(rootdir, fn) for fn in fnames] if (FIND_RETURN_FILES in return_items or return_mix) else None
            if return_mix:
                # Reuse `dirs` directly when mix is the only result type.
                mix = dirs if return_mix_only else list(dirs)
                mix.extend(files)
                if return_mix_only:
                    dirs, files = None, None
            else:
                mix = None

            if print_findings:
                if mix:
                    for p in mix:
                        sys.stdout.write(p+'\n')
                else:
                    if dirs:
                        for d in dirs:
                            sys.stdout.write(d+'\n')
                    if files:
                        for f in files:
                            sys.stdout.write(f+'\n')

            if vreturn:
                if dirs:
                    dirs_all.extend(dirs)
                if files:
                    files_all.extend(files)
                if mix:
                    mix_all.extend(mix)

            if vyield:
                if len(return_items) == 1:
                    item = return_items[0]
                    yield_results = files if item == FIND_RETURN_FILES else (dirs if item == FIND_RETURN_DIRS else mix)
                    for p in yield_results:
                        yield p
                else:
                    yield_results = []
                    for item in return_items:
                        yield_results.append(files if item == FIND_RETURN_FILES else (dirs if item == FIND_RETURN_DIRS else mix))
                    yield yield_results

    if vyield:
        return _find_iter()

    if vreturn:
        exhaust(_find_iter())
        # BUGFIX: directory results were unreachable here -- both selector
        # expressions compared `item` against FIND_RETURN_FILES where
        # FIND_RETURN_DIRS was intended (the vyield path above shows the
        # intended comparison), so dirs_all was silently replaced by
        # mix_all.
        if len(return_items) == 1:
            item = return_items[0]
            return_results = files_all if item == FIND_RETURN_FILES else (dirs_all if item == FIND_RETURN_DIRS else mix_all)
        else:
            return_results = []
            for item in return_items:
                return_results.append(files_all if item == FIND_RETURN_FILES else (dirs_all if item == FIND_RETURN_DIRS else mix_all))
        return return_results
def execute_shell_command(cmd_str=None, tokenize_cmd=False, arg_list=[],
                          shell=None, cwd=None, env=None, executable=None,
                          bufsize=-1, stdin=None, stdout=None, stderr=None,
                          send_stderr_to_stdout=False,
                          quiet=False, return_streams=False, return_popen=False,
                          success_error_codes=[0], ignore_failure=False,
                          throw_exception_in_failure=True, print_stderr_in_failure=True,
                          print_failure_info=False, print_begin_info=False, print_end_info=False):
    """Run an external command given either as a single string (`cmd_str`,
    executed through the shell by default) or as an argument vector
    (`arg_list`, executed without a shell by default); exactly one of the
    two must be provided.

    Returns the exit code by default; with `return_streams=True`, returns
    (return_code, stdout, stderr) with the streams captured; with
    `return_popen=True`, returns the live Popen object immediately (the
    caller then owns waiting, stream handling, and failure checking).

    Raises InvalidArgumentError on inconsistent arguments and
    ExternalError when the exit code is not in `success_error_codes`
    (unless `ignore_failure` is True or `throw_exception_in_failure` is
    False).

    NOTE: `arg_list` and `success_error_codes` defaults are mutable but
    are never mutated here.
    """
    # BUGFIX: error-message typo ("provide" -> "provided").
    if [cmd_str is not None, len(arg_list) > 0].count(True) != 1:
        raise cerr.InvalidArgumentError(
            "Only one of (`cmd_str`, `arg_list`) arguments must be provided")
    if return_streams and return_popen:
        raise cerr.InvalidArgumentError(
            "Only one of (`return_streams`, `return_popen`) arguments may be provided"
        )

    if cmd_str is not None:
        args = shlex.split(cmd_str) if tokenize_cmd else cmd_str
        if shell is None:
            shell = True
    else:
        args = arg_list
        if shell is None:
            shell = False
        # Reconstruct a display string for logging/error messages.
        cmd_str = ' '.join(arg_list)

    # Capture streams when they must be suppressed or returned.
    if quiet or return_streams:
        if stdout is None:
            stdout = subprocess.PIPE
        if stderr is None and not send_stderr_to_stdout:
            stderr = subprocess.PIPE
    if send_stderr_to_stdout:
        if stderr is not None:
            raise cerr.InvalidArgumentError(
                "`stderr` argument must be None when `send_stderr_to_stdout` argument is True"
            )
        stderr = subprocess.STDOUT

    proc = subprocess.Popen(args, bufsize=bufsize, executable=executable,
                            stdin=stdin, stdout=stdout, stderr=stderr,
                            shell=shell, cwd=cwd, env=env,
                            universal_newlines=True)
    proc_pid = proc.pid
    if print_begin_info:
        print('Beginning external call (PID {}): """ {} """'.format(
            proc_pid, cmd_str))

    # BUGFIX: previously the Popen object was returned only AFTER
    # communicate() had already consumed the streams and waited for the
    # process to exit, defeating the purpose of `return_popen`.
    if return_popen:
        return proc

    stdout, stderr = proc.communicate()
    return_code = proc.returncode

    if not quiet:
        # Relay any captured output now.
        if stdout is not None:
            sys.stdout.write(stdout)
        if stderr is not None:
            sys.stderr.write(stderr)
            # Already relayed; avoid printing stderr twice on failure.
            print_stderr_in_failure = False

    if return_code not in success_error_codes and not ignore_failure:
        errmsg = 'External call (PID {}) failed with non-zero exit status ({}): """ {} """'.format(
            proc_pid, return_code, cmd_str)
        if throw_exception_in_failure:
            raise cerr.ExternalError(errmsg)
        if print_stderr_in_failure and stderr is not None:
            sys.stderr.write(stderr)
        if print_failure_info:
            # BUGFIX: previously printed the boolean flag itself
            # (`eprint(print_failure_info)`) rather than the error message.
            eprint(errmsg)

    if print_end_info:
        print("External call (PID {}) completed successfully".format(proc_pid))

    return (return_code, stdout, stderr) if return_streams else return_code
def read(self, tasklist_file_or_buff, col_delim=',', uniform_dtype=str,
         col_argname_dtype_def=None,  # collection of tuples like ((0, str))
         args=None, col_argstr_def=None,
         ncol_min=None, ncol_max=None, ncol_strict=True,
         expect_header=None, check_header_for_argnames=None,
         allow_missing_header=None, ncol_strict_header_separate=False):
    """Parse a delimited tasklist file into `task_array` rows.

    Column definitions come from `col_argname_dtype_def` ((name, dtype)
    pairs) or `col_argstr_def` (argument strings resolved through
    `args`, an ArgumentPasser-like object); these two are mutually
    exclusive.  A header line may be expected, validated against the
    column definitions, and/or allowed to be missing, per the
    `expect_header`/`check_header_for_argnames`/`allow_missing_header`
    flags (defaults are inferred from which definitions were given).
    Column counts are checked against `ncol_min`/`ncol_max`, and with
    `ncol_strict` every body line must have a constant column count.

    Raises InvalidArgumentError on inconsistent arguments and
    ScriptArgumentError on malformed file contents.

    NOTE(review): within this visible span, `task_array` is built but
    never returned or stored -- looks truncated; confirm against the
    remainder of the method/class.
    """
    header_items = None
    task_array = []

    if col_argname_dtype_def is not None and col_argstr_def is not None:
        raise cerr.InvalidArgumentError(
            "`col_argname_dtype_def` and `col_argstr_def` arguments are mutually exclusive"
        )

    # --- Infer header-handling defaults from which definitions exist ---
    if args is None and col_argname_dtype_def is None and col_argstr_def is None:
        if expect_header is None:
            expect_header = False
        if check_header_for_argnames is True or allow_missing_header is True:
            raise cerr.InvalidArgumentError(
                "`check_header_for_argnames` or `allow_missing_header` can only be True "
                "when either `col_argname_dtype_def` or `col_argstr_def` are provided"
            )
        check_header_for_argnames = False
        allow_missing_header = False
    else:
        if expect_header is None:
            expect_header = True
        if check_header_for_argnames is None:
            check_header_for_argnames = True
        if col_argname_dtype_def is None and col_argstr_def is None:
            if allow_missing_header is True:
                raise cerr.InvalidArgumentError(
                    "`allow_missing_header` can only be True "
                    "when either `col_argname_dtype_def` or `col_argstr_def` are provided"
                )
            allow_missing_header = False
        elif allow_missing_header is None:
            allow_missing_header = True

    # --- Derive a single (argname, dtype) column definition list ---
    col_argname_dtype_def_derived = None
    if col_argname_dtype_def is not None:
        col_argname_dtype_def_derived = col_argname_dtype_def
    elif col_argstr_def is not None:
        if args is None:
            raise cerr.InvalidArgumentError("`args` argument must be provided along with `col_argstr_def`")
        col_argname_dtype_def_derived = [(argstr, args.argstr2argtype[argstr]) for argstr in col_argstr_def]

    with open(tasklist_file_or_buff, 'r') as tasklist_fp:
        first_line = tasklist_fp.readline().strip()
        if first_line == '':
            # Empty file: nothing to parse.
            return
        first_line_items = [item.strip() for item in first_line.split(col_delim)]
        line_num = 1

        if not expect_header:
            task_line = first_line
            task_line_items = first_line_items
        else:
            header_line = first_line
            header_items = first_line_items
            if check_header_for_argnames:
                assert col_argname_dtype_def is not None or col_argstr_def is not None
                assert col_argname_dtype_def_derived is not None
                header_argname_set = set(header_items)
                col_argname_def_list = [argname for argname, argtype in col_argname_dtype_def_derived]
                col_argname_def_set = set(col_argname_def_list)
                if header_argname_set.issubset(col_argname_def_set):
                    # Header names match the definition: reorder the
                    # column definitions to follow the header order.
                    argname_dtype_dict = {argname: dtype for argname, dtype in col_argname_dtype_def_derived}
                    col_argname_dtype_def_derived = [(argname, argname_dtype_dict[argname]) for argname in header_items]
                elif args is not None and header_argname_set.issubset(set(args.all_argstr)):
                    # Fall back to the script's full argument-string set.
                    if col_argname_dtype_def is not None:
                        warning(
                            "Argument names in header line ({}) of tasklist file ({}) are not a "
                            "subset of `col_argname_dtype_def` argument names ({}), but are instead "
                            "a subset of script ArgumentPasser argument strings which will be "
                            "leveraged instead.".format(
                                header_line, tasklist_file_or_buff, col_argname_def_set
                            )
                        )
                    elif col_argstr_def is not None:
                        warning(
                            "Argument strings in header line ({}) of tasklist file ({}) are not "
                            "the expected set of `col_argstr_def` argument strings ({})".format(
                                header_line, tasklist_file_or_buff, col_argname_def_set
                            )
                        )
                    col_argname_dtype_def_derived = [(argstr, args.argstr2argtype[argstr]) for argstr in header_items]
                elif allow_missing_header:
                    # Header unusable: assume the first line is data and
                    # that columns follow the provided definition order.
                    if col_argname_dtype_def is not None:
                        warning(
                            "Tasklist file ({}) does not have an acceptable header line, "
                            "so it will be assumed that columns follow the provided "
                            "`col_argname_dtype_def` order precisely as given: {}".format(
                                tasklist_file_or_buff, col_argname_def_list
                            )
                        )
                    elif col_argstr_def is not None:
                        warning(
                            "Tasklist file ({}) does not have an acceptable header line, "
                            "so it will be assumed that columns follow the provided "
                            "`col_argstr_def` order precisely as given: {}".format(
                                tasklist_file_or_buff, col_argname_def_list
                            )
                        )
                    header_items = None
                else:
                    raise cerr.ScriptArgumentError("Tasklist file does not have an acceptable header line: {}".format(tasklist_file_or_buff))

        # --- Validate header column count ---
        if header_items is not None:
            ncol_header = len(header_items)
            if ncol_min is not None and ncol_header < ncol_min:
                raise cerr.ScriptArgumentError(
                    "Tasklist file header line has {} columns, less than `ncol_min` required minimum ({}): {}".format(
                        ncol_header, ncol_min, tasklist_file_or_buff
                    ))
            if ncol_max is not None and ncol_header > ncol_max:
                raise cerr.ScriptArgumentError(
                    "Tasklist file header line has {} columns, more than `ncol_max` required maximum ({}): {}".format(
                        ncol_header, ncol_max, tasklist_file_or_buff
                    ))
            # NOTE(review): this third check forces the header to have
            # exactly `ncol_max` columns when `ncol_max` is given -- looks
            # intentional for header lines but confirm.
            if ncol_max is not None and ncol_header < ncol_max:
                raise cerr.ScriptArgumentError(
                    "Tasklist file header line has {} columns, less than `ncol_max` possible maximum ({}): {}".format(
                        ncol_header, ncol_max, tasklist_file_or_buff
                    ))

        # --- Position at the first data line ---
        if header_items is None:
            task_line = first_line
            task_line_items = first_line_items
        else:
            task_line = tasklist_fp.readline().strip()
            task_line_items = [item.strip() for item in task_line.split(col_delim)]
            if len(task_line_items) != len(header_items):
                errmsg = ("Tasklist file ({}) number of columns mismatch between header ({}) "
                          "and body ({})".format(tasklist_file_or_buff, len(header_items), len(task_line_items)))
                if ncol_strict and not ncol_strict_header_separate:
                    raise cerr.ScriptArgumentError(errmsg)
                else:
                    warning(errmsg)

        if col_argname_dtype_def_derived is not None:
            col_dtype_def_derived = [dtype for argname, dtype in col_argname_dtype_def_derived]
            # if ncol_min is None:
            #     ncol_min = len(col_argname_dtype_def_derived)
            if ncol_max is None:
                ncol_max = len(col_argname_dtype_def_derived)
        else:
            col_dtype_def_derived = None

        if ncol_strict:
            # The first data line fixes the constant column count.
            ncol_body = len(task_line_items)
        ncol_body_max = float('-inf')

        # --- Parse data lines, converting each column to its dtype ---
        while task_line != '':
            line_num += 1
            ncol_task_line = len(task_line_items)
            if ncol_strict and ncol_task_line != ncol_body:
                raise cerr.ScriptArgumentError(
                    "Tasklist file {}, line {}: Number of columns ({}) breaks from constant "
                    "number of columns ({}) established prior to this line (`ncol_strict=True`)".format(
                        tasklist_file_or_buff, line_num, ncol_task_line, ncol_body
                    ))
            elif ncol_min is not None and ncol_task_line < ncol_min:
                raise cerr.ScriptArgumentError(
                    "Tasklist file {}, line {}: Number of columns ({}) is less than `ncol_min` "
                    "required minimum ({})".format(
                        tasklist_file_or_buff, line_num, ncol_task_line, ncol_min
                    ))
            elif ncol_max is not None and ncol_task_line > ncol_max:
                raise cerr.ScriptArgumentError(
                    "Tasklist file {}, line {}: Number of columns ({}) is more than `ncol_max` "
                    "required maximum ({})".format(
                        tasklist_file_or_buff, line_num, ncol_task_line, ncol_max
                    ))
            if ncol_task_line > ncol_body_max:
                ncol_body_max = ncol_task_line
            if col_dtype_def_derived is not None:
                task_line_values = [col_dtype_def_derived[col_idx](item) for col_idx, item in enumerate(task_line_items)]
            else:
                task_line_values = [uniform_dtype(item) for item in task_line_items]
            task_array.append(task_line_values)
            task_line = tasklist_fp.readline().strip()
            task_line_items = [item.strip() for item in task_line.split(col_delim)]

        # Without explicit definitions, synthesize numbered columns of the
        # uniform dtype sized to the widest body line.
        if col_argname_dtype_def_derived is None:
            col_argname_dtype_def_derived = [(argname, uniform_dtype) for argname in range(ncol_body_max)]
def walk(self, srcdir, dstdir=None, copy_overwrite_files=None, copy_overwrite_dirs=None,
         sync_tree=False, transplant_tree=False, collapse_tree=None, copy_dryrun=None,
         copy_quiet=None, copy_debug=None, track_progress=None, track_initialize_total=None):
    """Generator: walk the `srcdir` tree and re-yield whatever `self._walk` produces.

    Sets up per-call ("_inst") state on `self`, may perform a whole-tree copy
    shortcut (yielding nothing), optionally runs a preliminary dry-run counting
    pass to size a tqdm progress bar, then delegates traversal to `self._walk`.

    Parameters (None means "fall back to the corresponding setting on self"):
        srcdir -- source directory; must exist.
        dstdir -- optional destination root for copy operations.
        copy_overwrite_files, copy_overwrite_dirs, copy_dryrun, copy_quiet,
            copy_debug -- forwarded to the per-call copy-method instance.
        sync_tree / transplant_tree -- select the output-depth mode for this call.
        collapse_tree, track_progress, track_initialize_total -- per-call
            overrides of the instance-level defaults.

    Raises:
        cerr.InvalidArgumentError -- if `srcdir` does not exist.
    """
    # Reset per-call progress-tracking flags to their defaults.
    self.track_count_only = False
    self.track_update_total = True

    # None-valued options fall back to the instance-level configuration.
    if collapse_tree is None:
        collapse_tree = self.collapse_tree
    if track_progress is None:
        track_progress = self.track_progress
    if track_initialize_total is None:
        track_initialize_total = self.track_initialize_total

    # Per-call output depth: transplant mode (0) recreates srcdir itself under
    # dstdir (see the dstdir adjustment below); sync mode is 1; otherwise use
    # the configured default.  NOTE(review): exact semantics of the numeric
    # levels are inferred from this method alone -- confirm against `_walk`.
    if sync_tree:
        self.outdepth_inst = 1
    elif transplant_tree:
        self.outdepth_inst = 0
    else:
        self.outdepth_inst = self.outdepth

    srcdir = os.path.normpath(os.path.expanduser(srcdir))
    if not os.path.isdir(srcdir):
        raise cerr.InvalidArgumentError("`srcdir` directory does not exist: {}".format(srcdir))
    if dstdir is not None:
        dstdir = os.path.normpath(os.path.expanduser(dstdir))
        if self.outdepth_inst == 0:
            # Transplant mode: recreate the source directory itself inside dstdir.
            dstdir = os.path.join(dstdir, os.path.basename(srcdir))
    self.srcdir = srcdir
    self.dstdir = dstdir
    self.collapse_tree_inst = collapse_tree

    # Work on a shallow per-call copy of the copy method so option overrides
    # passed to this call do not mutate the instance-level template.
    if self.copy_method is None:
        self.copy_method_inst = None
    else:
        self.copy_method_inst = copy.copy(self.copy_method)
        self.copy_method_inst.set_options(
            copy_overwrite_files=copy_overwrite_files,
            copy_overwrite_dirs=copy_overwrite_dirs,
            copy_dryrun=copy_dryrun,
            copy_verbose=(None if copy_quiet is None else (not copy_quiet)),
            copy_debug=copy_debug
        )

    depth = 0
    # dmatch_depth: -1 = no dir-name match patterns configured (everything
    # passes); 0 = patterns exist but srcdir's own name not yet matched;
    # 1 = srcdir's own name matched a pattern.
    dmatch_depth = -1 if not self.dname_rematch else 0
    if dmatch_depth == 0:
        srcdname = os.path.basename(self.srcdir)
        srcdname_match = True
        # Exclusion patterns veto the whole walk when srcdir's name matches one.
        if self.dname_reexcl:
            for re_pattern in self.dname_reexcl:
                srcdname_match = (not self.rematch_function(re_pattern, srcdname))
                if not srcdname_match:
                    break
            if not srcdname_match:
                return  # excluded root: generator yields nothing
        if self.dname_rematch and srcdname_match:
            srcdname_match = False
            for re_pattern in self.dname_rematch:
                srcdname_match = self.rematch_function(re_pattern, srcdname)
                if srcdname_match:
                    break
            if srcdname_match:
                dmatch_depth = 1

    # Whole-tree copy shortcut: when a directory-level copy op is allowed and
    # the root passes depth limits, copy srcdir as one unit and stop early.
    # NOTE(review): `self.copy_method_inst` and `self.dstdir` are used here
    # without None checks -- presumably `allow_dir_op` implies both are set;
    # confirm against the class's initialization/validation.
    if self.allow_dir_op and dmatch_depth != 0 and (self.mindepth <= depth <= self.maxdepth) and self.outdepth_inst in (-1, 0):
        if not self.copy_method_inst.dryrun:
            # Ensure dstdir's *parent* exists; the copy creates dstdir itself.
            os.makedirs(os.path.dirname(os.path.normpath(self.dstdir)), exist_ok=True)
        copy_success = self.copy_method_inst.copy(  # NOTE(review): result unused
            self.srcdir, self.dstdir, srcpath_is_file=False,
            overwrite_dir=(self.copy_method_inst.copy_overwrite_dirs or (self.copy_overwrite_dmatch and dmatch_depth == 1)),
        )
        return  # whole tree handled in a single operation; nothing to yield

    # Progress tracking only when requested AND tqdm imported successfully
    # (`imported_tqdm` is presumably a module-level import-success flag --
    # confirm at top of file).
    if track_progress and imported_tqdm:
        if track_initialize_total:
            print("First counting files to process in directory: {}".format(self.srcdir))
        self.tftc = TrackFileTreeCount(WALK_TRACK_FILES)
        self.tqdm = tqdm(total=0, unit=WALK_TRACK_ITEM_UNIT_DICT[WALK_TRACK_FILES], disable=False)
        # self.tqdm = tqdm(total=0, unit=WALK_TRACK_ITEM_UNIT_DICT[WALK_TRACK_FILES], disable=False) if not track_initialize_total else None
    else:
        self.tftc = None
        self.tqdm = None

    # Make sure the destination root exists before descending (unless dry-run).
    if self.copy_method_inst is not None and self.dstdir is not None and not os.path.isdir(self.dstdir):
        if not self.copy_method_inst.dryrun:
            os.makedirs(self.dstdir)

    depth = 1
    if self.tftc is not None:
        if track_initialize_total:
            # Pre-pass: dry-run traversal purely to count items so the progress
            # bar can be created with an accurate total.
            self.track_count_only = True
            self.track_update_total = True
            dryrun_backup = self.copy_method_inst.dryrun
            self.copy_method_inst.dryrun = True  # force no side effects during the count
            depth_backup = depth
            if self.tqdm is not None:
                self.tqdm.update(0)
            exhaust(self._walk(self.srcdir, self.dstdir, depth, dmatch_depth))
            item_total, item_est = self.tftc.get_item_count_estimate()  # NOTE(review): item_est unused
            if self.tqdm is not None:
                self.tqdm.close()
            print("Now processing files in directory: {}".format(self.srcdir))
            # Fresh bar sized with the counted total; fresh counter seeded with
            # the pre-pass estimates but with estimation off for the real pass.
            self.tqdm = tqdm(total=item_total, unit=WALK_TRACK_ITEM_UNIT_DICT[WALK_TRACK_FILES], disable=False)
            self.tftc = TrackFileTreeCount(
                WALK_TRACK_FILES,
                initial_file_estimate=self.tftc.total_file_estimate,
                initial_folder_estimate=self.tftc.total_folder_estimate,
                track_estimates=False
            )
            self.tqdm.update(0)
            # Restore state mutated for the counting pre-pass.
            self.track_count_only = False
            self.track_update_total = False
            self.copy_method_inst.dryrun = dryrun_backup
            depth = depth_backup
        self.tqdm.update(0)

    # The real traversal: re-yield everything _walk produces.
    for x in self._walk(self.srcdir, self.dstdir, depth, dmatch_depth):
        yield x

    # Reset tracking state and tear down the progress bar.
    self.track_count_only = False
    self.track_update_total = True
    if self.tftc is not None:
        self.tftc = None
    if self.tqdm is not None:
        self.tqdm.close()
        self.tqdm = None