def get_as_list(self, *argstrs):
    """Return the variable values mapped to the given argument strings.

    Accepts either several argument strings or a single tuple/list of them.
    Raises cerr.InvalidArgumentError when no argument strings are given or
    when any of them is unknown to this object.
    """
    if not argstrs:
        raise cerr.InvalidArgumentError(
            "One or more argument strings must be provided")
    if len(argstrs) == 1 and type(argstrs[0]) in (tuple, list):
        # A single tuple/list argument is unpacked into the query list.
        argstrs = argstrs[0]
    unknown = set(argstrs).difference(set(self.argstr2varstr))
    if unknown:
        raise cerr.InvalidArgumentError(
            "This {} object does not have the following argument strings: {}"
            .format(type(self).__name__, list(unknown)))
    # Resolve each argument string to its variable name, then to its value.
    return [self.vars_dict[self.argstr2varstr[key]] for key in argstrs]
 def _jobscript_condopt_eval(self, condopt_expr, out_type):
     """Substitute '%'-prefixed script-variable placeholders in a jobscript
     conditional-option expression, then convert it with `out_type`.

     :param condopt_expr: expression text taken from a jobscript CONDOPT line.
     :param out_type: either `str` (inline variable values as text) or `eval`
         (rewrite placeholders as vars_dict lookups and evaluate the result).
     :return: `out_type(condopt_expr)` after substitution.
     :raises cerr.InvalidArgumentError: if `out_type` is not str or eval.
     """
     if out_type not in (str, eval):
         raise cerr.InvalidArgumentError(
             "`out_type` must be either str or eval")
     vars_dict = self.vars_dict
     # Longest variable names first, so a shorter name that is a prefix of a
     # longer one cannot clobber the longer placeholder during substitution.
     for varstr in sorted(vars_dict.keys(), key=len, reverse=True):
         # A placeholder may be spelled as the variable name, the argument
         # string, or the argument string without leading dashes.
         possible_substr = {
             '%' + s
             for s in [
                 varstr, self.varstr2argstr[varstr],
                 self.varstr2argstr[varstr].lstrip('-')
             ]
         }
         # Also accept all-lowercase and all-uppercase spellings.
         possible_substr = possible_substr.union(
             {s.lower()
              for s in possible_substr},
             {s.upper()
              for s in possible_substr})
         # NOTE(review): if several spellings occur in the expression, which
         # one is replaced depends on set iteration order -- only the first
         # hit is substituted before `break`.
         for substr in possible_substr:
             if substr in condopt_expr:
                 # str output inlines the value directly; eval output rewrites
                 # the placeholder as a vars_dict lookup resolved by eval().
                 replstr = str(
                     vars_dict[varstr]
                 ) if out_type is str else "vars_dict['{}']".format(varstr)
                 condopt_expr = condopt_expr.replace(substr, replstr)
                 break
     # SECURITY NOTE: when out_type is eval, this evaluates text originating
     # from a jobscript file -- jobscripts must be trusted input.
     return out_type(condopt_expr)
Beispiel #3
0
    def __init__(self, report_item=None,
                 initial_file_total=0,
                 initial_folder_total=0,
                 initial_file_estimate=0,
                 initial_folder_estimate=0,
                 track_estimates=True):
        """Initialize file-tree counters, optionally seeded with prior totals.

        :param report_item: what to report progress on; must be a member of
            WALK_TRACK_CHOICES or None.
        :param track_estimates: when True, also maintain per-depth tallies
            used to estimate remaining work.
        """
        if report_item is not None and report_item not in WALK_TRACK_CHOICES:
            raise cerr.InvalidArgumentError("`report_item` argument must be one of {}, "
                                            "but was {}".format(WALK_TRACK_CHOICES, report_item))
        self.report_item = report_item
        self.track_estimates = track_estimates

        # Running totals, seeded with any previously accumulated counts.
        self.total_file_count = initial_file_total
        self.total_folder_count = initial_folder_total
        self.total_file_estimate = initial_file_estimate
        self.total_folder_estimate = initial_folder_estimate

        if track_estimates:
            # Depth-indexed tallies; index 0 represents the walk root, which
            # counts as one entry and one folder containing zero files so far.
            self.max_depth_found = 0
            self.nentries_at_depth = [1]
            self.nfolders_at_depth = [1]
            self.nfiles_at_depth = [0]
            self.nfolders_estimate_at_depth = [1]
            self.nfiles_estimate_at_depth = [0]
Beispiel #4
0
def _walk(
    srcdir, dstdir=None,
    mindepth=None, maxdepth=float('inf'), outdepth=None, dmatch_maxdepth=None,
    fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
    dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
    fsub=None, dsub=None,
    copy_method=None, copy_overwrite_files=None, copy_overwrite_dirs=None, copy_overwrite_dmatch=None,
    sync_tree=False, transplant_tree=False, collapse_tree=False,
    copy_dryrun=None, copy_quiet=None, copy_debug=None,
    allow_dir_op=None,
    mkdir_upon_file_copy=False,
    allow_nonstd_shprogs=False,
    copy_shcmd_fmtstr=None,
    list_function=None,
    rematch_function=None,
    resub_function=None,
    rematch_partial=False
):
    """Generator that validates arguments, builds a WalkObject, and yields
    (dirpath, dirnames, filenames) tuples from walking `srcdir`.

    :raises cerr.InvalidArgumentError: if `srcdir` does not exist, or if copy
        options are supplied without a `dstdir`.
    """
    # Validate inputs before constructing the walker.
    if not os.path.isdir(srcdir):
        raise cerr.InvalidArgumentError("`srcdir` directory does not exist: {}".format(srcdir))
    if dstdir is None and (copy_method or copy_quiet or copy_dryrun or copy_shcmd_fmtstr):
        raise cerr.InvalidArgumentError("`dstdir` must be provided to use file copy options")

    walker = WalkObject(
        mindepth, maxdepth, outdepth, dmatch_maxdepth,
        fmatch, fmatch_re, fexcl, fexcl_re,
        dmatch, dmatch_re, dexcl, dexcl_re,
        fsub, dsub,
        copy_method, copy_overwrite_files, copy_overwrite_dirs, copy_overwrite_dmatch,
        sync_tree, transplant_tree, collapse_tree,
        copy_dryrun, copy_quiet, copy_debug,
        allow_dir_op,
        mkdir_upon_file_copy,
        allow_nonstd_shprogs,
        copy_shcmd_fmtstr,
        list_function,
        rematch_function,
        resub_function,
        rematch_partial
    )

    if mindepth == 0:
        # Depth zero reports the source directory itself, phrased as an entry
        # of its parent directory.
        parent_dir = os.path.dirname(srcdir)
        src_dname = os.path.basename(srcdir)
        yield parent_dir, [src_dname], []

    for walk_result in walker.walk(srcdir, dstdir):
        yield walk_result
Beispiel #5
0
def copy_tree(
    srcdir, dstdir, copy_method='copy',
    sync_tree=False, transplant_tree=False, collapse_tree=False,
    overwrite_files=False, overwrite_dirs=False, overwrite_dmatch=False,
    mindepth=None, maxdepth=float('inf'), dmatch_maxdepth=None,
    fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
    dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
    fsub=None, dsub=None,
    vreturn=None, vyield=None, print_findings=False,
    dryrun=False, quiet=False, debug=False,
    allow_dir_op=None,
    mkdir_upon_file_copy=False,
    allow_nonstd_shprogs=False,
    copy_shcmd_fmtstr=None,
    list_function=None,
    rematch_function=None,
    resub_function=None,
    rematch_partial=False
):
    """Copy the directory tree at `srcdir` into `dstdir` via `find`.

    :raises cerr.InvalidArgumentError: if `dstdir` or `copy_method` is None.
    """
    if dstdir is None:
        raise cerr.InvalidArgumentError("`dstdir` cannot be None")
    if copy_method is None:
        raise cerr.InvalidArgumentError("`copy_method` cannot be None")
    # BUGFIX: arguments were previously passed to find() positionally, but
    # find()'s signature has an `outdepth` parameter between `maxdepth` and
    # `dmatch_maxdepth` that this function does not expose -- the positional
    # call shifted every argument after `maxdepth` into the wrong slot.
    # Keyword arguments make the mapping explicit and immune to such shifts.
    find(
        srcdir, dstdir,
        vreturn=vreturn, vyield=vyield, print_findings=print_findings,
        mindepth=mindepth, maxdepth=maxdepth, dmatch_maxdepth=dmatch_maxdepth,
        fmatch=fmatch, fmatch_re=fmatch_re, fexcl=fexcl, fexcl_re=fexcl_re,
        dmatch=dmatch, dmatch_re=dmatch_re, dexcl=dexcl, dexcl_re=dexcl_re,
        fsub=fsub, dsub=dsub,
        copy_method=copy_method,
        copy_overwrite_files=overwrite_files,
        copy_overwrite_dirs=overwrite_dirs,
        copy_overwrite_dmatch=overwrite_dmatch,
        sync_tree=sync_tree, transplant_tree=transplant_tree, collapse_tree=collapse_tree,
        copy_dryrun=dryrun, copy_quiet=quiet, copy_debug=debug,
        allow_dir_op=allow_dir_op,
        mkdir_upon_file_copy=mkdir_upon_file_copy,
        allow_nonstd_shprogs=allow_nonstd_shprogs,
        copy_shcmd_fmtstr=copy_shcmd_fmtstr,
        list_function=list_function,
        rematch_function=rematch_function,
        resub_function=resub_function,
        rematch_partial=rematch_partial
    )
Beispiel #6
0
def datetime_to_datestring(dt):
    """Format *dt* as a zero-padded 'YYYY-MM-DD' date string.

    :param dt: a value whose exact type is a member of `datetime_date_types`.
    :raises cerr.InvalidArgumentError: for any other type.
    """
    if type(dt) not in datetime_date_types:
        raise cerr.InvalidArgumentError(
            "`dt` type must be one of {}, but was {}".format(
                datetime_date_types, type(dt)))
    return "{:04}-{:02}-{:02}".format(dt.year, dt.month, dt.day)
Beispiel #7
0
def datetime_to_timestring(dt):
    """Format *dt* as a zero-padded 'HH:MM:SS' time string.

    :param dt: must be exactly a `datetime.datetime` (subclasses and plain
        dates are rejected by the identity type check).
    :raises cerr.InvalidArgumentError: for any other type.
    """
    if type(dt) is not datetime.datetime:
        raise cerr.InvalidArgumentError(
            "`dt` type must be {}, but was {}".format(
                datetime.datetime, type(dt)))
    return "{:02}:{:02}:{:02}".format(dt.hour, dt.minute, dt.second)
 def set(self, argstrs, newval=None):
     """Set the value of one or more arguments identified by argument string.

     Two calling forms are supported:
     * parallel tuples/lists of argument strings and new values, in which
       case each value is coerced through the argument's registered type; or
     * one argument string (or a tuple/list of them) with a shared `newval`,
       where `newval=None` means "turn the boolean argument on/off per its
       argparse action type".

     :raises cerr.InvalidArgumentError: for unknown argument strings, or when
         a non-boolean argument is set without an explicit `newval`.
     """
     if type(argstrs) in (tuple, list) and type(newval) in (
             tuple, list) and len(argstrs) == len(newval):
         # Parallel-list form: pair each argument string with its new value.
         argstr_list = argstrs
         for argstr_i, newval_i in list(zip(argstrs, newval)):
             if argstr_i not in self.argstr2varstr:
                 raise cerr.InvalidArgumentError(
                     "This {} object has no '{}' argument string".format(
                         type(self).__name__, argstr_i))
             # Coerce the value through the argument's registered type callable.
             self.vars_dict[self.argstr2varstr[
                 argstr_i]] = self.argstr2argtype[argstr_i](newval_i)
     else:
         # Shared-value form: normalize to a list of argument strings.
         argstr_list = argstrs if type(argstrs) in (tuple,
                                                    list) else [argstrs]
         for argstr in argstr_list:
             if argstr not in self.argstr2varstr:
                 raise cerr.InvalidArgumentError(
                     "This {} object has no '{}' argument string".format(
                         type(self).__name__, argstr))
             if newval is None:
                 # Infer a boolean "on" value from the argparse action type.
                 # NOTE(review): `newval` assigned here persists into later
                 # loop iterations, so a mixed store_true/store_false list
                 # inherits the first inferred value -- confirm intended.
                 action = self.varstr2action[self.argstr2varstr[argstr]]
                 acttype = type(action)
                 if acttype is argparse._StoreAction and 'function argtype_bool_plus' in str(
                         action.type):
                     newval = True
                 elif acttype in (argparse._StoreTrueAction,
                                  argparse._StoreFalseAction):
                     newval = (acttype is argparse._StoreTrueAction)
                 else:
                     raise cerr.InvalidArgumentError(
                         "Setting non-boolean argument string '{}' requires "
                         "a non-None `newval` value".format(argstr))
             self.vars_dict[self.argstr2varstr[argstr]] = newval
     # Positional-argument changes require rebuilding the full command line;
     # otherwise only the base command needs refreshing.
     if set(argstr_list).issubset(set(self.pos_argstr)):
         self._update_cmd()
     else:
         self._update_cmd_base()
 def unset(self, *argstrs):
     """Reset each given argument string to its cleared value.

     Boolean-style arguments (store_true/store_false actions and
     argtype_bool_plus store actions) are reset to their corresponding
     "off" value; all other arguments are reset to None.  Accepts either
     several argument strings or a single tuple/list of them.
     """
     if len(argstrs) < 1:
         raise cerr.InvalidArgumentError(
             "One or more argument strings must be provided")
     if len(argstrs) == 1 and type(argstrs[0]) in (tuple, list):
         # A single tuple/list argument is unpacked.
         argstrs = argstrs[0]
     for arg_key in argstrs:
         varstr = self.argstr2varstr[arg_key]
         action = self.varstr2action[varstr]
         action_type = type(action)
         if (action_type is argparse._StoreAction
                 and 'function argtype_bool_plus' in str(action.type)):
             cleared = False
         elif action_type in (argparse._StoreTrueAction,
                              argparse._StoreFalseAction):
             # store_true clears to False; store_false clears to True.
             cleared = (action_type is argparse._StoreFalseAction)
         else:
             cleared = None
         self.vars_dict[varstr] = cleared
     # Positional-argument changes require rebuilding the full command line.
     if set(argstrs).issubset(set(self.pos_argstr)):
         self._update_cmd()
     else:
         self._update_cmd_base()
Beispiel #10
0
def datetime_to_timestamp(dt):
    """Format *dt* as a compact timestamp string.

    A `datetime.datetime` yields 'YYYYMMDD_HHMMSS'; a `datetime.date` yields
    'YYYYMMDD'.  Exact type identity is required (subclasses are rejected).

    :raises cerr.InvalidArgumentError: for any other type.
    """
    kind = type(dt)
    if kind is datetime.datetime:
        return "{:04}{:02}{:02}_{:02}{:02}{:02}".format(
            dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
    if kind is datetime.date:
        return "{:04}{:02}{:02}".format(dt.year, dt.month, dt.day)
    raise cerr.InvalidArgumentError(
        "`dt` type must be one of {}, but was {}".format(
            datetime_date_types, kind))
Beispiel #11
0
    def get_jobsubmit_cmd(self,
                          scheduler,
                          jobscript=None,
                          jobname=None,
                          time_hr=None,
                          time_min=None,
                          time_sec=None,
                          memory_gb=None,
                          node=None,
                          email=None,
                          envvars=None):
        """Build a scheduler job-submission command string for `jobscript`.

        Supports PBS (`qsub`) and SLURM (`sbatch`) schedulers, translating
        walltime/memory/node/email/envvar settings into the corresponding
        submit options, and appending any options parsed from '#CONDOPT_'
        conditional-option lines inside the jobscript itself.

        :return: the full submission command string, ending with the quoted
            jobscript path.
        :raises cerr.InvalidArgumentError: on malformed jobscript conditional
            option syntax.
        """
        cmd = None
        cmd_envvars = None
        jobscript_optkey = None

        # Combine the time components into a single H:MM:SS walltime string;
        # a zero total means "no walltime requested".
        total_sec = 0
        if time_hr is not None:
            total_sec += time_hr * 3600
        if time_min is not None:
            total_sec += time_min * 60
        if time_sec is not None:
            total_sec += time_sec

        if total_sec == 0:
            time_hms = None
        else:
            m, s = divmod(total_sec, 60)
            h, m = divmod(m, 60)
            time_hms = '{:d}:{:02d}:{:02d}'.format(h, m, s)

        # Render environment variables as a comma-separated NAME="value" list;
        # sequences get generated p0, p1, ... names.
        if envvars is not None:
            if type(envvars) in (tuple, list):
                cmd_envvars = ','.join([
                    'p{}="{}"'.format(i, self._escape_comma(a))
                    for i, a in enumerate(envvars)
                ])
            elif type(envvars) == dict:
                cmd_envvars = ','.join([
                    '{}="{}"'.format(var_name, self._escape_comma(var_val))
                    for var_name, var_val in envvars.items()
                ])

        if scheduler == SCHED_PBS:
            # BUGFIX: the `-l` resource option was previously emitted only
            # when BOTH walltime and memory were set, silently dropping
            # node-only requests, and joining unconditional items left ',,'
            # artifacts; build the resource list from the present items only.
            resource_opts = [opt for opt in [
                "nodes={}".format(node) if node is not None else '',
                "walltime={}".format(time_hms) if time_hms is not None else '',
                "mem={}gb".format(memory_gb) if memory_gb is not None else ''
            ] if opt]
            # BUGFIX: a missing comma previously fused the `-v` item with
            # `-m ae`, making the mail option unreachable when envvars were
            # given and spuriously prefixing it with ' -m ae' otherwise.
            cmd = ' '.join([
                'qsub',
                "-N {}".format(jobname) if jobname is not None else '',
                "-l {}".format(','.join(resource_opts)) if resource_opts else '',
                "-v {}".format(cmd_envvars) if cmd_envvars is not None else '',
                "-m ae" if email else ''
            ])
            jobscript_optkey = '#PBS'

        elif scheduler == SCHED_SLURM:
            cmd = ' '.join([
                'sbatch',
                "--job-name {}".format(jobname) if jobname is not None else '',
                "--time {}".format(time_hms) if time_hms is not None else '',
                "--mem {}G".format(memory_gb) if memory_gb is not None else '',
                # BUGFIX: missing comma previously fused this item with the
                # mail-type item below.
                "-v {}".format(cmd_envvars) if cmd_envvars is not None else '',
                "--mail-type FAIL,END" if email else '',
                # BUGFIX: was `else None`, which made ' '.join() raise
                # TypeError whenever `email` was truthy but not a string.
                "--mail-user {}".format(email) if type(email) is str else ''
            ])
            jobscript_optkey = '#SBATCH'

        if jobscript_optkey is not None:
            # Scan the jobscript for conditional-option directives of the form
            # '#CONDOPT_<KEY> <options> [IF <conditional> [ELSE <options>]]'.
            jobscript_condoptkey = jobscript_optkey.replace('#', '#CONDOPT_')

            jobscript_condopts = []
            with open(jobscript) as job_script_fp:
                for line_num, line in enumerate(job_script_fp.readlines(), 1):
                    if line.lstrip().startswith(jobscript_condoptkey):

                        cond_ifval = None
                        cond_cond = None
                        cond_elseval = None

                        # Split off the optional ELSE clause, then the
                        # optional IF clause.
                        cond_remain = line.replace(jobscript_condoptkey,
                                                   '').strip()
                        cond_parts = [
                            s.strip() for s in cond_remain.split(' ELSE ')
                        ]
                        if len(cond_parts) == 2:
                            cond_remain, cond_elseval = cond_parts
                        cond_parts = [
                            s.strip() for s in cond_remain.split(' IF ')
                        ]
                        if len(cond_parts) == 2:
                            cond_ifval, cond_cond = cond_parts

                        try:
                            condopt_add = None

                            if cond_ifval is not None and cond_cond is not None:
                                # Full IF/ELSE form: evaluate the condition and
                                # pick the matching option text.
                                if self._jobscript_condopt_eval(
                                        cond_cond, eval):
                                    condopt_add = self._jobscript_condopt_eval(
                                        cond_ifval, str)
                                elif cond_elseval is not None:
                                    condopt_add = self._jobscript_condopt_eval(
                                        cond_elseval, str)
                            elif cond_elseval is not None:
                                # ELSE without a matching IF is malformed.
                                raise SyntaxError
                            elif cond_remain.startswith(('import', 'from')):
                                # SECURITY NOTE: executes import statements
                                # taken from the jobscript -- jobscripts must
                                # be trusted input.
                                exec(cond_remain)
                            else:
                                condopt_add = self._jobscript_condopt_eval(
                                    cond_remain, str)

                            if condopt_add is not None:
                                jobscript_condopts.append(condopt_add)

                        except SyntaxError:
                            raise cerr.InvalidArgumentError(' '.join([
                                "Invalid syntax in jobscript conditional option:",
                                "\n  File '{}', line {}: '{}'".format(
                                    jobscript, line_num, line.rstrip()),
                                "\nProper conditional option syntax is as follows:",
                                "'{} <options> [IF <conditional> [ELSE <options>]]'"
                                .format(jobscript_condoptkey)
                            ]))

            if jobscript_condopts:
                cmd = r'{} {}'.format(cmd, ' '.join(jobscript_condopts))

        # The jobscript path is always appended, quoted, as the final token.
        cmd = r'{} "{}"'.format(cmd, jobscript)

        return cmd
Beispiel #12
0
    def __init__(self,
        mindepth=None, maxdepth=float('inf'), outdepth=None, dmatch_maxdepth=None,
        fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
        dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
        fsub=None, dsub=None,
        copy_method=None, copy_overwrite_files=None, copy_overwrite_dirs=None, copy_overwrite_dmatch=None,
        sync_tree=False, transplant_tree=False, collapse_tree=False,
        copy_dryrun=None, copy_quiet=None, copy_debug=None,
        allow_dir_op=None,
        mkdir_upon_file_copy=False,
        allow_nonstd_shprogs=False,
        copy_shcmd_fmtstr=None,
        list_function=None,
        rematch_function=None,
        resub_function=None,
        rematch_partial=False
    ):
        """Validate, normalize, and store all walk/copy options.

        Depth arguments are cross-checked and defaulted; fnmatch-style
        patterns are compiled to regexes; fsub/dsub (pattern, repl) pairs are
        split; and the copy method (name, CopyMethod object, or shell command
        format string) is resolved into a configured CopyMethod instance.

        :raises cerr.InvalidArgumentError: for any inconsistent or invalid
            combination of arguments.
        """
        # --- depth and copy-option validation ---------------------------------
        if any([depth < 0 for depth in [mindepth, maxdepth, outdepth, dmatch_maxdepth] if depth is not None]):
            raise cerr.InvalidArgumentError("depth arguments must be >= 0")
        if outdepth is not None:
            if mindepth is not None and outdepth > mindepth:
                raise cerr.InvalidArgumentError("`outdepth` valid range: 0 <= `outdepth` <= `mindepth`")
            if sync_tree or transplant_tree:
                raise cerr.InvalidArgumentError("`outdepth` and (`sync_tree` or `transplant_tree`) "
                                                "arguments are incompatible")
        if copy_method and copy_shcmd_fmtstr:
            raise cerr.InvalidArgumentError("`copy_method` and `copy_shcmd_fmtstr` arguments are mutually exclusive")
        # A shell-command format string acts as the copy method itself.
        if copy_shcmd_fmtstr is not None:
            copy_method = copy_shcmd_fmtstr
            copy_method_is_fmtstr = True
        else:
            copy_method_is_fmtstr = False
        if copy_quiet and copy_dryrun:
            raise cerr.InvalidArgumentError("`copy_quiet` and `copy_dryrun` arguments are mutually exclusive")
        # File-level pattern arguments imply per-file operations, which rules
        # out whole-directory operations.
        if any([item is not None for item in [fmatch, fmatch_re, fexcl, fexcl_re, fsub]]):
            if allow_dir_op is True:
                raise cerr.InvalidArgumentError("`allow_dir_op` cannot be True when file pattern arguments are provided")
            allow_dir_op = False
        if list_function is not None and list_function not in WALK_LIST_FUNCTION_AVAIL:
            raise cerr.InvalidArgumentError("`list_function` must be either os.listdir or os.scandir")

        # --- depth defaulting --------------------------------------------------
        if mindepth is None:
            if outdepth is not None:
                mindepth = outdepth
            else:
                mindepth = 0

        if outdepth is None:
            if sync_tree or mindepth == 0:
                outdepth = 1
            elif transplant_tree:
                outdepth = 0
            else:
                outdepth = mindepth

        # -1 disables dmatch depth limiting when no copy method is in play.
        if dmatch_maxdepth is None:
            dmatch_maxdepth = float('inf') if copy_method is not None else -1
            dmatch_maxdepth_specified = False
        else:
            dmatch_maxdepth_specified = True

        # Remember which functions the caller supplied before defaulting them.
        list_function_given, rematch_function_given, resub_function_given = [
            item is not None for item in [
                list_function, rematch_function, resub_function
            ]
        ]
        if list_function is None:
            list_function = WALK_LIST_FUNCTION_DEFAULT
        if rematch_function is None:
            rematch_function = WALK_REMATCH_PARTIAL_FUNCTION_DEFAULT if rematch_partial else WALK_REMATCH_FUNCTION_DEFAULT
        if resub_function is None:
            resub_function = WALK_RESUB_FUNCTION_DEFAULT

        # --- pattern normalization: everything becomes a list (or None) -------
        fmatch, fmatch_re, fexcl, fexcl_re, \
        dmatch, dmatch_re, dexcl, dexcl_re, \
        fsub, dsub = [
            item if (item is None or type(item) is list) else (list(item) if type(item) is tuple else [item]) for item in [
                fmatch, fmatch_re, fexcl, fexcl_re,
                dmatch, dmatch_re, dexcl, dexcl_re,
                fsub, dsub
            ]
        ]

        # A single (pattern, repl) pair is wrapped into a list of pairs.
        fsub, dsub = [
            item if (item is None or type(item[0]) in (list, tuple)) else [item] for item in [
                fsub, dsub
            ]
        ]

        # Split sub arguments into parallel pattern/replacement sequences.
        fsub_patt = None
        dsub_patt = None
        try:
            if fsub is not None:
                fsub_patt, fsub_repl = list(zip(*fsub))
                fsub_patt = list(fsub_patt)
                if len(fsub_patt) != len(fsub_repl):
                    raise ValueError
            if dsub is not None:
                dsub_patt, dsub_repl = list(zip(*dsub))
                dsub_patt = list(dsub_patt)
                if len(dsub_patt) != len(dsub_repl):
                    raise ValueError
        except ValueError:
            raise cerr.InvalidArgumentError("resub arguments must be provided in (pattern, repl_str) groups")

        pattern_coll = [
            patt_list for patt_list in [
                fmatch, fmatch_re, fexcl, fexcl_re,
                dmatch, dmatch_re, dexcl, dexcl_re,
                fsub_patt, dsub_patt
            ] if patt_list is not None
        ]

        # --- compile all patterns in place -------------------------------------
        for patt_list in pattern_coll:
            for i, pattern in enumerate(patt_list):

                # fnmatch-style glob patterns are translated to regex first.
                if patt_list in [fmatch, fexcl, dmatch, dexcl]:
                    pattern = fnmatch_module.translate(pattern)

                re_pattern = re.compile(pattern) if type(pattern) is str else pattern
                try:
                    re_pattern_str = re_pattern.pattern
                except AttributeError:
                    traceback.print_exc()
                    raise cerr.InvalidArgumentError("regex match/sub argument is invalid")
                # With the default re.match and full-match intent, anchor the
                # pattern with '$' so it cannot hit on a partial match.
                if (    not rematch_function_given
                    and rematch_function is re.match and patt_list in [fmatch, dmatch]
                    and not pattern.endswith('$') and not rematch_partial):
                    if type(pattern) is str:
                        re_pattern = re.compile(pattern+'$')
                    else:
                        # NOTE(review): this warning string contains '{}' but
                        # no .format() call, so the placeholder is printed
                        # literally -- looks like a missing
                        # .format(re_pattern_str).
                        warning("`re.fullmatch` function is not supported, so `re.match` will be used instead "
                                "and argument regex match pattern '{}' may hit on a partial match")
                patt_list[i] = re_pattern

        # --- merge glob-derived and raw-regex pattern lists --------------------
        fname_rematch = []
        for patt_list in [fmatch, fmatch_re]:
            if patt_list is not None:
                fname_rematch.extend(patt_list)
        fname_reexcl = []
        for patt_list in [fexcl, fexcl_re]:
            if patt_list is not None:
                fname_reexcl.extend(patt_list)
        dname_rematch = []
        for patt_list in [dmatch, dmatch_re]:
            if patt_list is not None:
                dname_rematch.extend(patt_list)
        dname_reexcl = []
        for patt_list in [dexcl, dexcl_re]:
            if patt_list is not None:
                dname_reexcl.extend(patt_list)
        fname_resub = list(zip(fsub_patt, fsub_repl)) if fsub is not None else None
        dname_resub = list(zip(dsub_patt, dsub_repl)) if dsub is not None else None

        # --- resolve the copy method into a configured CopyMethod --------------
        if copy_method is not None:
            if type(copy_method) is psu_cm.CopyMethod:
                copy_method = copy.copy(copy_method)
            elif type(copy_method) is str:
                # Either a registered method name or a raw shell command.
                if copy_method in psu_cm.COPY_METHOD_DICT:
                    copy_method = psu_cm.COPY_METHOD_DICT[copy_method]
                else:
                    copy_method = psu_cm.CopyMethod(copy_method, copy_shcmd_is_fmtstr=copy_method_is_fmtstr)
                    if copy_method.copy_shprog not in psu_cm.COPY_METHOD_SHPROGS and not allow_nonstd_shprogs:
                        raise cerr.InvalidArgumentError("`copy_method` shell program '{}' is nonstandard and not allowed".format(copy_method.copy_shprog))
            else:
                copy_method = psu_cm.CopyMethod(copy_method)
            copy_method.set_options(
                check_srcpath_exists=False,
                copy_makedirs=False,
                copy_overwrite_files=copy_overwrite_files,
                copy_overwrite_dirs=copy_overwrite_dirs,
                copy_dryrun=copy_dryrun,
                copy_verbose=(None if copy_quiet is None else (not copy_quiet)),
                copy_debug=copy_debug
            )

        # NOTE(review): `copy_method.action_verb` is accessed even when
        # `copy_method` is None, so allow_dir_op=None with no copy method
        # raises AttributeError here -- confirm whether that path is reachable
        # for callers.
        if allow_dir_op is None and (   copy_method.action_verb.upper() in ('SYMLINKING', 'MOVING')
                                     or copy_overwrite_dirs or copy_overwrite_dmatch):
            allow_dir_op = True
        if copy_overwrite_dmatch is None:
            copy_overwrite_dmatch = False

        # --- store normalized state --------------------------------------------
        self.srcdir = None
        self.dstdir = None
        self.mindepth = mindepth
        self.maxdepth = maxdepth
        self.outdepth = outdepth
        self.outdepth_inst = outdepth
        self.dmatch_maxdepth = dmatch_maxdepth
        self.dmatch_maxdepth_specified = dmatch_maxdepth_specified
        self.fname_rematch = fname_rematch
        self.fname_reexcl = fname_reexcl
        self.dname_rematch = dname_rematch
        self.dname_reexcl = dname_reexcl
        self.fname_resub = fname_resub
        self.dname_resub = dname_resub
        self.copy_method = copy_method
        # Per-walk working copy of the copy method.
        self.copy_method_inst = None if copy_method is None else copy.copy(self.copy_method)
        self.copy_overwrite_dmatch = copy_overwrite_dmatch
        self.collapse_tree = collapse_tree
        self.collapse_tree_inst = collapse_tree
        self.allow_dir_op = allow_dir_op
        self.mkdir_upon_file_copy = mkdir_upon_file_copy
        self.list_function = list_function
        self.rematch_function = rematch_function
        self.resub_function = resub_function
        # Progress-tracking state, configured by the walk entry points.
        self.tftc = None
        self.tqdm = None
        self.track_progress = True
        self.track_initialize_total = True
        self.track_count_only = False
        self.track_update_total = True
Beispiel #13
0
def walk_simple(srcdir, mindepth=1, maxdepth=float('inf'),
                track_item=None, track_initialize_total=True,
                list_function=WALK_LIST_FUNCTION_DEFAULT):
    """Generator that walks `srcdir` like os.walk, yielding
    (dirpath, dirnames, filenames) tuples, with optional tqdm progress
    tracking of files and/or folders.

    When `track_item` is set and `track_initialize_total` is True, the tree
    is walked once up front purely to count items, then walked again for
    real with an exact progress total.

    :raises cerr.InvalidArgumentError: for a missing source directory,
        negative depths, or an unavailable/invalid `track_item`.
    """
    if not os.path.isdir(srcdir):
        raise cerr.InvalidArgumentError("`srcdir` directory does not exist: {}".format(srcdir))
    if mindepth < 0 or maxdepth < 0:
        raise cerr.InvalidArgumentError("depth arguments must be >= 0")
    if track_item is not None:
        if not imported_tqdm:
            raise cerr.InvalidArgumentError("Python package 'tqdm' must be available to use `track_item` option")
        if track_item not in WALK_TRACK_CHOICES:
            raise cerr.InvalidArgumentError("`track_item` argument must be one of {}, "
                                            "but was {}".format(WALK_TRACK_CHOICES, track_item))
        track_item_unit = WALK_TRACK_ITEM_UNIT_DICT[track_item]
    else:
        track_item_unit = None
    # With maxdepth 0 there is nothing below the root to pre-count.
    if maxdepth == 0:
        track_initialize_total = False

    srcdir = os.path.abspath(srcdir)
    if track_item is not None:
        if track_initialize_total:
            print("First counting {}s to process in directory: {}".format(
                track_item_unit, srcdir
            ))
        my_tftc = TrackFileTreeCount(track_item)
        my_tqdm = tqdm(total=0, unit=track_item_unit, disable=False)
        # my_tqdm = tqdm(total=0, unit=track_item_unit, disable=False) if not track_initialize_total else None

        if track_initialize_total:
            # Counting pass: exhaust the walk purely to tally item totals.
            if my_tqdm is not None:
                my_tqdm.update(0)
            exhaust(
                _walk_simple(srcdir, 1, mindepth, maxdepth, list_function,
                             my_tftc, my_tqdm, update_total_count=True)
            )
            item_total, item_est = my_tftc.get_item_count_estimate()
            if my_tqdm is not None:
                my_tqdm.close()

            # Real pass: fresh progress bar seeded with the exact total, and
            # a fresh counter carrying over the estimates.
            print("Now processing {}s in directory: {}".format(
                track_item_unit, srcdir
            ))
            my_tqdm = tqdm(total=item_total, unit=track_item_unit, disable=False)
            my_tftc = TrackFileTreeCount(
                track_item,
                initial_file_estimate=my_tftc.total_file_estimate,
                initial_folder_estimate=my_tftc.total_folder_estimate,
                track_estimates=False
            )
        my_tqdm.update(0)
    else:
        my_tftc = None
        my_tqdm = None

    if mindepth == 0:
        # Depth zero reports the source directory itself, phrased as an
        # entry of its parent directory.
        if my_tqdm is not None and track_item in (WALK_TRACK_DIRS, WALK_TRACK_BOTH):
            my_tqdm.total += 1
            my_tqdm.update(0)
        updir = os.path.dirname(srcdir)
        srcdname = os.path.basename(srcdir)
        yield updir, [srcdname], []
        if my_tqdm is not None and track_item in (WALK_TRACK_DIRS, WALK_TRACK_BOTH):
            my_tqdm.update(1)

    for x in _walk_simple(srcdir, 1, mindepth, maxdepth, list_function,
                          my_tftc, my_tqdm, (not track_initialize_total)):
        yield x

    if my_tqdm is not None:
        my_tqdm.close()
Beispiel #14
0
def find(
    srcdir, dstdir=None,
    vreturn=None, vyield=None, print_findings=False,
    mindepth=None, maxdepth=float('inf'), outdepth=None, dmatch_maxdepth=None,
    fmatch=None, fmatch_re=None, fexcl=None, fexcl_re=None,
    dmatch=None, dmatch_re=None, dexcl=None, dexcl_re=None,
    fsub=None, dsub=None,
    copy_method=None, copy_overwrite_files=None, copy_overwrite_dirs=None, copy_overwrite_dmatch=None,
    sync_tree=False, transplant_tree=False, collapse_tree=False,
    copy_dryrun=None, copy_quiet=None, copy_debug=None,
    allow_dir_op=None,
    mkdir_upon_file_copy=False,
    allow_nonstd_shprogs=False,
    copy_shcmd_fmtstr=None,
    list_function=None,
    rematch_function=None,
    resub_function=None,
    rematch_partial=False
):
    """Search `srcdir` via `_walk` and return or yield matching paths.

    Exactly one of `vreturn`/`vyield` selects the result mode: `vreturn`
    collects all results and returns them; `vyield` returns a generator.
    Either may be a single item or a tuple/list of items drawn from
    FIND_RETURN_CHOICES (files, dirs, or a mixed list of both).  When
    neither is given, a default `vreturn` mode is inferred from which
    filter arguments were supplied.  All remaining arguments are passed
    through to `_walk` unchanged.

    Returns:
        With `vyield`: a generator of paths (or, when multiple return items
        are requested, of lists of path-lists).  With `vreturn`: a list of
        paths, or a list of such lists for multiple return items.

    Raises:
        cerr.InvalidArgumentError: if zero or both of `vreturn`/`vyield`
            are given, an item is not in FIND_RETURN_CHOICES, or the
            requested items contain duplicates.
    """
    if vreturn is None and vyield is None:
        # Infer a sensible default return mode from which filter arguments
        # were actually provided.
        ffilter = any(arg is not None and len(arg) != 0 for arg in [fmatch, fmatch_re, fexcl, fexcl_re, fsub])
        dfilter = any(arg is not None and len(arg) != 0 for arg in [dmatch, dmatch_re, dexcl, dexcl_re, dsub])
        if ffilter and dfilter:
            vreturn = FIND_RETURN_MIX
        elif ffilter:
            vreturn = FIND_RETURN_FILES
        elif dfilter:
            vreturn = FIND_RETURN_DIRS
        else:
            vreturn = FIND_RETURN_MIX

    return_items = [item_list for item_list in [vreturn, vyield] if item_list is not None]
    if len(return_items) != 1:
        raise cerr.InvalidArgumentError("One and only one of (`vreturn`, `vyield`) arguments must be provided")
    if type(return_items[0]) in (tuple, list):
        return_items = list(return_items[0])
    for item in return_items:
        if item not in FIND_RETURN_CHOICES:
            raise cerr.InvalidArgumentError("`vreturn`/`vyield` string arguments must be one of {}, "
                                            "but argument was {}".format(FIND_RETURN_CHOICES, return_items))
    # BUGFIX: compare distinct count against requested count; the previous
    # range check (1 <= len(set) <= 2) let duplicate pairs through and
    # wrongly rejected three distinct items.
    if len(set(return_items)) != len(return_items):
        raise cerr.InvalidArgumentError("`vreturn`/`vyield` argument contains duplicate items")

    return_mix = (FIND_RETURN_MIX in return_items)
    return_mix_only = (return_items == [FIND_RETURN_MIX])

    dirs_all = []
    files_all = []
    mix_all = []

    def _find_iter():
        # Single pass over the walk; accumulates into the *_all lists for
        # `vreturn` mode and yields incrementally for `vyield` mode.
        for rootdir, dnames, fnames in _walk(
            srcdir, dstdir,
            mindepth, maxdepth, outdepth, dmatch_maxdepth,
            fmatch, fmatch_re, fexcl, fexcl_re,
            dmatch, dmatch_re, dexcl, dexcl_re,
            fsub, dsub,
            copy_method, copy_overwrite_files, copy_overwrite_dirs, copy_overwrite_dmatch,
            sync_tree, transplant_tree, collapse_tree,
            copy_dryrun, copy_quiet, copy_debug,
            allow_dir_op,
            mkdir_upon_file_copy,
            allow_nonstd_shprogs,
            copy_shcmd_fmtstr,
            list_function,
            rematch_function,
            resub_function,
            rematch_partial
        ):
            dirs = [os.path.join(rootdir, dn) for dn in dnames] if (FIND_RETURN_DIRS in return_items or return_mix) else None
            files = [os.path.join(rootdir, fn) for fn in fnames] if (FIND_RETURN_FILES in return_items or return_mix) else None
            if return_mix:
                # When mix is the only requested item, `dirs` may be consumed
                # in place; otherwise copy it so `dirs` survives for its own item.
                mix = dirs if return_mix_only else list(dirs)
                mix.extend(files)
                if return_mix_only:
                    dirs, files = None, None
            else:
                mix = None

            if print_findings:
                if mix:
                    for p in mix:
                        sys.stdout.write(p+'\n')
                else:
                    if dirs:
                        for d in dirs:
                            sys.stdout.write(d+'\n')
                    if files:
                        for f in files:
                            sys.stdout.write(f+'\n')

            if vreturn:
                if dirs:
                    dirs_all.extend(dirs)
                if files:
                    files_all.extend(files)
                if mix:
                    mix_all.extend(mix)

            if vyield:
                if len(return_items) == 1:
                    item = return_items[0]
                    yield_results = files if item == FIND_RETURN_FILES else (dirs if item == FIND_RETURN_DIRS else mix)
                    for p in yield_results:
                        yield p
                else:
                    yield_results = [
                        files if item == FIND_RETURN_FILES else (dirs if item == FIND_RETURN_DIRS else mix)
                        for item in return_items
                    ]
                    yield yield_results

    if vyield:
        return _find_iter()

    if vreturn:
        exhaust(_find_iter())
        if len(return_items) == 1:
            item = return_items[0]
            # BUGFIX: the dirs branch previously re-tested FIND_RETURN_FILES,
            # so requesting dirs silently returned `mix_all` (compare the
            # correct selection in the yield path above).
            return_results = files_all if item == FIND_RETURN_FILES else (dirs_all if item == FIND_RETURN_DIRS else mix_all)
        else:
            return_results = [
                files_all if item == FIND_RETURN_FILES else (dirs_all if item == FIND_RETURN_DIRS else mix_all)
                for item in return_items
            ]
        return return_results
Beispiel #15
0
def execute_shell_command(cmd_str=None,
                          tokenize_cmd=False,
                          arg_list=None,
                          shell=None,
                          cwd=None,
                          env=None,
                          executable=None,
                          bufsize=-1,
                          stdin=None,
                          stdout=None,
                          stderr=None,
                          send_stderr_to_stdout=False,
                          quiet=False,
                          return_streams=False,
                          return_popen=False,
                          success_error_codes=(0,),
                          ignore_failure=False,
                          throw_exception_in_failure=True,
                          print_stderr_in_failure=True,
                          print_failure_info=False,
                          print_begin_info=False,
                          print_end_info=False):
    """Run an external command via subprocess.Popen and wait for completion.

    Provide the command as either a single `cmd_str` (optionally tokenized
    with shlex when `tokenize_cmd` is True) or an `arg_list` of tokens, but
    not both.  When `shell` is None it defaults to True for `cmd_str` and
    False for `arg_list`.  With `quiet` or `return_streams`, stdout/stderr
    are captured (piped) instead of inherited; `send_stderr_to_stdout`
    merges stderr into stdout.

    Returns:
        The process return code by default; `(return_code, stdout, stderr)`
        when `return_streams` is True; the (already-completed) Popen object
        when `return_popen` is True.

    Raises:
        cerr.InvalidArgumentError: on conflicting argument combinations.
        cerr.ExternalError: on a non-success exit code, unless
            `ignore_failure` is True or `throw_exception_in_failure` is False.
    """
    # Avoid a mutable default argument shared across calls.
    if arg_list is None:
        arg_list = []
    if [cmd_str is not None, len(arg_list) > 0].count(True) != 1:
        raise cerr.InvalidArgumentError(
            "Only one of (`cmd_str`, `arg_list`) arguments must be provided")
    if return_streams and return_popen:
        raise cerr.InvalidArgumentError(
            "Only one of (`return_streams`, `return_popen`) arguments may be provided"
        )
    if cmd_str is not None:
        args = shlex.split(cmd_str) if tokenize_cmd else cmd_str
        if shell is None:
            shell = True
    else:
        args = arg_list
        if shell is None:
            shell = False
        cmd_str = ' '.join(arg_list)
    if quiet or return_streams:
        # Capture streams so we can suppress and/or return them.
        if stdout is None:
            stdout = subprocess.PIPE
        if stderr is None and not send_stderr_to_stdout:
            stderr = subprocess.PIPE
    if send_stderr_to_stdout:
        if stderr is not None:
            raise cerr.InvalidArgumentError(
                "`stderr` argument must be None when `send_stderr_to_stdout` argument is True"
            )
        stderr = subprocess.STDOUT
    proc = subprocess.Popen(args,
                            bufsize=bufsize,
                            executable=executable,
                            stdin=stdin,
                            stdout=stdout,
                            stderr=stderr,
                            shell=shell,
                            cwd=cwd,
                            env=env,
                            universal_newlines=True)
    proc_pid = proc.pid
    if print_begin_info:
        print('Beginning external call (PID {}): """ {} """'.format(
            proc_pid, cmd_str))
    # Blocks until the process exits; rebinds stdout/stderr to the captured
    # text (None for streams that were not piped).
    stdout, stderr = proc.communicate()
    return_code = proc.returncode
    if not quiet:
        if stdout is not None:
            sys.stdout.write(stdout)
        if stderr is not None:
            sys.stderr.write(stderr)
            # stderr was already echoed; don't print it again on failure.
            print_stderr_in_failure = False
    if return_code not in success_error_codes and not ignore_failure:
        errmsg = 'External call (PID {}) failed with non-zero exit status ({}): """ {} """'.format(
            proc_pid, return_code, cmd_str)
        if throw_exception_in_failure:
            raise cerr.ExternalError(errmsg)
        if print_stderr_in_failure and stderr is not None:
            sys.stderr.write(stderr)
        if print_failure_info:
            # BUGFIX: previously printed the `print_failure_info` flag itself
            # instead of the failure message.
            eprint(errmsg)
    if print_end_info:
        print("External call (PID {}) completed successfully".format(proc_pid))
    if return_popen:
        return proc
    else:
        return (return_code, stdout, stderr) if return_streams else return_code
Beispiel #16
0
    def read(self, tasklist_file_or_buff, col_delim=',',
             uniform_dtype=str,
             col_argname_dtype_def=None,  # collection of tuples like ((0, str))
             args=None, col_argstr_def=None,
             ncol_min=None, ncol_max=None, ncol_strict=True,
             expect_header=None, check_header_for_argnames=None,
             allow_missing_header=None, ncol_strict_header_separate=False):
        """Parse a delimited tasklist file into typed per-line value lists.

        Column types come from `col_argname_dtype_def` (explicit
        (argname, dtype) pairs), or are derived from `col_argstr_def`
        together with `args` (presumably an ArgumentPasser-like object
        exposing `argstr2argtype` and `all_argstr` -- TODO confirm), or
        fall back to `uniform_dtype` for every column.  Header handling
        (`expect_header`, `check_header_for_argnames`,
        `allow_missing_header`) defaults are resolved from which of those
        definitions were supplied.  Raises cerr.InvalidArgumentError for
        conflicting arguments and cerr.ScriptArgumentError for malformed
        files.

        NOTE(review): as visible here, the parsed `task_array` and the
        final derived column definition are neither returned nor stored
        on `self` -- confirm against the full class that this method is
        not truncated.
        """

        header_items = None
        task_array = []

        if col_argname_dtype_def is not None and col_argstr_def is not None:
            raise cerr.InvalidArgumentError(
                "`col_argname_dtype_def` and `col_argstr_def` arguments are mutually exclusive"
            )

        # Resolve header-handling defaults: with no column definitions at all,
        # headers are neither expected nor interpretable as argument names.
        if args is None and col_argname_dtype_def is None and col_argstr_def is None:
            if expect_header is None:
                expect_header = False
            if check_header_for_argnames is True or allow_missing_header is True:
                raise cerr.InvalidArgumentError(
                    "`check_header_for_argnames` or `allow_missing_header` can only be True "
                    "when either `col_argname_dtype_def` or `col_argstr_def` are provided"
                )
            check_header_for_argnames = False
            allow_missing_header = False
        else:
            if expect_header is None:
                expect_header = True
            if check_header_for_argnames is None:
                check_header_for_argnames = True
            if col_argname_dtype_def is None and col_argstr_def is None:
                if allow_missing_header is True:
                    raise cerr.InvalidArgumentError(
                        "`allow_missing_header` can only be True "
                        "when either `col_argname_dtype_def` or `col_argstr_def` are provided"
                    )
                allow_missing_header = False
            elif allow_missing_header is None:
                allow_missing_header = True

        col_argname_dtype_def_derived = None

        # Derive the working (argname, dtype) column definition.
        if col_argname_dtype_def is not None:
            col_argname_dtype_def_derived = col_argname_dtype_def
        elif col_argstr_def is not None:
            if args is None:
                raise cerr.InvalidArgumentError("`args` argument must be provided along with `col_argstr_def`")
            col_argname_dtype_def_derived = [(argstr, args.argstr2argtype[argstr]) for argstr in col_argstr_def]

        with open(tasklist_file_or_buff, 'r') as tasklist_fp:

            first_line = tasklist_fp.readline().strip()
            if first_line == '':
                # Empty file: nothing to parse.
                return
            first_line_items = [item.strip() for item in first_line.split(col_delim)]
            line_num = 1

            if not expect_header:
                task_line = first_line
                task_line_items = first_line_items
            else:
                header_line = first_line
                header_items = first_line_items

                if check_header_for_argnames:
                    assert col_argname_dtype_def is not None or col_argstr_def is not None
                    assert col_argname_dtype_def_derived is not None

                    header_argname_set = set(header_items)
                    col_argname_def_list = [argname for argname, argtype in col_argname_dtype_def_derived]
                    col_argname_def_set = set(col_argname_def_list)

                    if header_argname_set.issubset(col_argname_def_set):
                        # Header matches the provided definition: reorder the
                        # column definition to the header's column order.
                        argname_dtype_dict = {argname: dtype for argname, dtype in col_argname_dtype_def_derived}
                        col_argname_dtype_def_derived = [(argname, argname_dtype_dict[argname]) for argname in header_items]

                    elif args is not None and header_argname_set.issubset(set(args.all_argstr)):
                        # Header names are not in the provided definition but
                        # are known argument strings; fall back to those.
                        if col_argname_dtype_def is not None:
                            warning(
                                "Argument names in header line ({}) of tasklist file ({}) are not a "
                                "subset of `col_argname_dtype_def` argument names ({}), but are instead "
                                "a subset of script ArgumentPasser argument strings which will be "
                                "leveraged instead.".format(
                                    header_line, tasklist_file_or_buff, col_argname_def_set
                                )
                            )
                        elif col_argstr_def is not None:
                            warning(
                                "Argument strings in header line ({}) of tasklist file ({}) are not "
                                "the expected set of `col_argstr_def` argument strings ({})".format(
                                    header_line, tasklist_file_or_buff, col_argname_def_set
                                )
                            )
                        col_argname_dtype_def_derived = [(argstr, args.argstr2argtype[argstr]) for argstr in header_items]

                    elif allow_missing_header:
                        # No recognizable header: treat the first line as data
                        # and trust the provided column ordering.
                        if col_argname_dtype_def is not None:
                            warning(
                                "Tasklist file ({}) does not have an acceptable header line, "
                                "so it will be assumed that columns follow the provided "
                                "`col_argname_dtype_def` order precisely as given: {}".format(
                                    tasklist_file_or_buff, col_argname_def_list
                                )
                            )
                        elif col_argstr_def is not None:
                            warning(
                                "Tasklist file ({}) does not have an acceptable header line, "
                                "so it will be assumed that columns follow the provided "
                                "`col_argstr_def` order precisely as given: {}".format(
                                    tasklist_file_or_buff, col_argname_def_list
                                )
                            )
                        header_items = None
                    else:
                        raise cerr.ScriptArgumentError("Tasklist file does not have an acceptable header line: {}".format(tasklist_file_or_buff))

                if header_items is not None:
                    # Validate header column count against ncol_min/ncol_max.
                    ncol_header = len(header_items)
                    if ncol_min is not None and ncol_header < ncol_min:
                        raise cerr.ScriptArgumentError(
                            "Tasklist file header line has {} columns, less than `ncol_min` required minimum ({}): {}".format(
                                ncol_header, ncol_min, tasklist_file_or_buff
                            ))
                    if ncol_max is not None and ncol_header > ncol_max:
                        raise cerr.ScriptArgumentError(
                            "Tasklist file header line has {} columns, more than `ncol_max` required maximum ({}): {}".format(
                                ncol_header, ncol_max, tasklist_file_or_buff
                            ))
                    # NOTE(review): combined with the check above, this forces
                    # the header to have exactly `ncol_max` columns when
                    # `ncol_max` is set -- confirm that rejecting headers with
                    # fewer than `ncol_max` columns is intended.
                    if ncol_max is not None and ncol_header < ncol_max:
                        raise cerr.ScriptArgumentError(
                            "Tasklist file header line has {} columns, less than `ncol_max` possible maximum ({}): {}".format(
                                ncol_header, ncol_max, tasklist_file_or_buff
                            ))

                if header_items is None:
                    # First line was data after all (missing header allowed).
                    task_line = first_line
                    task_line_items = first_line_items
                else:
                    task_line = tasklist_fp.readline().strip()
                    task_line_items = [item.strip() for item in task_line.split(col_delim)]

                    if len(task_line_items) != len(header_items):
                        errmsg = ("Tasklist file ({}) number of columns mismatch between header ({}) "
                                  "and body ({})".format(tasklist_file_or_buff, len(header_items), len(task_line_items)))
                        if ncol_strict and not ncol_strict_header_separate:
                            raise cerr.ScriptArgumentError(errmsg)
                        else:
                            warning(errmsg)

            if col_argname_dtype_def_derived is not None:
                col_dtype_def_derived = [dtype for argname, dtype in col_argname_dtype_def_derived]
                # if ncol_min is None:
                #     ncol_min = len(col_argname_dtype_def_derived)
                if ncol_max is None:
                    ncol_max = len(col_argname_dtype_def_derived)
            else:
                col_dtype_def_derived = None

            if ncol_strict:
                # The first body line fixes the column count for the file.
                ncol_body = len(task_line_items)
            ncol_body_max = float('-inf')

            while task_line != '':
                line_num += 1

                ncol_task_line = len(task_line_items)
                if ncol_strict and ncol_task_line != ncol_body:
                    raise cerr.ScriptArgumentError(
                        "Tasklist file {}, line {}: Number of columns ({}) breaks from constant "
                        "number of columns ({}) established prior to this line (`ncol_strict=True`)".format(
                            tasklist_file_or_buff, line_num, ncol_task_line, ncol_body
                        ))
                elif ncol_min is not None and ncol_task_line < ncol_min:
                    raise cerr.ScriptArgumentError(
                        "Tasklist file {}, line {}: Number of columns ({}) is less than `ncol_min` "
                        "required minimum ({})".format(
                            tasklist_file_or_buff, line_num, ncol_task_line, ncol_min
                        ))
                elif ncol_max is not None and ncol_task_line > ncol_max:
                    raise cerr.ScriptArgumentError(
                        "Tasklist file {}, line {}: Number of columns ({}) is more than `ncol_max` "
                        "required maximum ({})".format(
                            tasklist_file_or_buff, line_num, ncol_task_line, ncol_max
                        ))

                if ncol_task_line > ncol_body_max:
                    ncol_body_max = ncol_task_line

                # Convert each cell with the per-column dtype, or the uniform
                # dtype when no column definition is available.
                if col_dtype_def_derived is not None:
                    task_line_values = [col_dtype_def_derived[col_idx](item) for col_idx, item in enumerate(task_line_items)]
                else:
                    task_line_values = [uniform_dtype(item) for item in task_line_items]

                task_array.append(task_line_values)

                task_line = tasklist_fp.readline().strip()
                task_line_items = [item.strip() for item in task_line.split(col_delim)]

            if col_argname_dtype_def_derived is None:
                # No definition anywhere: synthesize one keyed by column index.
                col_argname_dtype_def_derived = [(argname, uniform_dtype) for argname in range(ncol_body_max)]
Beispiel #17
0
    def walk(self,
             srcdir, dstdir=None,
             copy_overwrite_files=None, copy_overwrite_dirs=None,
             sync_tree=False, transplant_tree=False, collapse_tree=None,
             copy_dryrun=None, copy_quiet=None, copy_debug=None,
             track_progress=None, track_initialize_total=None):
        """Generator: walk `srcdir` via `self._walk`, yielding its results.

        Resolves per-call options against instance defaults
        (`self.collapse_tree`, `self.track_progress`,
        `self.track_initialize_total`, `self.outdepth`, `self.copy_method`),
        optionally performs an initial dry-run counting pass to seed a tqdm
        progress bar with an item total, then delegates to `self._walk` and
        finally tears down the tracking state.

        Raises:
            cerr.InvalidArgumentError: if `srcdir` is not an existing
                directory.
        """
        self.track_count_only = False
        self.track_update_total = True

        # Per-call options fall back to instance-level defaults.
        if collapse_tree is None:
            collapse_tree = self.collapse_tree
        if track_progress is None:
            track_progress = self.track_progress
        if track_initialize_total is None:
            track_initialize_total = self.track_initialize_total

        # sync_tree/transplant_tree override the instance output depth.
        if sync_tree:
            self.outdepth_inst = 1
        elif transplant_tree:
            self.outdepth_inst = 0
        else:
            self.outdepth_inst = self.outdepth

        srcdir = os.path.normpath(os.path.expanduser(srcdir))
        if not os.path.isdir(srcdir):
            raise cerr.InvalidArgumentError("`srcdir` directory does not exist: {}".format(srcdir))
        if dstdir is not None:
            dstdir = os.path.normpath(os.path.expanduser(dstdir))
        # NOTE(review): when outdepth_inst == 0 (transplant mode) this joins
        # onto `dstdir` even if it is None -- confirm callers always supply
        # `dstdir` in that mode.
        if self.outdepth_inst == 0:
            dstdir = os.path.join(dstdir, os.path.basename(srcdir))

        self.srcdir = srcdir
        self.dstdir = dstdir
        self.collapse_tree_inst = collapse_tree
        if self.copy_method is None:
            self.copy_method_inst = None
        else:
            # Shallow-copy the copy method so per-call options don't mutate
            # the instance-level default.
            self.copy_method_inst = copy.copy(self.copy_method)
            self.copy_method_inst.set_options(
                copy_overwrite_files=copy_overwrite_files,
                copy_overwrite_dirs=copy_overwrite_dirs,
                copy_dryrun=copy_dryrun,
                copy_verbose=(None if copy_quiet is None else (not copy_quiet)),
                copy_debug=copy_debug
            )

        depth = 0
        # dmatch_depth == 0 means directory-name matching is active but the
        # root has not matched yet; -1 disables it entirely.
        dmatch_depth = -1 if not self.dname_rematch else 0

        if dmatch_depth == 0:
            # Test the source root itself against the dir exclude/match
            # patterns before descending.
            srcdname = os.path.basename(self.srcdir)
            srcdname_match = True
            if self.dname_reexcl:
                for re_pattern in self.dname_reexcl:
                    srcdname_match = (not self.rematch_function(re_pattern, srcdname))
                    if not srcdname_match:
                        break
                if not srcdname_match:
                    # Root excluded: nothing to walk.
                    return
            if self.dname_rematch and srcdname_match:
                srcdname_match = False
                for re_pattern in self.dname_rematch:
                    srcdname_match = self.rematch_function(re_pattern, srcdname)
                    if srcdname_match:
                        break
            if srcdname_match:
                dmatch_depth = 1

        # Whole-directory copy shortcut: copy srcdir as a unit and return
        # without walking.  NOTE(review): assumes copy_method_inst is not None
        # when allow_dir_op is set -- confirm.
        if self.allow_dir_op and dmatch_depth != 0 and (self.mindepth <= depth <= self.maxdepth) and self.outdepth_inst in (-1, 0):
            if not self.copy_method_inst.dryrun:
                os.makedirs(os.path.dirname(os.path.normpath(self.dstdir)), exist_ok=True)
            copy_success = self.copy_method_inst.copy(
                self.srcdir, self.dstdir,
                srcpath_is_file=False,
                overwrite_dir=(self.copy_method_inst.copy_overwrite_dirs or (self.copy_overwrite_dmatch and dmatch_depth == 1)),
            )
            return

        if track_progress and imported_tqdm:
            if track_initialize_total:
                print("First counting files to process in directory: {}".format(self.srcdir))
            self.tftc = TrackFileTreeCount(WALK_TRACK_FILES)
            self.tqdm = tqdm(total=0, unit=WALK_TRACK_ITEM_UNIT_DICT[WALK_TRACK_FILES], disable=False)
            # self.tqdm = tqdm(total=0, unit=WALK_TRACK_ITEM_UNIT_DICT[WALK_TRACK_FILES], disable=False) if not track_initialize_total else None
        else:
            self.tftc = None
            self.tqdm = None

        if self.copy_method_inst is not None and self.dstdir is not None and not os.path.isdir(self.dstdir):
            if not self.copy_method_inst.dryrun:
                os.makedirs(self.dstdir)

        depth = 1

        if self.tftc is not None:
            if track_initialize_total:
                # Counting pass: force dryrun so nothing is copied, exhaust
                # the walk to total up items, then restore state and rebuild
                # the tracker/progress bar with the known total.
                # NOTE(review): assumes copy_method_inst is not None here --
                # confirm track_initialize_total is never combined with a
                # None copy method.
                self.track_count_only = True
                self.track_update_total = True
                dryrun_backup = self.copy_method_inst.dryrun
                self.copy_method_inst.dryrun = True
                depth_backup = depth

                if self.tqdm is not None:
                    self.tqdm.update(0)
                exhaust(self._walk(self.srcdir, self.dstdir, depth, dmatch_depth))
                item_total, item_est = self.tftc.get_item_count_estimate()
                if self.tqdm is not None:
                    self.tqdm.close()

                print("Now processing files in directory: {}".format(self.srcdir))
                self.tqdm = tqdm(total=item_total, unit=WALK_TRACK_ITEM_UNIT_DICT[WALK_TRACK_FILES], disable=False)
                self.tftc = TrackFileTreeCount(
                    WALK_TRACK_FILES,
                    initial_file_estimate=self.tftc.total_file_estimate,
                    initial_folder_estimate=self.tftc.total_folder_estimate,
                    track_estimates=False
                )
                self.tqdm.update(0)

                self.track_count_only = False
                self.track_update_total = False
                self.copy_method_inst.dryrun = dryrun_backup
                depth = depth_backup

            self.tqdm.update(0)

        # The real walk: re-yield everything from the recursive helper.
        for x in self._walk(self.srcdir, self.dstdir, depth, dmatch_depth):
            yield x

        # Tear down tracking state after the walk completes.
        self.track_count_only = False
        self.track_update_total = True
        if self.tftc is not None:
            self.tftc = None
        if self.tqdm is not None:
            self.tqdm.close()
            self.tqdm = None