Example #1
def main():
    """Implement "isodatetime" command."""
    arg_parser = ArgumentParser(
        prog='isodatetime',
        formatter_class=RawDescriptionHelpFormatter,
        description=__doc__)
    for o_args, o_kwargs in [
        [
            ["items"],
            {
                "help": "Time point, duration or recurrence string",
                "metavar": "ITEM",
                "nargs": "*",
            },
        ],
        [
            ["--as-total"],
            {
                "action": "store",
                "choices": ['H', 'M', 'S', 'h', 'm', 's'],
                "dest": "duration_print_format",
                "help": "Print duration as total of the specified unit.",
                "metavar": "UNIT",
            },
        ],
        [
            ["--calendar"],
            {
                "action": "store",
                "choices": ["360day", "365day", "366day", "gregorian"],
                "help": "Set the calendar mode.",
                "metavar": "MODE",
            },
        ],
        [
            ["--max="],
            {
                "action": "store",
                "default": 10,
                "dest": "max_results",
                "help": "Specify maximum number of results.",
                "metavar": "N",
                "type": int,
            },
        ],
        [
            ["--offset1", "--offset", "-s", "-1"],
            {
                "action": "append",
                "dest": "offsets1",
                "metavar": "OFFSET",
                "help": "Specify offsets for 1st date time point.",
            },
        ],
        [
            ["--offset2", "-2"],
            {
                "action": "append",
                "dest": "offsets2",
                "metavar": "OFFSET",
                "help": "Specify offsets for 2nd date time point.",
            },
        ],
        [
            ["--parse-format", "-p"],
            {
                "metavar": "FORMAT",
                "help": "Specify the format for parsing inputs.",
            },
        ],
        [
            ["--print-format", "--format", "-f"],
            {
                "metavar": "FORMAT",
                "help": "Specify the format for printing results.",
            },
        ],
        [
            ["--ref", "-R"],
            {
                "action": "store",
                "dest": "ref_point_str",
                "help": "Specify a reference point string.",
                "metavar": "REF",
            },
        ],
        [
            ["--utc", "-u"],
            {
                "action": "store_true",
                "default": False,
                "dest": "utc_mode",
                "help": "Switch on UTC mode.",
            },
        ],
        [
            ["--version", "-V"],
            {
                "action": "store_true",
                "default": False,
                "dest": "version_mode",
                "help": "Print version and exit.",
            },
        ],
    ]:
        arg_parser.add_argument(*o_args, **o_kwargs)
    if hasattr(arg_parser, 'parse_intermixed_args'):
        args = arg_parser.parse_intermixed_args()
    else:
        args = arg_parser.parse_args()
    if args.version_mode:
        print(__version__)
        return
    date_time_oper = DateTimeOperator(
        parse_format=args.parse_format,
        utc_mode=args.utc_mode,
        calendar_mode=args.calendar,
        ref_point_str=args.ref_point_str)

    try:
        if len(args.items) >= 2:
            out = date_time_oper.diff_time_point_strs(
                args.items[0],
                args.items[1],
                args.offsets1,
                args.offsets2,
                args.print_format,
                args.duration_print_format)
        elif args.items and args.items[0].startswith("R"):
            outs = []
            for item in date_time_oper.iter_recurrence_str(
                args.items[0],
                args.print_format,
            ):
                outs.append(item)
                if len(outs) >= args.max_results:
                    break
            out = '\n'.join(outs)
        elif args.items and args.duration_print_format:
            out = date_time_oper.format_duration_str(
                args.items[0], args.duration_print_format)
        else:
            time_point_str = None
            if args.items:
                time_point_str = args.items[0]
            out = date_time_oper.process_time_point_str(
                time_point_str, args.offsets1, args.print_format)
    except ValueError as exc:
        sys.exit(exc)
    else:
        print(out)
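
Several of the examples on this page guard the call with hasattr(arg_parser, 'parse_intermixed_args') because parse_intermixed_args() was only added in Python 3.7; the rest call it directly. A minimal sketch of that fallback pattern, with illustrative option names that are not taken from any of the projects shown here:

from argparse import ArgumentParser

parser = ArgumentParser(prog='demo')
parser.add_argument('paths', nargs='*')
parser.add_argument('--limit', type=int, default=10)

argv = ['a.txt', 'b.txt', '--limit', '5']
if hasattr(parser, 'parse_intermixed_args'):
    # Python 3.7+: also accepts options mixed in between the positionals,
    # e.g. ['a.txt', '--limit', '5', 'b.txt'].
    args = parser.parse_intermixed_args(argv)
else:
    args = parser.parse_args(argv)
print(args.paths, args.limit)  # ['a.txt', 'b.txt'] 5
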
Example #2
def parse_args():
    arg_parser = ArgumentParser(prog='isodatetime',
                                formatter_class=RawDescriptionHelpFormatter,
                                description=__doc__)
    for o_args, o_kwargs in [
        [
            ["items"],
            {
                "help": ("Time point, duration or recurrence string."
                         " To read from stdin use '-'."),
                "metavar": "ITEM",
                "nargs": "*",
            },
        ],
        [
            ["--as-total"],
            {
                "action": "store",
                "choices": ['H', 'M', 'S', 'h', 'm', 's'],
                "dest": "duration_print_format",
                "help": "Print duration as total of the specified unit.",
                "metavar": "UNIT",
            },
        ],
        [
            ["--calendar"],
            {
                "action": "store",
                "choices": ["360day", "365day", "366day", "gregorian"],
                "help": "Set the calendar mode.",
                "metavar": "MODE",
            },
        ],
        [
            ["--max="],
            {
                "action": "store",
                "default": 10,
                "dest": "max_results",
                "help": "Specify maximum number of results.",
                "metavar": "N",
                "type": int,
            },
        ],
        [
            ["--offset1", "--offset", "-s", "-1"],
            {
                "action": "append",
                "dest": "offsets1",
                "metavar": "OFFSET",
                "help": "Specify offsets for 1st date time point.",
            },
        ],
        [
            ["--offset2", "-2"],
            {
                "action": "append",
                "dest": "offsets2",
                "metavar": "OFFSET",
                "help": "Specify offsets for 2nd date time point.",
            },
        ],
        [
            ["--parse-format", "-p"],
            {
                "metavar": "FORMAT",
                "help": "Specify the format for parsing inputs.",
            },
        ],
        [
            ["--print-format", "--format", "-f"],
            {
                "metavar": "FORMAT",
                "help": "Specify the format for printing results.",
            },
        ],
        [
            ["--ref", "-R"],
            {
                "action": "store",
                "dest": "ref_point_str",
                "help": "Specify a reference point string.",
                "metavar": "REF",
            },
        ],
        [
            ["--utc", "-u"],
            {
                "action": "store_true",
                "default": False,
                "dest": "utc_mode",
                "help": "Switch on UTC mode.",
            },
        ],
        [
            ["--version", "-V"],
            {
                "action": "store_true",
                "default": False,
                "dest": "version_mode",
                "help": "Print version and exit.",
            },
        ],
    ]:
        arg_parser.add_argument(*o_args, **o_kwargs)
    if hasattr(arg_parser, 'parse_intermixed_args'):
        args = arg_parser.parse_intermixed_args()
    else:
        args = arg_parser.parse_args()

    if args.offsets1:
        args.offsets1 = [item.replace("\\", "") for item in args.offsets1]

    if args.offsets2:
        args.offsets2 = [item.replace("\\", "") for item in args.offsets2]

    return args
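
The backslash stripping at the end of parse_args() above presumably lets callers escape offset values that start with '-' (for example '\-PT1H'), which argparse would otherwise try to interpret as options. A minimal sketch of that idea with a hypothetical single-option parser:

from argparse import ArgumentParser

parser = ArgumentParser(prog='demo')
parser.add_argument('--offset', action='append', dest='offsets', metavar='OFFSET')

# The leading backslash stops argparse from mistaking the value for an
# option; it is stripped again after parsing.
args = parser.parse_args(['--offset', '\\-PT1H'])
args.offsets = [item.replace('\\', '') for item in args.offsets]
print(args.offsets)  # ['-PT1H']
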
Example #3
class MRJobLauncher(object):
    """Handle running a MapReduce job on an executable from the command line.
    This class will eventually support running arbitrary executables; for now
    it only supports :py:class:`~mrjob.job.MRJob` subclasses. Up to v0.5 it is
    effectively part of the :py:class:`~mrjob.job.MRJob` class itself and
    should not be used externally in any way.
    """
    # only MRJobLauncher expects the first argument to be script_path
    _FIRST_ARG_IS_SCRIPT_PATH = True

    def __init__(self, script_path=None, args=None, from_cl=False):
        """
        :param script_path: Path to script unless it's the first item of *args*
        :param args: Command line arguments
        :param from_cl: If not using sys.argv but still coming from the
                        command line (as opposed to a script, e.g. from
                        mrjob.cmd), don't override the option parser error
                        function (exit instead of throwing ValueError).
        """
        if script_path is not None:
            script_path = os.path.abspath(script_path)
        self._script_path = script_path

        # make sure we respect the $TZ (time zone) environment variable
        if hasattr(time, 'tzset'):
            time.tzset()

        # argument dests for args to pass through
        self._passthru_arg_dests = set()
        self._file_arg_dests = set()

        # there is no equivalent in argparse
        # remove this in v0.7.0
        if hasattr(self, 'OPTION_CLASS'):
            log.warning('OPTION_CLASS attribute is ignored; '
                        'mrjob now uses argparse instead of optparse')

        self.arg_parser = ArgumentParser(usage=self._usage(),
                                         add_help=False)
        self.configure_args()

        if (_im_func(self.configure_options) !=
                _im_func(MRJobLauncher.configure_options)):
            log.warning('configure_options() is deprecated and will be'
                        ' removed in v0.7.0; please use configure_args()'
                        ' instead.')
            self.configure_options()

        # don't pass None to parse_args unless we're actually running
        # the MRJob script
        if args is _READ_ARGS_FROM_SYS_ARGV:
            self._cl_args = sys.argv[1:]
        else:
            # don't pass sys.argv to self.arg_parser, and have it
            # raise an exception on error rather than printing to stderr
            # and exiting.
            self._cl_args = args or []

            def error(msg):
                raise ValueError(msg)

            if not from_cl:
                self.arg_parser.error = error

        self.load_args(self._cl_args)

        if (_im_func(self.load_options) !=
                _im_func(MRJobLauncher.load_options)):
            log.warning('load_options() is deprecated and will be'
                        ' removed in v0.7.0; please use load_args()'
                        ' instead.')
            self.load_options(self._cl_args)

        # Make it possible to redirect stdin, stdout, and stderr, for testing
        # See stdin, stdout, stderr properties and sandbox(), below.
        self._stdin = None
        self._stdout = None
        self._stderr = None

    # by default, self.stdin, self.stdout, and self.stderr are
    # sys.std*.buffer if it exists, and sys.std* otherwise (they should
    # always deal with bytes, not Unicode).
    #
    # *buffer* is pretty much a Python 3 thing, though some platforms
    # (notably Jupyterhub) don't have it. See #1441

    @property
    def stdin(self):
        return self._stdin or getattr(sys.stdin, 'buffer', sys.stdin)

    @property
    def stdout(self):
        return self._stdout or getattr(sys.stdout, 'buffer', sys.stdout)

    @property
    def stderr(self):
        return self._stderr or getattr(sys.stderr, 'buffer', sys.stderr)

    @classmethod
    def _usage(cls):
        """Command line usage string for this class"""
        return ("mrjob run [script path|executable path|--help]"
                " [options] [input files]")

    def _print_help(self, options):
        """Print help for this job. This will either print runner
        or basic help. Override to allow other kinds of help."""
        if options.runner:
            _print_help_for_runner(
                self._runner_opt_names_for_help(), options.deprecated)
        else:
            _print_basic_help(self.arg_parser,
                              self._usage(),
                              options.deprecated)

    @classmethod
    def run(cls, args=_READ_ARGS_FROM_SYS_ARGV):
        """Entry point for running job from the command-line.

        This is also the entry point when a mapper or reducer is run
        by Hadoop Streaming.

        Does one of:

        * Print step information (:option:`--steps`). See :py:meth:`show_steps`
        * Run a mapper (:option:`--mapper`). See :py:meth:`run_mapper`
        * Run a combiner (:option:`--combiner`). See :py:meth:`run_combiner`
        * Run a reducer (:option:`--reducer`). See :py:meth:`run_reducer`
        * Run the entire job. See :py:meth:`run_job`
        """
        # load options from the command line
        launcher = cls(args=args)
        launcher.run_job()

    def execute(self):
        # Launcher only runs jobs, doesn't do any Hadoop Streaming stuff
        self.run_job()

    def make_runner(self):
        """Make a runner based on command-line arguments, so we can
        launch this job on EMR, on Hadoop, or locally.

        :rtype: :py:class:`mrjob.runner.MRJobRunner`
        """
        return self._runner_class()(**self._runner_kwargs())

    @classmethod
    def set_up_logging(cls, quiet=False, verbose=False, stream=None):
        """Set up logging when running from the command line. This is also
        used by the various command-line utilities.

        :param bool quiet: If true, don't log. Overrides *verbose*.
        :param bool verbose: If true, set log level to ``DEBUG`` (default is
                             ``INFO``)
        :param bool stream: Stream to log to (default is ``sys.stderr``)
        """
        if quiet:
            log_to_null(name='mrjob')
            log_to_null(name='__main__')
        else:
            log_to_stream(name='mrjob', debug=verbose, stream=stream)
            log_to_stream(name='__main__', debug=verbose, stream=stream)

    def run_job(self):
        """Run the all steps of the job, logging errors (and debugging output
        if :option:`--verbose` is specified) to STDERR and streaming the
        output to STDOUT.

        Called from :py:meth:`run`. You'd probably only want to call this
        directly from automated tests.
        """
        # self.stderr is strictly binary, need to wrap it so it's possible
        # to log to it in Python 3
        log_stream = codecs.getwriter('utf_8')(self.stderr)

        self.set_up_logging(quiet=self.options.quiet,
                            verbose=self.options.verbose,
                            stream=log_stream)

        with self.make_runner() as runner:
            try:
                runner.run()
            except StepFailedException as e:
                # no need for a runner stacktrace if step failed; runners will
                # log more useful information anyway
                log.error(str(e))
                sys.exit(1)

            if self._should_cat_output():
                for chunk in runner.cat_output():
                    self.stdout.write(chunk)
                self.stdout.flush()

    def _should_cat_output(self):
        if self.options.cat_output is None:
            return not self.options.output_dir
        else:
            return self.options.cat_output

    ### Command-line arguments ###

    def configure_args(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Re-define to define custom command-line arguments or pass
        through existing ones::

            def configure_args(self):
                super(MRYourJob, self).configure_args()

                self.add_passthru_arg(...)
                self.add_file_arg(...)
                self.pass_arg_through(...)
                ...
        """
        # if script path isn't set, expect it on the command line
        if self._FIRST_ARG_IS_SCRIPT_PATH:
            self.arg_parser.add_argument(
                dest='script_path',
                help='path of script to launch')

        self.arg_parser.add_argument(
            dest='args', nargs='*',
            help=('input paths to read (or stdin if not set). If --spark'
                  ' is set, the input and output path for the spark job.'))

        _add_basic_args(self.arg_parser)
        _add_job_args(self.arg_parser)
        _add_runner_args(self.arg_parser)

    def load_args(self, args):
        """Load command-line options into ``self.options`` and
        ``self._script_path``.

        Called from :py:meth:`__init__()` after :py:meth:`configure_args`.

        :type args: list of str
        :param args: a list of command line arguments. ``None`` will be
                     treated the same as ``[]``.

        Re-define if you want to post-process command-line arguments::

            def load_args(self, args):
                super(MRYourJob, self).load_args(args)

                self.stop_words = self.options.stop_words.split(',')
                ...
        """
        if hasattr(self.arg_parser, 'parse_intermixed_args'):
            # restore old optparse behavior on Python 3.7+. See #1701
            self.options = self.arg_parser.parse_intermixed_args(args)
        else:
            self.options = self.arg_parser.parse_args(args)

        if self.options.help:
            self._print_help(self.options)
            sys.exit(0)

        if self._FIRST_ARG_IS_SCRIPT_PATH:
            # should always be set, just hedging
            self._script_path = self.options.script_path

    def add_file_arg(self, *args, **kwargs):
        """Add a command-line option that sends an external file
        (e.g. a SQLite DB) to Hadoop::

             def configure_args(self):
                super(MRYourJob, self).configure_args()
                self.add_file_arg('--scoring-db', help=...)

        This does the right thing: the file will be uploaded to the working
        dir of the script on Hadoop, and the script will be passed the same
        option, but with the local name of the file in the script's working
        directory.

        .. note::

           If you pass a file to a job, best practice is to lazy-load its
           contents (e.g. make a method that opens the file the first time
           you call it) rather than loading it in your job's constructor or
           :py:meth:`load_args`. Not only is this more efficient, it's
           necessary if you want to run your job in a Spark executor
           (because the file may not be in the same place in a Spark driver).

        .. note::

           We suggest against sending Berkeley DBs to your job, as
           Berkeley DB is not forwards-compatible (so a Berkeley DB that you
           construct on your computer may not be readable from within
           Hadoop). Use SQLite databases instead. If all you need is an on-disk
           hash table, try out the :py:mod:`sqlite3dbm` module.

        .. versionchanged:: 0.6.6

           now accepts explicit ``type=str``

        .. versionchanged:: 0.6.8

           fully supported on Spark, including ``local[*]`` master
        """
        if kwargs.get('type') not in (None, str):
            raise ArgumentTypeError(
                'file options must take strings')

        if kwargs.get('action') not in (None, 'append', 'store'):
            raise ArgumentTypeError(
                "file options must use the actions 'store' or 'append'")

        pass_opt = self.arg_parser.add_argument(*args, **kwargs)

        self._file_arg_dests.add(pass_opt.dest)

    def add_passthru_arg(self, *args, **kwargs):
        """Function to create options which both the job runner
        and the job itself respect (we use this for protocols, for example).

        Use it like you would use
        :py:func:`argparse.ArgumentParser.add_argument`::

            def configure_args(self):
                super(MRYourJob, self).configure_args()
                self.add_passthru_arg(
                    '--max-ngram-size', type=int, default=4, help='...')

        If you want to pass files through to the mapper/reducer, use
        :py:meth:`add_file_arg` instead.

        If you want to pass through a built-in option (e.g. ``--runner``), use
        :py:meth:`pass_arg_through` instead.
        """
        pass_opt = self.arg_parser.add_argument(*args, **kwargs)

        self._passthru_arg_dests.add(pass_opt.dest)

    def pass_arg_through(self, opt_str):
        """Pass the given argument through to the job."""

        # _actions is hidden but the interface appears to be stable,
        # and there's no non-hidden interface we can use
        for action in self.arg_parser._actions:
            if opt_str in action.option_strings or opt_str == action.dest:
                self._passthru_arg_dests.add(action.dest)
                break
        else:
            raise ValueError('unknown arg: %s' % opt_str)

    def is_task(self):
        """True if this is a mapper, combiner, or reducer.

        This is mostly useful inside :py:meth:`load_args`, to disable
        loading args when we aren't running inside Hadoop Streaming.
        """
        return False

    ### old optparse shims ###

    @property
    def args(self):
        class_name = self.__class__.__name__
        log.warning(
            '%s.args is a deprecated alias for %s.options.args, and will'
            ' be removed in v0.7.0' % (class_name, class_name))
        return self.options.args

    def configure_options(self):
        """.. deprecated:: 0.6.0

        Use :py:meth:`configure_args` instead.
        """
        pass  # deprecation warning is in __init__()

    def load_options(self, args):
        """.. deprecated:: 0.6.0

        Use :py:meth:`load_args` instead.
        """
        pass  # deprecation warning is in __init__()

    def add_file_option(self, *args, **kwargs):
        """.. deprecated:: 0.6.0

        Like :py:meth:`add_file_arg` except that it emulates the
        old :py:mod:`optparse` interface (which is almost identical).

        .. versionchanged:: 0.6.6

           accepts ``type='str'`` (used to only accept ``type='string'``)
        """
        log.warning(
            'add_file_option() is deprecated and will be removed in'
            ' v0.7.0. Use add_file_arg() instead.')

        self.add_file_arg(*args, **_optparse_kwargs_to_argparse(**kwargs))

    def add_passthrough_option(self, *args, **kwargs):
        """.. deprecated:: 0.6.0

        Like :py:meth:`add_passthru_arg` except that it emulates the
        old :py:mod:`optparse` interface (which is almost identical).

        .. versionchanged:: 0.6.6

           accepts ``type='str'`` (used to only accept ``type='string'``)
        """
        log.warning(
            'add_passthrough_option() is deprecated and will be removed in'
            ' v0.7.0. Use add_passthru_arg() instead.')

        self.add_passthru_arg(*args, **_optparse_kwargs_to_argparse(**kwargs))

    def pass_through_option(self, opt_str):
        """.. deprecated:: 0.6.0

        Like :py:meth:`pass_arg_through` except that it emulates the
        old :py:mod:`optparse` interface (which is almost identical).

        .. versionchanged:: 0.6.6

           accepts ``type='str'`` (used to only accept ``type='string'``)
        """
        log.warning(
            'pass_through_option() is deprecated and will be removed in'
            ' v0.7.0. Use pass_arg_through() instead.')

        self.pass_arg_through(opt_str)

    ### runners ###

    def _runner_class(self):
        """Runner class, as indicated by ``--runner``. This uses conditional
        imports to avoid importing runner modules that we don't need (and may
        not have libraries for).

        Defaults to ``'local'`` and disallows use of inline runner.
        """
        if not self.options.runner:
            return LocalMRJobRunner

        elif self.options.runner == 'inline':
            raise ValueError(
                "inline is not supported in the multi-lingual"
                " launcher.")

        return _runner_class(self.options.runner)

    def _runner_kwargs(self):
        # just use combine_dicts() and not combine_confs(); leave the
        # magic to the runner
        return combine_dicts(
            self._non_option_kwargs(),
            # don't screen out irrelevant opts (see #1898)
            self._kwargs_from_switches(set(_RUNNER_OPTS)),
            self._job_kwargs(),
        )

    def _runner_opt_names_for_help(self):
        opts = set(self._runner_class().OPT_NAMES)

        if self.options.runner == 'spark':
            # specific to Spark runner, but command-line only, so it doesn't
            # appear in SparkMRJobRunner.OPT_NAMES (see #2040)
            opts.add('max_output_files')

        return opts

    def _non_option_kwargs(self):
        """Keyword arguments to runner constructor that can't be set
        in mrjob.conf.

        These should match the (named) arguments to
        :py:meth:`~mrjob.runner.MRJobRunner.__init__`.
        """
        # build extra_args
        raw_args = _parse_raw_args(self.arg_parser, self._cl_args)

        extra_args = []

        for dest, option_string, args in raw_args:
            if dest in self._file_arg_dests:
                extra_args.append(option_string)
                extra_args.append(parse_legacy_hash_path('file', args[0]))
            elif dest in self._passthru_arg_dests:
                # special case for --hadoop-arg=-verbose etc.
                if (option_string and len(args) == 1 and
                        args[0].startswith('-')):
                    extra_args.append('%s=%s' % (option_string, args[0]))
                else:
                    if option_string:
                        extra_args.append(option_string)
                    extra_args.extend(args)

        # max_output_files is added by _add_runner_args() but can only
        # be set from the command line, so we add it here (see #2040)
        return dict(
            conf_paths=self.options.conf_paths,
            extra_args=extra_args,
            hadoop_input_format=self.hadoop_input_format(),
            hadoop_output_format=self.hadoop_output_format(),
            input_paths=self.options.args,
            max_output_files=self.options.max_output_files,
            mr_job_script=self._script_path,
            output_dir=self.options.output_dir,
            partitioner=self.partitioner(),
            stdin=self.stdin,
            step_output_dir=self.options.step_output_dir,
        )

    def _kwargs_from_switches(self, keys):
        return dict(
            (key, getattr(self.options, key))
            for key in keys if hasattr(self.options, key)
        )

    def _job_kwargs(self):
        """Keyword arguments to the runner class that can be specified
        by the job/launcher itself."""
        # use the most basic combiners; leave magic like resolving paths
        # and blanking out jobconf values to the runner
        return dict(
            # command-line has the final say on jobconf and libjars
            jobconf=combine_dicts(
                self.jobconf(), self.options.jobconf),
            libjars=combine_lists(
                self.libjars(), self.options.libjars),
            partitioner=self.partitioner(),
            sort_values=self.sort_values(),
            # TODO: should probably put self.options last below for consistency
            upload_archives=combine_lists(
                self.options.upload_archives, self.archives()),
            upload_dirs=combine_lists(
                self.options.upload_dirs, self.dirs()),
            upload_files=combine_lists(
                self.options.upload_files, self.files()),
        )

    ### Hooks for options defined by the job ###

    def archives(self):
        """See :py:meth:`mrjob.job.MRJob.files`."""
        return []

    def dirs(self):
        """See :py:meth:`mrjob.job.MRJob.dirs`."""
        return []

    def files(self):
        """See :py:meth:`mrjob.job.MRJob.files`."""
        return []

    def hadoop_input_format(self):
        """See :py:meth:`mrjob.job.MRJob.hadoop_input_format`."""
        return None

    def hadoop_output_format(self):
        """See :py:meth:`mrjob.job.MRJob.hadoop_output_format`."""
        return None

    def jobconf(self):
        """See :py:meth:`mrjob.job.MRJob.jobconf`."""
        return {}

    def libjars(self):
        """See :py:meth:`mrjob.job.MRJob.libjars`."""
        return []

    def partitioner(self):
        """See :py:meth:`mrjob.job.MRJob.partitioner`."""
        return None

    def sort_values(self):
        """See :py:meth:`mrjob.job.MRJob.sort_values`."""
        return None

    ### Testing ###

    def sandbox(self, stdin=None, stdout=None, stderr=None):
        """Redirect stdin, stdout, and stderr for automated testing.

        You can set stdin, stdout, and stderr to file objects. By
        default, they'll be set to empty ``BytesIO`` objects.
        You can then access the job's file handles through ``self.stdin``,
        ``self.stdout``, and ``self.stderr``. See :ref:`testing` for more
        information about testing.

        You may call sandbox multiple times (this will essentially clear
        the file handles).

        ``stdin`` is empty by default. You can set it to anything that yields
        lines::

            mr_job.sandbox(stdin=BytesIO(b'some_data\\n'))

        or, equivalently::

            mr_job.sandbox(stdin=[b'some_data\\n'])

        For convenience, sandbox() returns self, so you can do::

            mr_job = MRJobClassToTest().sandbox()

        Simple testing example::

            mr_job = MRYourJob.sandbox()
            self.assertEqual(list(mr_job.reducer('foo', ['a', 'b'])), [...])

        More complex testing example::

            from io import BytesIO

            from mrjob.parse import parse_mr_job_stderr
            from mrjob.protocol import JSONProtocol

            mr_job = MRYourJob(args=[...])

            fake_input = '"foo"\\t"bar"\\n"foo"\\t"baz"\\n'
            mr_job.sandbox(stdin=BytesIO(fake_input))

            mr_job.run_reducer(link_num=0)

            self.assertEqual(mr_job.stdout.getvalue(), ...)
            self.assertEqual(parse_mr_job_stderr(mr_job.stderr), ...)

        .. note::

           If you are using Spark, it's recommended you only pass in
           :py:class:`io.BytesIO` or other serializable alternatives to file
           objects. *stdin*, *stdout*, and *stderr* get stored as job
           attributes, which means if they aren't serializable, neither
           is the job instance or its methods.
        """
        self._stdin = stdin or BytesIO()
        self._stdout = stdout or BytesIO()
        self._stderr = stderr or BytesIO()

        return self
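
When MRJobLauncher is constructed programmatically (from_cl=False and explicit args), __init__() above replaces the parser's error() method so that bad arguments raise ValueError instead of printing usage and exiting. A minimal standalone sketch of that trick, using a hypothetical parser rather than mrjob's:

from argparse import ArgumentParser

parser = ArgumentParser(prog='demo')
parser.add_argument('--count', type=int)

def error(msg):
    # Replace argparse's default behaviour (print usage, then sys.exit(2))
    # so callers can handle bad input as a normal exception.
    raise ValueError(msg)

parser.error = error

try:
    parser.parse_args(['--count', 'not-a-number'])
except ValueError as exc:
    print('rejected:', exc)
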
Example #4
class ArgMaster():
    '''
    A wrapper class for ArgumentParser with easy-to-use arguments. See update_parser() for details.
    sortme will sort all arguments by name (except positionals).
    Other arguments are passed on to the ArgumentParser constructor.
    '''
    def __init__(self,
                 sortme=True,
                 allow_abbrev=True,
                 usage=None,
                 description=None,
                 newline='\n',
                 verbose=False,
                 exit=True,
                 **kargs):

        # Parsing
        self.verbose = verbose  # Print what each argument does
        self.dashes = ('-', '--')  # - or -- before argument
        self.exit = exit  # Quit on error

        # Help Formatting:
        self.sortme = sortme  # Sort all non positionals args
        self.usage = usage  # Usage message
        self.description = description
        self.newline = newline  # Newlines in print_help
        self.autoformat = True  # Capitalize sentences and add a period

        # Internal
        self.parser = ArgumentParser(allow_abbrev=allow_abbrev,
                                     add_help=False,
                                     usage=SUPPRESS,
                                     **kargs)
        self.groups = []  # List of all groups

        # Allow optionals before positionals:
        self.intermixed = hasattr(self.parser, 'parse_intermixed_args')

    def parse(self, args=None, am_help=True, **kargs):
        "Parse the args and return them"
        if not args:
            args = sys.argv[1:]

        if not am_help:
            self.parser.add_help = True
            return self.parser.parse_args(args)

        # Match help
        for arg in args:
            if re.match('--*h$|--*help$', arg):
                self.print_help(**kargs)
                if self.exit:
                    sys.exit(0)
                return None
        try:
            if self.intermixed:
                return self.parser.parse_intermixed_args(args)
            else:
                return self.parser.parse_args(args)
        except SystemExit:
            self.print_help(**kargs)
            if self.exit:
                sys.exit(0)

    def print_help(self, show_type=True, wrap=-4, tab='  '):
        '''Print a custom help message using only ArgMaster args
        show_type = append the variable type expected after each optional argument.
        --arg <int> <int> will expect 2 integers after the arg
        wrap = word wrap instead of using full terminal. 0 = Terminal width
        sort = sort alphabetically. Positional variables are never sorted.
        To sort individual groups, add a special key: group.sortme = True

        Warning: If your variable is not in a group, it will not be shown!'''

        if self.description:
            print('\n' + self.description)

        if self.usage:
            name = os.path.basename(sys.argv[0])
            print('\n' + "Usage:", name, self.usage)
        final = []
        width = 0  # Max width of the variables column
        for group in self.groups:
            out = []
            for args in group['args']:
                msg = args['msg']
                if msg == SUPPRESS:
                    continue
                alias = args['alias']
                if show_type:
                    if args['typ'] and args['typ'] != bool:
                        if args['typ'] == list:
                            typ = '...'
                        else:
                            typ = '<' + str(args['typ']).replace(
                                'class ', '')[2:-2] + '>'
                        alias += ' ' + typ
                if len(alias) > width:
                    width = len(alias)
                out.append([alias, msg])

            if group['sortme'] is not None:
                sortme = group['sortme']
            else:
                sortme = self.sortme
            if sortme:
                # Sort the group while maintaining the order of positional arguments at the top
                positionals = [
                    out.pop(line) for line in range(len(out) - 1, -1, -1)
                    if out[line][0].startswith('<')
                ]
                out.sort()
                out = list(reversed(positionals)) + out
            final.append(out)

        for index, out in enumerate(final):
            group = self.groups[index]
            if out:
                print(self.newline, end='')
                for line, _ in enumerate(out):
                    out[line][0] = tab + out[line][0].ljust(width)
                if 'title' in group:
                    print(group['title'])  # .rstrip(':') + ':')
                if 'description' in group:
                    auto_cols([[tab + group['description']]], wrap=wrap)
                auto_cols(out, wrap=wrap)
        print()

    def update(self, args, title=None, sortme=None, **kargs):
        "Pass list to update_parser() and append result to parser"
        group = self.parser.add_argument_group(title)
        args = self.update_parser(args, group, **kargs)
        self.groups.append(dict(args=args, title=title, sortme=sortme))

    def update_parser(self,
                      lines,
                      parser=None,
                      hidden=False,
                      positionals=False):
        '''
        A more intuitive method for adding arguments
        parser can be empty to return a new parser or a parser argument group
        hidden = Suppress arguments from showing up in help
        positionals = Make group positional arguments
        verbose = Show verbosely how each line in the array is added to argparse


        Format:
            Pass an array with lines in the format:

                ('alias', 'variable_name', type, default),
                "help string",

            You only need to include the fields required, but you can't skip over any.
                ('alias', '',)        # okay
                ('alias', type,)      # not okay

            Substitute the word list with a number like "2" to get that number of args required.
                ('list-args', '', 2)

            Positional arguments are optional by default, but you can specify a number to make them required.
            To use them, make sure to pass: positionals=True to update_parser

                ('pos-arg', '', 1)

        See what your arguments are producing by passing verbose=True or running easy_args.show_args(args)
        '''

        # Make sure the loop ends on a help string
        if not isinstance(lines[-1], str):
            lines.append("")

        alias = None  # --variable name
        varname = None  # Variable Name
        default = None  # Default value
        out = []

        def update():
            "Add the current argument to the parser."
            nonlocal alias
            if parser:
                if positionals:
                    parser.add_argument(varname,
                                        default=default,
                                        nargs=nargs,
                                        help=msg)
                else:
                    # alias = '--' + alias
                    aliases = [d + alias for d in self.dashes]
                    if typ == bool:
                        parser.add_argument(*aliases,
                                            dest=varname,
                                            default=default,
                                            action=action,
                                            help=msg)
                    else:
                        parser.add_argument(*aliases,
                                            dest=varname,
                                            default=default,
                                            type=typ,
                                            nargs=nargs,
                                            help=msg,
                                            metavar='')
                out.append(
                    dict(alias=alias,
                         dest=varname,
                         typ=typ,
                         default=default,
                         msg=msg))
                if self.verbose:
                    print('alias  :', alias)
                    print('dest   :', varname)
                    print('default:', default)
                    print('type   :', typ)
                    print('nargs  :', nargs, '\n\n')

        for index, args in enumerate(lines):

            # Add help if available
            if isinstance(args, str):
                msg = undent(args.strip())
                if self.autoformat:
                    msg = msg.title()
                    if msg and not msg.endswith('.'):
                        last = msg.split()[-1]
                        if last[-1].isalnum() and not last.startswith('-'):
                            msg = msg + '.'
                if default:
                    msg += "  Default: " + str(default)

            if hidden:
                # Hide the help text:
                msg = SUPPRESS

            # If on a new tuple line, add_argument
            if alias or varname:
                update()
                alias = None
                varname = None
                msg = ""

            # Continue if not on a new tuple line
            if isinstance(args, str):
                continue

            # Read the values from the tuple:
            alias = args[0].lstrip('-')

            # Variable Name
            varname = list_get(args, 1)
            if not varname:
                varname = alias

            # Type
            typ = list_get(args, 2, str)

            # Default value
            if typ == list or type(typ) == int:
                default = list_get(args, 3, [])
            else:
                default = list_get(args, 3, '')

            # Argument Type and number required
            if typ == list:
                nargs = '*'
                typ = str
            elif isinstance(typ, int):
                if positionals and typ == 1:
                    nargs = None
                else:
                    nargs = typ
                typ = str
            else:
                nargs = '?'

            # Special handing for booleans
            if typ == bool:
                if default:
                    action = 'store_false'
                else:
                    action = 'store_true'
                    default = False
            if index == len(lines) - 1:
                update()

        return out
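
A hedged usage sketch of the tuple format described in the update_parser() docstring above, assuming ArgMaster and its module-level helpers (list_get, undent and friends) are importable; the option names are made up:

am = ArgMaster(description='Demo tool')
am.update([
    ('verbose', '', bool),
    "print extra output",
    ('count', 'max_count', int, 3),
    "how many items to process",
], title='Options:')
args = am.parse(['--count', '5', '--verbose'])
print(args.max_count, args.verbose)  # 5 True
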
Example #5
def main() -> None:
    """
    The main entry point.
    :return: None
    """
    parser = ArgumentParser(description=description)
    parser.add_argument('--version',
                        action='version',
                        version=version)
    parser.add_argument('-v', "--verbose",
                        help='increase verbosity',
                        action='store_true',
                        default=False)

    server_group = parser.add_argument_group(title='server parameter')
    server_group.add_argument('url',
                              help='the url of the vault server',
                              type=str,
                              nargs='?',
                              default=os.environ.get('VAULT_ADDR', ''))
    server_group.add_argument('kv_store',
                              help='the kv store to export from',
                              type=str)

    output_group = parser.add_argument_group(title="output configuration")
    output_group.add_argument('--no-toc',
                              help='don\'t print the toc',
                              action='store_true',
                              default=False)
    output_group.add_argument('--no-content',
                              help='don\'t print the content',
                              action='store_true',
                              default=False)

    # login method group
    login_group = parser.add_mutually_exclusive_group(required=False)
    login_group.add_argument('--ldap',
                             help='login via ldap',
                             action='store_true')
    login_group.add_argument('--token',
                             help='login via token',
                             action='store_true')

    login_parameter_group = parser.add_argument_group(title='login parameter')
    login_parameter_group.add_argument('--username', '-u',
                                       help="the username with which to login, "
                                       + "if omitted you\'ll be asked")
    login_parameter_group.add_argument('--password', '-p',
                                       help='the password to login, if omitted you\'ll be asked')
    login_parameter_group.add_argument('--tokenLogin', '-t',
                                       help='the token to login, if omitted you\'ll be asked',
                                       default=os.environ.get('VAULT_TOKEN', ''))

    args = parser.parse_intermixed_args()

    if args.verbose:
        log.basicConfig(format="%(message)s", level=log.DEBUG)
        log.info("Verbose output.")
    else:
        log.basicConfig(format="%(message)s")

    if args.no_toc and args.no_content:
        log.error("the usage of --no-toc and --no-content together does not produce any output")
        log.error("Aborting...")
        exit(2)

    config: Config = Config(args.url, args.kv_store)

    if args.ldap:
        client = config.login_via_ldap(args.username, args.password)
    elif args.token:
        client = config.login_via_token(args.tokenLogin)
    else:
        client = config.login()

    config.log()

    folder: Folder = get_base_folder(client, config.kv_store)

    print("`created by {name} {version} ({url})`\n".format(name=name, version=version, url=url))

    if not args.no_toc:
        print(folder.toc())

    if not args.no_content:
        print(folder.print())
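
The example above combines a mutually exclusive group (so --ldap and --token cannot be used together) with an ordinary argument group for the login parameters. A minimal sketch of that combination, reusing only the option names:

from argparse import ArgumentParser

parser = ArgumentParser(prog='demo')

login_group = parser.add_mutually_exclusive_group(required=False)
login_group.add_argument('--ldap', action='store_true', help='login via ldap')
login_group.add_argument('--token', action='store_true', help='login via token')

login_parameter_group = parser.add_argument_group(title='login parameter')
login_parameter_group.add_argument('--username', '-u')
login_parameter_group.add_argument('--password', '-p')

args = parser.parse_intermixed_args(['--ldap', '-u', 'alice'])
print(args.ldap, args.token, args.username)  # True False alice
# Passing both --ldap and --token would make argparse exit with an error.
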
Example #6
# Snippet continues an argument-parsing block: `parser` is an ArgumentParser
# created earlier, and the options behind args.log_file, args.history_file,
# args.backend and args.utf_8 are presumably defined above this excerpt.
parser.add_argument('--multi-line',
                    action='store_true',
                    default=True,
                    help='Use multiline input (default).')
#    parser.add_argument('--single-line',
#        dest='multi_line', action='store_false',
#        help='Do not use, WIP.')
parser.add_argument(
    '--prompt-string',
    default="'> ",
    help='Set hol prompt string to something that would never be printed'
         ' in other circumstances.')
parser.add_argument('hol_path', help='hol executable path.')
parser.add_argument('hol_args', nargs='*', help='hol arguments.')
args = parser.parse_intermixed_args()
del parser
log_file = open(args.log_file, 'a',
                encoding='utf-8') if args.log_file else None
try:
    main(args.hol_path,
         args=args.hol_args,
         log_file=log_file,
         history_file=args.history_file,
         backend=args.backend,
         unicode=args.utf_8,
         multiline=args.multi_line,
         prompt_string=args.prompt_string)
finally:
    if log_file:
        log_file.close()