Ejemplo n.º 1
0
    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...
        """
        self.option_parser.add_option('--help',
                                      dest='help_main',
                                      action='store_true',
                                      default=False,
                                      help='show this message and exit')

        self.option_parser.add_option('--help-emr',
                                      dest='help_emr',
                                      action='store_true',
                                      default=False,
                                      help='show EMR-related options')

        self.option_parser.add_option('--help-hadoop',
                                      dest='help_hadoop',
                                      action='store_true',
                                      default=False,
                                      help='show Hadoop-related options')

        self.option_parser.add_option('--help-runner',
                                      dest='help_runner',
                                      action='store_true',
                                      default=False,
                                      help='show runner-related options')

        # protocol stuff
        self.proto_opt_group = OptionGroup(self.option_parser, 'Protocols')
        self.option_parser.add_option_group(self.proto_opt_group)

        add_protocol_opts(self.proto_opt_group)

        # options for running the entire job
        self.runner_opt_group = OptionGroup(self.option_parser,
                                            'Running the entire job')
        self.option_parser.add_option_group(self.runner_opt_group)

        add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        add_basic_opts(self.runner_opt_group)

        self.hadoop_opts_opt_group = OptionGroup(
            self.option_parser,
            'Configuring or emulating Hadoop (these apply when you set -r'
            ' hadoop, -r emr, or -r local)')
        self.option_parser.add_option_group(self.hadoop_opts_opt_group)

        add_hadoop_shared_opts(self.hadoop_opts_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        self.option_parser.add_option_group(self.hadoop_emr_opt_group)

        add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        self.option_parser.add_option_group(self.hadoop_opt_group)

        add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Amazon Elastic MapReduce (these apply when you set -r'
            ' emr)')
        self.option_parser.add_option_group(self.emr_opt_group)

        add_emr_opts(self.emr_opt_group)
Ejemplo n.º 2
0
    def configure_options(self):
        """Define arguments for this script. Called from :py:meth:`__init__()`.

        Run ``python -m mrjob.job.MRJob --help`` to see all options.

        Re-define to define custom command-line arguments::

            def configure_options(self):
                super(MRYourJob, self).configure_options

                self.add_passthrough_option(...)
                self.add_file_option(...)
                ...
        """
        self.option_parser.add_option(
            '--help', dest='help_main', action='store_true', default=False,
            help='show this message and exit')

        self.option_parser.add_option(
            '--help-emr', dest='help_emr', action='store_true', default=False,
            help='show EMR-related options')

        self.option_parser.add_option(
            '--help-hadoop', dest='help_hadoop', action='store_true',
            default=False,
            help='show Hadoop-related options')

        self.option_parser.add_option(
            '--help-runner', dest='help_runner', action='store_true',
            default=False, help='show runner-related options')

        # protocol stuff
        self.proto_opt_group = OptionGroup(
            self.option_parser, 'Protocols')
        self.option_parser.add_option_group(self.proto_opt_group)

        self._passthrough_options.extend(
            add_protocol_opts(self.proto_opt_group))

        # options for running the entire job
        self.runner_opt_group = OptionGroup(
            self.option_parser, 'Running the entire job')
        self.option_parser.add_option_group(self.runner_opt_group)

        add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
        add_basic_opts(self.runner_opt_group)

        self.hadoop_opts_opt_group = OptionGroup(
            self.option_parser,
            'Configuring or emulating Hadoop (these apply when you set -r'
            ' hadoop, -r emr, or -r local)')
        self.option_parser.add_option_group(self.hadoop_opts_opt_group)

        add_hadoop_shared_opts(self.hadoop_opts_opt_group)

        # options common to Hadoop and EMR
        self.hadoop_emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop or EMR (these apply when you set -r hadoop or'
            ' -r emr)')
        self.option_parser.add_option_group(self.hadoop_emr_opt_group)

        add_hadoop_emr_opts(self.hadoop_emr_opt_group)

        # options for running the job on Hadoop
        self.hadoop_opt_group = OptionGroup(
            self.option_parser,
            'Running on Hadoop (these apply when you set -r hadoop)')
        self.option_parser.add_option_group(self.hadoop_opt_group)

        add_hadoop_opts(self.hadoop_opt_group)

        # options for running the job on EMR
        self.emr_opt_group = OptionGroup(
            self.option_parser,
            'Running on Amazon Elastic MapReduce (these apply when you set -r'
            ' emr)')
        self.option_parser.add_option_group(self.emr_opt_group)

        add_emr_opts(self.emr_opt_group)