def configure_options(self):
    """Define arguments for this script. Called from :py:meth:`__init__()`.

    Run ``python -m mrjob.job.MRJob --help`` to see all options.

    Re-define this method to add custom command-line arguments::

        def configure_options(self):
            super(MRYourJob, self).configure_options()

            self.add_passthrough_option(...)
            self.add_file_option(...)
            ...
    """
    self.option_parser.add_option(
        '--help', dest='help_main', action='store_true', default=False,
        help='show this message and exit')

    self.option_parser.add_option(
        '--help-emr', dest='help_emr', action='store_true', default=False,
        help='show EMR-related options')

    self.option_parser.add_option(
        '--help-hadoop', dest='help_hadoop', action='store_true',
        default=False, help='show Hadoop-related options')

    self.option_parser.add_option(
        '--help-runner', dest='help_runner', action='store_true',
        default=False, help='show runner-related options')

    # protocol stuff
    self.proto_opt_group = OptionGroup(
        self.option_parser, 'Protocols')
    self.option_parser.add_option_group(self.proto_opt_group)

    self._passthrough_options.extend(
        add_protocol_opts(self.proto_opt_group))

    # options for running the entire job
    self.runner_opt_group = OptionGroup(
        self.option_parser, 'Running the entire job')
    self.option_parser.add_option_group(self.runner_opt_group)

    add_runner_opts(self.runner_opt_group, self._DEFAULT_RUNNER)
    add_basic_opts(self.runner_opt_group)

    # options for configuring or emulating Hadoop
    self.hadoop_opts_opt_group = OptionGroup(
        self.option_parser,
        'Configuring or emulating Hadoop (these apply when you set -r'
        ' hadoop, -r emr, or -r local)')
    self.option_parser.add_option_group(self.hadoop_opts_opt_group)

    add_hadoop_shared_opts(self.hadoop_opts_opt_group)

    # options common to Hadoop and EMR
    self.hadoop_emr_opt_group = OptionGroup(
        self.option_parser,
        'Running on Hadoop or EMR (these apply when you set -r hadoop or'
        ' -r emr)')
    self.option_parser.add_option_group(self.hadoop_emr_opt_group)

    add_hadoop_emr_opts(self.hadoop_emr_opt_group)

    # options for running the job on Hadoop
    self.hadoop_opt_group = OptionGroup(
        self.option_parser,
        'Running on Hadoop (these apply when you set -r hadoop)')
    self.option_parser.add_option_group(self.hadoop_opt_group)

    add_hadoop_opts(self.hadoop_opt_group)

    # options for running the job on EMR
    self.emr_opt_group = OptionGroup(
        self.option_parser,
        'Running on Amazon Elastic MapReduce (these apply when you set -r'
        ' emr)')
    self.option_parser.add_option_group(self.emr_opt_group)

    add_emr_opts(self.emr_opt_group)
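
# --- Illustrative sketch, not part of this module ---
# The docstring above shows how a job re-defines configure_options() to add
# its own command-line arguments. A minimal, self-contained example of what
# that might look like follows; the class name and the specific options are
# hypothetical, chosen only to demonstrate add_passthrough_option() and
# add_file_option().

from mrjob.job import MRJob


class MRWordFreqCount(MRJob):  # hypothetical job class

    def configure_options(self):
        # keep all the built-in options defined above
        super(MRWordFreqCount, self).configure_options()

        # --min-count is parsed locally and also passed through to the
        # mapper/reducer tasks, so self.options.min_count is available there
        self.add_passthrough_option(
            '--min-count', dest='min_count', type='int', default=1,
            help='skip words that occur fewer than this many times')

        # --stop-words names a local file that mrjob uploads into each
        # task's working directory
        self.add_file_option(
            '--stop-words', dest='stop_words', default=None,
            help='path to a file of words to ignore')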