def _get_spark_args(parser, cl_args):
    raw_args = _parse_raw_args(parser, cl_args)

    spark_args = []

    for dest, option_string, args in raw_args:
        if dest in _SPARK_ARG_OPT_NAMES:
            spark_args.append(option_string)
            spark_args.extend(args)

    return spark_args
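# For intuition, a minimal self-contained sketch of the filtering pattern
# above. The stand-in raw_args list mimics the (dest, option_string, args)
# tuples that mrjob's internal _parse_raw_args() helper produces; the
# option names here are illustrative, not taken from the source:
#
#     _SPARK_ARG_OPT_NAMES = {'name', 'num_cores'}
#
#     raw_args = [
#         ('name', '--name', ['my_job']),
#         ('num_cores', '--num-cores', ['4']),
#         ('owner', '--owner', ['someuser']),  # not a Spark arg; filtered out
#     ]
#
#     spark_args = []
#     for dest, option_string, args in raw_args:
#         if dest in _SPARK_ARG_OPT_NAMES:
#             spark_args.append(option_string)
#             spark_args.extend(args)
#
#     print(spark_args)  # ['--name', 'my_job', '--num-cores', '4']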
def spark(self, input_path, output_path):
    harness_args = [self.options.job_class, input_path, output_path]

    # find arguments to pass through to the Spark harness
    raw_args = _parse_raw_args(self.arg_parser, self._cl_args)

    for dest, option_string, args in raw_args:
        if option_string in _PASSTHRU_OPTION_STRINGS:
            harness_args.append(option_string)
            harness_args.extend(args)

    harness_main(harness_args)
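# As a hypothetical illustration of the list this builds (the job class
# and pass-through option below are placeholders, not taken from the
# source): with job class 'mr_word_count.MRWordCount' and a pass-through
# option '--some-opt 4' on the original command line, the harness would
# be invoked roughly as:
#
#     harness_main([
#         'mr_word_count.MRWordCount',   # job class to run
#         'hdfs:///tmp/in',              # input path
#         'hdfs:///tmp/out',             # output path
#         '--some-opt', '4',             # passed through unchanged
#     ])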
def _non_option_kwargs(self):
    """Keyword arguments to runner constructor that can't be set
    in mrjob.conf.

    These should match the (named) arguments to
    :py:meth:`~mrjob.runner.MRJobRunner.__init__`.
    """
    # build extra_args and file_upload_args
    #
    # TODO: deprecate file_upload_args, represent paths to upload
    # as dictionaries in extra_args
    raw_args = _parse_raw_args(self.arg_parser, self._cl_args)

    extra_args = []
    file_upload_args = []

    for dest, option_string, args in raw_args:
        if dest in self._passthru_arg_dests:
            # special case for --hadoop-arg=-verbose etc.
            if (option_string and len(args) == 1 and
                    args[0].startswith('-')):
                extra_args.append('%s=%s' % (option_string, args[0]))
            else:
                if option_string:
                    extra_args.append(option_string)
                extra_args.extend(args)
        elif dest in self._file_arg_dests:
            file_upload_args.append((option_string, args[0]))

    return dict(
        conf_paths=self.options.conf_paths,
        extra_args=extra_args,
        file_upload_args=file_upload_args,
        hadoop_input_format=self.hadoop_input_format(),
        hadoop_output_format=self.hadoop_output_format(),
        input_paths=self.options.args,
        mr_job_script=self._script_path,
        output_dir=self.options.output_dir,
        partitioner=self.partitioner(),
        stdin=self.stdin,
        step_output_dir=self.options.step_output_dir,
    )
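# The --hadoop-arg=-verbose special case exists because a value that
# itself starts with a dash would otherwise be mistaken for a new option
# when the args are re-parsed. A minimal sketch of the underlying
# :py:mod:`argparse` behavior (generic code, not mrjob's):
#
#     import argparse
#
#     parser = argparse.ArgumentParser()
#     parser.add_argument('--hadoop-arg')
#
#     # As two separate tokens, argparse reads '-verbose' as the start
#     # of an unknown option and bails out ("expected one argument"):
#     try:
#         parser.parse_args(['--hadoop-arg', '-verbose'])
#     except SystemExit:
#         pass
#
#     # Joined with '=', the same value parses cleanly:
#     args = parser.parse_args(['--hadoop-arg=-verbose'])
#     assert args.hadoop_arg == '-verbose'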
def _non_option_kwargs(self):
    """Keyword arguments to runner constructor that can't be set
    in mrjob.conf.

    These should match the (named) arguments to
    :py:meth:`~mrjob.runner.MRJobRunner.__init__`.
    """
    # build extra_args
    raw_args = _parse_raw_args(self.arg_parser, self._cl_args)

    extra_args = []

    for dest, option_string, args in raw_args:
        if dest in self._file_arg_dests:
            extra_args.append(option_string)
            extra_args.append(parse_legacy_hash_path('file', args[0]))
        elif dest in self._passthru_arg_dests:
            # special case for --hadoop-arg=-verbose etc.
            if (option_string and len(args) == 1 and
                    args[0].startswith('-')):
                extra_args.append('%s=%s' % (option_string, args[0]))
            else:
                if option_string:
                    extra_args.append(option_string)
                extra_args.extend(args)

    # max_output_files is added by _add_runner_args() but can only
    # be set from the command line, so we add it here (see #2040)
    return dict(
        conf_paths=self.options.conf_paths,
        extra_args=extra_args,
        hadoop_input_format=self.hadoop_input_format(),
        hadoop_output_format=self.hadoop_output_format(),
        input_paths=self.options.args,
        max_output_files=self.options.max_output_files,
        mr_job_script=self._script_path,
        output_dir=self.options.output_dir,
        partitioner=self.partitioner(),
        stdin=self.stdin,
        step_output_dir=self.options.step_output_dir,
    )
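# In this version, file arguments ride along in extra_args as dicts
# produced by :py:func:`~mrjob.parse.parse_legacy_hash_path`, which
# splits an optional '#name' suffix off the path (the 'path#name' syntax
# lets users rename a file in the job's working directory). A rough
# sketch of the dict shape; treat the exact keys as an assumption:
#
#     from mrjob.parse import parse_legacy_hash_path
#
#     parse_legacy_hash_path('file', 'data.csv#training.csv')
#     # -> {'type': 'file', 'path': 'data.csv', 'name': 'training.csv'}
#
#     parse_legacy_hash_path('file', 'data.csv')
#     # -> {'type': 'file', 'path': 'data.csv', 'name': None}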