Ejemplo n.º 1
0
def _get_spark_args(parser, cl_args):
    """Pick the Spark-related switches out of *cl_args*.

    Every raw argument returned by ``_parse_raw_args(parser, cl_args)``
    whose dest appears in ``_SPARK_ARG_OPT_NAMES`` contributes its option
    string followed by its values.

    Returns a flat list, in the order ``_parse_raw_args()`` produced the
    arguments.
    """
    collected = []

    for dest, switch, values in _parse_raw_args(parser, cl_args):
        # anything that isn't a recognized Spark option is dropped
        if dest not in _SPARK_ARG_OPT_NAMES:
            continue

        collected.append(switch)
        collected.extend(values)

    return collected
Ejemplo n.º 2
0
def _get_spark_args(parser, cl_args):
    """Return the Spark switches (and their values) given in *cl_args*.

    The raw arguments come from ``_parse_raw_args(parser, cl_args)``; only
    those whose dest is in ``_SPARK_ARG_OPT_NAMES`` are kept. Each kept
    argument is flattened to its option string followed by its values.
    """
    # first narrow the raw arguments down to the Spark ones...
    spark_opts = [
        (option_string, args)
        for dest, option_string, args in _parse_raw_args(parser, cl_args)
        if dest in _SPARK_ARG_OPT_NAMES
    ]

    # ...then flatten each (switch, values) pair into a single list
    flattened = []
    for option_string, args in spark_opts:
        flattened += [option_string]
        flattened += args

    return flattened
Ejemplo n.º 3
0
    def spark(self, input_path, output_path):
        """Invoke ``harness_main()`` for this job.

        The harness is given ``self.options.job_class``, *input_path* and
        *output_path*, followed by every raw command-line option whose
        option string is in ``_PASSTHRU_OPTION_STRINGS`` (the option
        string, then its values).
        """
        harness_args = [self.options.job_class, input_path, output_path]

        # find arguments to pass through to the Spark harness
        for _dest, switch, values in _parse_raw_args(
                self.arg_parser, self._cl_args):
            if switch not in _PASSTHRU_OPTION_STRINGS:
                continue

            harness_args.append(switch)
            harness_args.extend(values)

        harness_main(harness_args)
Ejemplo n.º 4
0
    def spark(self, input_path, output_path):
        """Build the harness argument list and call ``harness_main()``.

        The list starts with ``self.options.job_class``, *input_path* and
        *output_path*. Raw command-line options whose option string is in
        ``_PASSTHRU_OPTION_STRINGS`` are then appended, each as the option
        string followed by its values.
        """
        job_class = self.options.job_class
        harness_args = [job_class, input_path, output_path]

        # find arguments to pass through to the Spark harness
        parsed = _parse_raw_args(self.arg_parser, self._cl_args)
        passthru = [
            (option_string, args)
            for _dest, option_string, args in parsed
            if option_string in _PASSTHRU_OPTION_STRINGS
        ]

        for option_string, args in passthru:
            harness_args += [option_string]
            harness_args += args

        harness_main(harness_args)
Ejemplo n.º 5
0
    def _non_option_kwargs(self):
        """Build the runner-constructor keyword arguments that can't be
        set in mrjob.conf.

        The keys should match the (named) arguments to
        :py:meth:`~mrjob.runner.MRJobRunner.__init__`.

        ``extra_args`` and ``file_upload_args`` are derived from the raw
        command-line arguments; everything else is read from
        ``self.options`` or from this object's attributes and methods.
        """
        # TODO: deprecate file_upload_args, represent paths to upload
        # as dictionaries in extra_args
        parsed = _parse_raw_args(self.arg_parser, self._cl_args)

        extra_args = []
        file_upload_args = []

        for dest, switch, values in parsed:
            if dest in self._passthru_arg_dests:
                if not switch:
                    # no option string to emit: pass the values as-is
                    extra_args.extend(values)
                elif len(values) == 1 and values[0].startswith('-'):
                    # special case for --hadoop-arg=-verbose etc.: a lone
                    # value starting with '-' is joined to its switch
                    # with '='
                    extra_args.append('%s=%s' % (switch, values[0]))
                else:
                    extra_args.append(switch)
                    extra_args.extend(values)

            elif dest in self._file_arg_dests:
                # only the first value is used for file arguments
                file_upload_args.append((switch, values[0]))

        return {
            'conf_paths': self.options.conf_paths,
            'extra_args': extra_args,
            'file_upload_args': file_upload_args,
            'hadoop_input_format': self.hadoop_input_format(),
            'hadoop_output_format': self.hadoop_output_format(),
            'input_paths': self.options.args,
            'mr_job_script': self._script_path,
            'output_dir': self.options.output_dir,
            'partitioner': self.partitioner(),
            'stdin': self.stdin,
            'step_output_dir': self.options.step_output_dir,
        }
Ejemplo n.º 6
0
    def _non_option_kwargs(self):
        """Build the runner-constructor keyword arguments that can't be
        set in mrjob.conf.

        The keys should match the (named) arguments to
        :py:meth:`~mrjob.runner.MRJobRunner.__init__`.

        ``extra_args`` is derived from the raw command-line arguments;
        everything else is read from ``self.options`` or from this
        object's attributes and methods.
        """
        parsed = _parse_raw_args(self.arg_parser, self._cl_args)

        extra_args = []

        for dest, switch, values in parsed:
            if dest in self._file_arg_dests:
                # file arguments: the switch, then its first value run
                # through parse_legacy_hash_path()
                extra_args.append(switch)
                extra_args.append(
                    parse_legacy_hash_path('file', values[0]))
                continue

            if dest not in self._passthru_arg_dests:
                continue

            if not switch:
                # no option string to emit: pass the values as-is
                extra_args.extend(values)
            elif len(values) == 1 and values[0].startswith('-'):
                # special case for --hadoop-arg=-verbose etc.: a lone
                # value starting with '-' is joined to its switch with '='
                extra_args.append('%s=%s' % (switch, values[0]))
            else:
                extra_args.append(switch)
                extra_args.extend(values)

        # max_output_files is added by _add_runner_args() but can only
        # be set from the command line, so we add it here (see #2040)
        return {
            'conf_paths': self.options.conf_paths,
            'extra_args': extra_args,
            'hadoop_input_format': self.hadoop_input_format(),
            'hadoop_output_format': self.hadoop_output_format(),
            'input_paths': self.options.args,
            'max_output_files': self.options.max_output_files,
            'mr_job_script': self._script_path,
            'output_dir': self.options.output_dir,
            'partitioner': self.partitioner(),
            'stdin': self.stdin,
            'step_output_dir': self.options.step_output_dir,
        }
Ejemplo n.º 7
0
Archivo: launch.py Proyecto: Yelp/mrjob
    def _non_option_kwargs(self):
        """Collect keyword arguments for the runner constructor that
        can't be set in mrjob.conf.

        These should match the (named) arguments to
        :py:meth:`~mrjob.runner.MRJobRunner.__init__`.

        Returns a dict. Its ``extra_args`` entry is rebuilt from the raw
        command-line arguments; the remaining entries are read from
        ``self.options`` or from this object's attributes and methods.
        """
        extra_args = []

        for dest, opt_str, opt_values in _parse_raw_args(
                self.arg_parser, self._cl_args):

            if dest in self._file_arg_dests:
                # file arguments: only the first value is used, after
                # going through parse_legacy_hash_path()
                extra_args.append(opt_str)
                extra_args.append(
                    parse_legacy_hash_path('file', opt_values[0]))

            elif dest in self._passthru_arg_dests:
                # special case for --hadoop-arg=-verbose etc.
                joined_with_equals = bool(
                    opt_str and
                    len(opt_values) == 1 and
                    opt_values[0].startswith('-'))

                if joined_with_equals:
                    extra_args.append(
                        '%s=%s' % (opt_str, opt_values[0]))
                    continue

                if opt_str:
                    extra_args.append(opt_str)
                extra_args.extend(opt_values)

        # max_output_files is added by _add_runner_args() but can only
        # be set from the command line, so we add it here (see #2040)
        kwargs = dict(
            conf_paths=self.options.conf_paths,
            extra_args=extra_args,
            hadoop_input_format=self.hadoop_input_format(),
            hadoop_output_format=self.hadoop_output_format(),
            input_paths=self.options.args,
            max_output_files=self.options.max_output_files,
            mr_job_script=self._script_path,
            output_dir=self.options.output_dir,
            partitioner=self.partitioner(),
            stdin=self.stdin,
            step_output_dir=self.options.step_output_dir,
        )
        return kwargs