def test_single_line_error(self):
    self.assertEqual(
        _parse_spark_log(_SINGLE_LINE_ERROR.split('\n')),
        dict(errors=[
            dict(spark_error=dict(
                message=_SINGLE_LINE_ERROR[49:],
                start_line=0,
                num_lines=1,
            ))
        ]))

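# The slice offsets used in these tests (e.g. _SINGLE_LINE_ERROR[49:],
# _MULTI_LINE_ERROR[37:]) presumably skip the log4j prefix (timestamp,
# level, and logger name) of each fixture, so that only the message text
# itself is compared. A purely hypothetical fixture consistent with a
# 49-character prefix (not the real test data) might look like:
#
#   _SINGLE_LINE_ERROR = (
#       '19/09/16 22:39:48 ERROR TransportRequestHandler: Error sending'
#       ' result to /172.31.30.71:36254; closing connection')
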
def test_multi_line_error(self):
    self.assertEqual(
        _parse_spark_log(_MULTI_LINE_ERROR.split('\n')),
        dict(errors=[
            dict(spark_error=dict(
                message=_MULTI_LINE_ERROR[37:],
                start_line=0,
                num_lines=10,
            ))
        ]))

def test_multi_line_warning(self):
    # on the local-cluster master, Python tracebacks are only available
    # from warnings, not errors
    self.assertEqual(
        _parse_spark_log(_MULTI_LINE_WARNING.split('\n')),
        dict(errors=[
            dict(spark_error=dict(
                message=_MULTI_LINE_WARNING[180:],
                start_line=1,
                num_lines=13,
            ))
        ]))

def test_multiple_errors(self):
    ERRORS = '\n'.join(
        [_SINGLE_LINE_ERROR, _MULTI_LINE_ERROR, _MULTI_LINE_WARNING])

    self.assertEqual(
        _parse_spark_log(ERRORS.split('\n')),
        dict(errors=[
            dict(spark_error=dict(
                message=_SINGLE_LINE_ERROR[49:],
                start_line=0,
                num_lines=1,
            )),
            dict(spark_error=dict(
                message=_MULTI_LINE_ERROR[37:],
                start_line=1,
                num_lines=10,
            )),
            dict(spark_error=dict(
                message=_MULTI_LINE_WARNING[180:],
                start_line=12,
                num_lines=13,
            ))
        ]))

def _run_spark_submit(self, spark_submit_args, env, record_callback):
    """Run the spark-submit binary in a subprocess, using a PTY if possible.

    :param spark_submit_args: spark-submit binary and arguments, as a list
    :param env: environment variables, as a dict
    :param record_callback: a function that takes a single log4j record
                            as its argument (see
                            :py:func:`~mrjob.logs.log4j\
                            ._parse_hadoop_log4j_records`)

    :return: tuple of the subprocess's return code and a step
             interpretation dictionary
    """
    log.debug('> %s' % cmd_line(spark_submit_args))
    log.debug('  with environment: %r' % sorted(env.items()))

    # these should always be set, but just in case
    returncode = 0
    step_interpretation = {}

    # try to use a PTY if it's available
    try:
        pid, master_fd = pty.fork()
    except (AttributeError, OSError):
        # no PTYs, just use Popen

        # user won't get much feedback for a while, so tell them
        # spark-submit is running
        log.debug('No PTY available, using Popen() to invoke spark-submit')

        step_proc = Popen(
            spark_submit_args, stdout=PIPE, stderr=PIPE, env=env)

        # parse driver output
        step_interpretation = _parse_spark_log(
            step_proc.stderr, record_callback=record_callback)

        # there shouldn't be much output on STDOUT, just echo it
        for record in _parse_hadoop_log4j_records(step_proc.stdout):
            record_callback(record)

        step_proc.stdout.close()
        step_proc.stderr.close()

        returncode = step_proc.wait()
    else:
        # we have PTYs
        if pid == 0:
            # we are the child process
            try:
                os.execvpe(spark_submit_args[0], spark_submit_args, env)
                # now this process is no longer Python
            except OSError as ex:
                # use _exit() so we don't do cleanup, etc. that's
                # the parent process's job
                os._exit(ex.errno)
            finally:
                # if we get some other exception, still exit hard
                os._exit(-1)
        else:
            log.debug('Invoking spark-submit via PTY')

            with os.fdopen(master_fd, 'rb') as master:
                step_interpretation = _parse_spark_log(
                    _eio_to_eof(master),
                    record_callback=record_callback)

                _, returncode = os.waitpid(pid, 0)

    return (returncode, step_interpretation)

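# _eio_to_eof() works around the fact that on Linux, reading from the master
# side of a PTY after the child process has exited raises OSError with errno
# EIO rather than returning a clean EOF. A minimal sketch of such a wrapper
# (an illustration of the idea, not necessarily mrjob's actual
# implementation) might look like:
#
#   def _eio_to_eof(f):
#       """Yield lines from *f*, treating EIO as end-of-file."""
#       try:
#           for line in f:
#               yield line
#       except OSError as ex:
#           if ex.errno != errno.EIO:
#               raise
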
def test_ignore_single_line_warning(self):
    # single-line warnings can be all sorts of irrelevant things
    self.assertEqual(
        _parse_spark_log(_SINGLE_LINE_WARNING.split('\n')), {})

def test_empty(self):
    self.assertEqual(_parse_spark_log([]), {})

def test_application_id(self):
    self.assertEqual(
        _parse_spark_log(_APPLICATION_ID_LINE.split('\n')),
        dict(application_id='application_1568415025507_0001'))
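
# _APPLICATION_ID_LINE is expected to be a line of driver output that
# mentions the YARN application ID. A hypothetical line (not the real
# fixture) might look like:
#
#   _APPLICATION_ID_LINE = (
#       '19/09/13 22:10:26 INFO Client: Submitted application'
#       ' application_1568415025507_0001')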