Example no. 1
0
                                     stderr=_std)
    ld = dict(bash_returncode=returncode, stdout=stdout, stderr=stderr, **ld)
    if returncode == -9:
        log_and_raise("Bash job timed out", ld)
    elif returncode != 0:
        # this raises an error and logs output:
        log_and_raise("Bash job failed", ld)
    else:
        log.info("Bash job succeeded", extra=ld)


# Argument-parser factory for the bash-job entry point: exposes the raw
# command, an optional watchdog timeout, and an output-redirection switch.
build_arg_parser = at.build_arg_parser([at.group(
    'Bash Job Options',
    at.add_argument(
        '--bash_cmd', nargs=REMAINDER, help=(
            "All remaining args are passed to the bash script. ie: "
            " myscript.sh arg1 --arg2 -c=4")),
    at.add_argument(
        '--watch', type=int, default=-1, help=(
            "Initiate a watchdog that will kill the process"
            " after a given seconds")),
    at.add_argument(
        '--redirect_to_stderr', action='store_true', help=(
            "Rather than capturing output and logging it,"
            " send output directly to sys.stderr")),
)])
Example no. 2
0
    log.info('running command', extra=ld)
    returncode, stdout, stderr = run(
        cmd, shell=True, timeout=ns.watch, stdout=_std, stderr=_std)
    ld = dict(bash_returncode=returncode, stdout=stdout, stderr=stderr, **ld)
    if returncode == -9:
        log_and_raise("Bash job timed out", ld)
    elif returncode != 0:
        # this raises an error and logs output:
        log_and_raise("Bash job failed", ld)
    else:
        log.info("Bash job succeeded", extra=ld)


# Argument-parser factory for the bash-job entry point. The '--bash_cmd'
# option swallows every remaining CLI token so the wrapped script receives
# its arguments untouched.
build_arg_parser = at.build_arg_parser([
    at.group(
        'Bash Job Options',
        at.add_argument(
            '--bash_cmd',
            nargs=REMAINDER,
            help=("All remaining args are passed to the bash script. ie: "
                  " myscript.sh arg1 --arg2 -c=4")),
        at.add_argument('--watch',
                        type=int,
                        default=-1,
                        help=("Initiate a watchdog that will kill the process"
                              " after a given seconds")),
        at.add_argument('--redirect_to_stderr',
                        action='store_true',
                        help=("Rather than capturing output and logging it,"
                              " send output directly to sys.stderr")),
    )
])
Example no. 3
0

def _validate_sample_size(str_i):
    """Helper for --sample argument option in argparse"""
    i = float(str_i)
    assert 0 <= i <= 1, "given sample size must be a number between [0, 1]"
    return i


_build_arg_parser = at.build_arg_parser([at.group(
    'Spark Job Options: How should given module.main process data?',
    at.app_name,
    at.group(
        "Preprocess data",
        at.add_argument(
            '--mapjson', action='store_true', help=(
                'convert each element in the textFile to json before doing'
                ' anything with it.')),
        at.add_argument(
            '--sample', type=_validate_sample_size,
            help="Sample n percent of the data without replacement"),
    ),
    at.add_argument('--read_fp'),
    at.add_argument('--write_fp'),
    at.add_argument(
        '--spark_conf', nargs='*', type=lambda x: x.split('='), default=[],
        help=("A list of key=value pairs that override"
              " the task's default settings. ie:"
              " spark.master=local[4] spark.ui.port=4046")),
    at.add_argument(
        '--spark_env', nargs='+', type=lambda x: x.split('='), default=[],
        help=("A list of key=value pairs to add to the spark executor's"