    log.info('running command', extra=ld)
    returncode, stdout, stderr = run(
        cmd, shell=True, timeout=ns.watch, stdout=_std, stderr=_std)
    ld = dict(bash_returncode=returncode, stdout=stdout, stderr=stderr, **ld)
    if returncode == -9:
        log_and_raise("Bash job timed out", ld)
    elif returncode != 0:
        # this raises an error and logs output:
        log_and_raise("Bash job failed", ld)
    else:
        log.info("Bash job succeeded", extra=ld)


build_arg_parser = at.build_arg_parser([at.group(
    'Bash Job Options',
    at.add_argument(
        '--bash_cmd', nargs=REMAINDER, help=(
            "All remaining args are passed to the bash script. ie: "
            " myscript.sh arg1 --arg2 -c=4")),
    at.add_argument(
        '--watch', type=int, default=-1, help=(
            "Initiate a watchdog that will kill the process"
            " after a given number of seconds")),
    at.add_argument(
        '--redirect_to_stderr', action='store_true', help=(
            "Rather than capturing output and logging it,"
            " send output directly to sys.stderr")),
)])
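# Example invocation (a hypothetical sketch; the exact runner command depends
# on how this plugin is wired into the surrounding scheduler's CLI). Because
# --bash_cmd uses nargs=REMAINDER, everything after it is handed to the shell
# verbatim, so a call might look like:
#
#     <runner> ... --watch 300 --redirect_to_stderr \
#         --bash_cmd myscript.sh arg1 --arg2 -c=4
#
# which runs "myscript.sh arg1 --arg2 -c=4", streams its output to stderr, and
# kills it after 300 seconds.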
def _validate_sample_size(str_i):
    """Helper for --sample argument option in argparse"""
    i = float(str_i)
    assert 0 <= i <= 1, "given sample size must be a number between [0, 1]"
    return i


_build_arg_parser = at.build_arg_parser([at.group(
    'Spark Job Options: How should given module.main process data?',
    at.app_name,
    at.group(
        "Preprocess data",
        at.add_argument(
            '--mapjson', action='store_true', help=(
                'convert each element in the textFile to json before doing'
                ' anything with it.')),
        at.add_argument(
            '--sample', type=_validate_sample_size,
            help="Sample n percent of the data without replacement"),
    ),
    at.add_argument('--read_fp'),
    at.add_argument('--write_fp'),
    at.add_argument(
        '--spark_conf', nargs='*', type=lambda x: x.split('='), default=[],
        help=("A list of key=value pairs that override"
              " the task's default settings. ie:"
              " spark.master=local[4] spark.ui.port=4046")),
    at.add_argument(
        '--spark_env', nargs='+', type=lambda x: x.split('='), default=[],
        help=("A list of key=value pairs to add to the spark executor's"