def test_file_from_working_directory():
    """A config file placed in the CWD is discovered by get_config_file()."""
    config_path = os.path.join(os.getcwd(), CONFIG_FILENAME)
    # Create an empty config file, then clean it up no matter what.
    with open(config_path, 'w'):
        pass
    try:
        assert get_config_file() == config_path
    finally:
        os.remove(config_path)
def test_file_from_working_directory():
    """A config file placed in the CWD is discovered by get_config_file().

    Fix: the original called ``os.remove`` while the ``with open(...)``
    handle was still open, which raises ``PermissionError`` on Windows.
    The file is now closed before the assertion/removal.
    """
    config_path = os.path.join(os.getcwd(), CONFIG_FILENAME)
    with open(config_path, 'w'):
        pass  # only need the file to exist; close it immediately
    try:
        assert get_config_file() == config_path
    finally:
        os.remove(config_path)
def test_file_from_home_directory():
    """A config file in the user's home directory is found by get_config_file().

    Fix: the original built the path with ``os.path.join`` when creating the
    file but with ``'~' + '/' + name`` string concatenation when removing it;
    both now use the same joined path.
    """
    config_path = os.path.join(os.path.expanduser('~'), CONFIG_FILENAME)
    with open(config_path, 'w'):
        pass
    try:
        assert get_config_file() == config_path
    finally:
        os.remove(config_path)
def test_file_from_home_directory():
    """A config file in the user's home directory is found by get_config_file().

    Fixes two defects of the original:
    * ``os.remove`` ran inside the ``with open(...)`` block, i.e. while the
      handle was still open — that fails on Windows; the file is now closed
      first.
    * removal used ``'~' + '/' + name`` string concatenation instead of the
      ``os.path.join`` path used for creation.
    """
    config_path = os.path.join(os.path.expanduser('~'), CONFIG_FILENAME)
    with open(config_path, 'w'):
        pass  # only need the file to exist; close it immediately
    try:
        assert get_config_file() == config_path
    finally:
        os.remove(config_path)
def test_no_file_is_ok():
    """When no config file exists anywhere, get_config_file() returns None."""
    found = get_config_file()
    assert found is None
def parse_args(argv, standalone=False, deployment_aware=False):
    """Build the CLI parser, merge config-file defaults, and parse ``argv``.

    Parameters
    ----------
    argv : list of str
        Command-line arguments (without the program name).
    standalone : bool
        Standalone-scoring mode: takes an ``import_id`` positional instead of
        project/model identifiers.
    deployment_aware : bool
        Deployment mode: takes a ``deployment_id`` positional instead of
        ``project_id``/``model_id``. Mutually exclusive with ``standalone``.

    Returns
    -------
    dict
        Parsed arguments with ``None`` values dropped.

    Only user-visible help-text typos were corrected; option names, defaults
    and parsing behavior are unchanged.
    """
    both_set = standalone and deployment_aware
    # NOTE(review): ``assert`` is stripped under ``python -O``; kept as-is so
    # callers that catch AssertionError keep working.
    assert not both_set, 'Both options can not be used in the same time'
    defaults = {
        'prompt': None,
        'out': 'out.csv',
        'create_api_token': False,
        'timeout': None,
        'n_samples': False,
        'n_concurrent': 4,
        'n_retry': 3,
        'resume': None,
        'fast': False,
        'stdout': False,
        'auto_sample': False,
        'api_version': PRED_API_V10,
        'max_prediction_explanations': 0
    }
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--verbose', '-v', action="store_true",
                        help='Provides status updates while '
                             'the script is running.')
    parser.add_argument('--version', action='version',
                        version=VERSION_TEMPLATE, help='Show version')
    dataset_gr = parser.add_argument_group('Dataset and server')
    dataset_gr.add_argument('--host', type=str,
                            help='Specifies the protocol (http or https) and '
                                 'hostname of the prediction API endpoint. '
                                 'E.g. "https://example.orm.datarobot.com"')
    dataset_gr.add_argument('--out', type=str, nargs='?',
                            default=defaults['out'],
                            help='Specifies the file name, '
                                 'and optionally path, '
                                 'to which the results are written. '
                                 'If not specified, '
                                 'the default file name is out.csv, '
                                 'written to the directory containing the '
                                 'script. (default: %(default)r)')
    if standalone:
        dataset_gr.add_argument('import_id', type=str,
                                help='Specifies the project '
                                     'identification string.')
    else:
        dataset_gr.add_argument('--api_version', type=str,
                                choices=RESPONSE_HANDLERS.keys(),
                                default=defaults['api_version'],
                                help='Specifies API version. '
                                     '(default: %(default)r)')
        # deployment_aware only matters when not standalone; the assert above
        # guards against passing both flags together.
        if deployment_aware:
            dataset_gr.add_argument('deployment_id', type=str,
                                    help='Specifies the model deployment '
                                         'identification string.')
        else:
            dataset_gr.add_argument('project_id', type=str,
                                    help='Specifies the project '
                                         'identification string.')
            dataset_gr.add_argument('model_id', type=str,
                                    help='Specifies the model identification '
                                         'string.')
    auth_gr = parser.add_argument_group('Authentication parameters')
    auth_gr.add_argument('--user', type=str,
                         help='Specifies the username used to acquire '
                              'the api-token. '
                              'Use quotes if the name contains spaces.')
    auth_gr.add_argument('--password', type=str, nargs='?',
                         help='Specifies the password used to acquire '
                              'the api-token. '
                              'Use quotes if the name contains spaces.')
    auth_gr.add_argument('--api_token', type=str, nargs='?',
                         help='Specifies the api token for the requests; '
                              'if you do not have a token, '
                              'you must specify the password argument.')
    auth_gr.add_argument('--create_api_token', action="store_true",
                         default=defaults['create_api_token'],
                         help='Requests a new API token. To use this '
                              'option, you must specify the '
                              'password argument for this request '
                              '(not the api_token argument). '
                              '(default: %(default)r)')
    auth_gr.add_argument('--datarobot_key', type=str, nargs='?',
                         help='An additional datarobot_key '
                              'for dedicated prediction instances.')
    dataset_gr.add_argument('dataset', type=str,
                            help='Specifies the .csv input file that '
                                 'the script scores.')
    # Help text fixed: "generate" -> "generated", missing space added.
    dataset_gr.add_argument('--max_prediction_explanations', type=int,
                            default=defaults['max_prediction_explanations'],
                            help='The maximum number of prediction '
                                 'explanations that will be generated for '
                                 'each prediction. '
                                 'Not compatible with api version `api/v1`')
    conn_gr = parser.add_argument_group('Connection control')
    conn_gr.add_argument('--timeout', type=int,
                         default=defaults['timeout'],
                         help='The timeout for each post request. '
                              '(default: %(default)r)')
    conn_gr.add_argument('--n_samples', type=int, nargs='?',
                         default=defaults['n_samples'],
                         help='Specifies the number of samples '
                              '(rows) to use per batch. If not defined the '
                              '"auto_sample" option will be used.')
    conn_gr.add_argument('--n_concurrent', type=int, nargs='?',
                         default=defaults['n_concurrent'],
                         help='Specifies the number of concurrent requests '
                              'to submit. (default: %(default)r)')
    conn_gr.add_argument('--n_retry', type=int,
                         default=defaults['n_retry'],
                         help='Specifies the number of times DataRobot '
                              'will retry if a request fails. '
                              'A value of -1 specifies an infinite '
                              'number of retries. (default: %(default)r)')
    # Help text fixed: duplicated word in "all the same all arguments".
    conn_gr.add_argument('--resume', dest='resume', action='store_true',
                         default=defaults['resume'],
                         help='Starts the prediction from the point at which '
                              'it was halted. '
                              'If the prediction stopped, for example due '
                              'to error or network connection issue, you can '
                              'run the same command with all the same '
                              'arguments plus this resume argument.')
    conn_gr.add_argument('--no-resume', dest='resume', action='store_false',
                         help='Starts the prediction from scratch disregarding'
                              ' previous run.')
    # Help text fixed: "throughout" -> "throughput".
    conn_gr.add_argument('--compress', action='store_true',
                         default=False,
                         help='Compress batch. This can improve throughput '
                              'when bandwidth is limited.')
    conn_gr.add_argument('--ca_bundle', dest='verify_ssl', metavar='PATH',
                         default=True,
                         help='Specifies the path to a CA_BUNDLE file or '
                              'directory with certificates of '
                              'trusted Certificate Authorities (CAs) '
                              'to be used for SSL verification. '
                              'By default the system\'s set of trusted '
                              'certificates will be used.')
    conn_gr.add_argument('--no_verify_ssl', action='store_false',
                         dest='verify_ssl',
                         help='Skip SSL certificates verification for HTTPS '
                              'endpoints. Using this flag will cause the '
                              'argument for ca_bundle to be ignored.')
    csv_gr = parser.add_argument_group('CSV parameters')
    csv_gr.add_argument('--keep_cols', type=str, nargs='?',
                        help='Specifies the column names to append '
                             'to the predictions. '
                             'Enter as a comma-separated list.')
    # Help text fixed: missing space after '"pipe"'.
    csv_gr.add_argument('--delimiter', type=str, nargs='?', default=None,
                        help='Specifies the delimiter to recognize in '
                             'the input .csv file. E.g. "--delimiter=,". '
                             'If not specified, the script tries to '
                             'automatically determine the delimiter. '
                             'The special keyword "tab" can be used to '
                             'indicate a tab delimited csv. "pipe" '
                             'can be used to indicate "|"')
    csv_gr.add_argument('--pred_name', type=str, nargs='?', default=None,
                        help='Specifies column name for prediction results, '
                             'empty name is used if not specified. For binary '
                             'predictions assumes last class in lexical order '
                             'as positive')
    csv_gr.add_argument('--pred_threshold', type=str, nargs='?', default=None,
                        help='Specifies column name for prediction threshold '
                             'for binary classification. Column will not be '
                             'included if not specified')
    csv_gr.add_argument('--pred_decision', type=str, nargs='?', default=None,
                        help='Specifies column name for prediction decision, '
                             'the value predicted by the model (class label '
                             'for classification)')
    csv_gr.add_argument('--fast', action='store_true',
                        default=defaults['fast'],
                        help='Experimental: faster CSV processor. '
                             'Note: does not support multiline csv. ')
    csv_gr.add_argument('--auto_sample', action='store_true',
                        default=defaults['auto_sample'],
                        help='Override "n_samples" and instead '
                             'use chunks of about 1.5 MB. This is recommended '
                             'and enabled by default if "n_samples" is not '
                             'defined.')
    csv_gr.add_argument('--encoding', type=str, default='',
                        help='Declare the dataset encoding. '
                             'If an encoding is not provided the '
                             'batch_scoring script attempts to detect it. '
                             'E.g "utf-8", "latin-1" '
                             'or "iso2022_jp". See the Python docs for a '
                             'list of valid encodings '
                             'https://docs.python.org/3/library/codecs.html'
                             '#standard-encodings')
    csv_gr.add_argument('--skip_dialect', action='store_true', default=False,
                        help='Tell the batch_scoring script '
                             'to skip csv dialect detection.')
    csv_gr.add_argument('--skip_row_id', action='store_true', default=False,
                        help='Skip the row_id column in output.')
    # Help text fixed: missing space after "file.".
    csv_gr.add_argument('--output_delimiter', type=str, default=None,
                        help='Set the delimiter for output file. The special '
                             'keyword "tab" can be used to indicate a tab '
                             'delimited csv. "pipe" can be used to indicate '
                             '"|"')
    csv_gr.add_argument('--field_size_limit', type=int, default=None,
                        help='Override the maximum field size. May be '
                             'necessary for datasets with very wide text '
                             'fields, but can lead to memory issues.')
    misc_gr = parser.add_argument_group('Miscellaneous')
    misc_gr.add_argument('-y', '--yes', dest='prompt', action='store_true',
                         help="Always answer 'yes' for user prompts")
    misc_gr.add_argument('-n', '--no', dest='prompt', action='store_false',
                         help="Always answer 'no' for user prompts")
    misc_gr.add_argument('--dry_run', dest='dry_run', action='store_true',
                         help="Only read/chunk input data but dont send "
                              "requests.")
    misc_gr.add_argument('--stdout', action='store_true', dest='stdout',
                         default=False,
                         help='Send all log messages to stdout.')
    # Merge config-file values into the defaults, then relax "required" on any
    # positional that the config file already satisfies.
    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if '--' + action.dest not in argv:
                action.nargs = '?'
    # Drop None values so "absent" is distinguishable with `in`/`get`.
    parsed_args = {k: v
                   for k, v in vars(parser.parse_args(argv)).items()
                   if v is not None}
    return parsed_args
def main(argv=None):
    """CLI entry point: parse arguments, set up the UI, run batch predictions.

    Fixes applied:
    * ``argv`` default was ``sys.argv[1:]`` evaluated once at import time;
      it is now read at call time (``None`` sentinel), which is
      backward-compatible for all callers.
    * argument-group typo "CVS parameters" -> "CSV parameters".
    * ``--n_retry`` help claimed "-1, the default" although ``default=3``,
      and lacked a space before "(default"; text corrected.
    * duplicated word in the ``--resume`` help ("all the same all arguments").
    """
    if argv is None:
        argv = sys.argv[1:]
    global ui  # global variable hack, will get rid of a bit later
    warnings.simplefilter("ignore")
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true",
        help="Provides status updates while " "the script is running."
    )
    parser.add_argument("--version", action="version",
                        version=VERSION_TEMPLATE, help="Show version")
    dataset_gr = parser.add_argument_group("Dataset and server")
    dataset_gr.add_argument(
        "--host", type=str,
        help="Specifies the protocol (http or https) and "
             "hostname of the prediction API endpoint. "
             'E.g. "https://example.orm.datarobot.com"',
    )
    dataset_gr.add_argument("project_id", type=str,
                            help="Specifies the project "
                                 "identification string.")
    dataset_gr.add_argument("model_id", type=str,
                            help="Specifies the model identification string.")
    dataset_gr.add_argument("dataset", type=str,
                            help="Specifies the .csv input file that "
                                 "the script scores.")
    dataset_gr.add_argument(
        "--out", type=str, nargs="?", default="out.csv",
        help="Specifies the file name, "
             "and optionally path, "
             "to which the results are written. "
             "If not specified, "
             "the default file name is out.csv, "
             "written to the directory containing the script. "
             "(default: %(default)r)",
    )
    auth_gr = parser.add_argument_group("Authentication parameters")
    auth_gr.add_argument(
        "--user", type=str,
        help="Specifies the username used to acquire "
             "the api-token. "
             "Use quotes if the name contains spaces.",
    )
    auth_gr.add_argument(
        "--password", type=str, nargs="?",
        help="Specifies the password used to acquire "
             "the api-token. "
             "Use quotes if the name contains spaces.",
    )
    auth_gr.add_argument(
        "--api_token", type=str, nargs="?",
        help="Specifies the api token for the requests; "
             "if you do not have a token, "
             "you must specify the password argument.",
    )
    auth_gr.add_argument(
        "--create_api_token", action="store_true", default=False,
        help="Requests a new API token. To use this option, "
             "you must specify the "
             "password argument for this request "
             "(not the api_token argument). "
             "(default: %(default)r)",
    )
    auth_gr.add_argument(
        "--datarobot_key", type=str, nargs="?",
        help="An additional datarobot_key "
             "for dedicated prediction instances.",
    )
    conn_gr = parser.add_argument_group("Connection control")
    conn_gr.add_argument(
        "--timeout", type=int, default=30,
        help="The timeout for each post request. " "(default: %(default)r)"
    )
    conn_gr.add_argument(
        "--n_samples", type=int, nargs="?", default=False,
        help="Specifies the number of samples (rows) to use "
             'per batch. If not defined the "auto_sample" option '
             "will be used.",
    )
    conn_gr.add_argument(
        "--n_concurrent", type=int, nargs="?", default=4,
        help="Specifies the number of concurrent requests "
             "to submit. (default: %(default)r)",
    )
    # Help text fixed: default is 3, not -1; space added before "(default".
    conn_gr.add_argument(
        "--n_retry", type=int, default=3,
        help="Specifies the number of times DataRobot "
             "will retry if a request fails. "
             "A value of -1 specifies "
             "an infinite number of retries. "
             "(default: %(default)r)",
    )
    # Help text fixed: duplicated word in "all the same all arguments".
    conn_gr.add_argument(
        "--resume", action="store_true", default=False,
        help="Starts the prediction from the point at which "
             "it was halted. "
             "If the prediction stopped, for example due "
             "to error or network connection issue, you can run "
             "the same command with all the same "
             "arguments plus this resume argument.",
    )
    csv_gr = parser.add_argument_group("CSV parameters")
    csv_gr.add_argument(
        "--keep_cols", type=str, nargs="?",
        help="Specifies the column names to append "
             "to the predictions. "
             "Enter as a comma-separated list.",
    )
    csv_gr.add_argument(
        "--delimiter", type=str, nargs="?", default=None,
        help="Specifies the delimiter to recognize in "
             'the input .csv file. E.g. "--delimiter=,". '
             "If not specified, the script tries to automatically "
             'determine the delimiter. The special keyword "tab" '
             "can be used to indicate a tab delimited csv.",
    )
    csv_gr.add_argument(
        "--pred_name", type=str, nargs="?", default=None,
        help="Specifies column name for prediction results, "
             "empty name is used if not specified. For binary "
             "predictions assumes last class in lexical order "
             "as positive",
    )
    csv_gr.add_argument(
        "--fast", action="store_true", default=False,
        help="Experimental: faster CSV processor. "
             "Note: does not support multiline csv. ",
    )
    csv_gr.add_argument(
        "--auto_sample", action="store_true", default=False,
        help='Override "n_samples" and instead '
             "use chunks of about 1.5 MB. This is recommended and "
             'enabled by default if "n_samples" is not defined.',
    )
    csv_gr.add_argument(
        "--encoding", type=str, default="",
        help="Declare the dataset encoding. "
             "If an encoding is not provided the batch_scoring "
             'script attempts to detect it. E.g "utf-8", "latin-1" '
             'or "iso2022_jp". See the Python docs for a list of '
             "valid encodings "
             "https://docs.python.org/3/library/codecs.html"
             "#standard-encodings",
    )
    csv_gr.add_argument(
        "--skip_dialect", action="store_true", default=False,
        help="Tell the batch_scoring script "
             "to skip csv dialect detection.",
    )
    csv_gr.add_argument("--skip_row_id", action="store_true", default=False,
                        help="Skip the row_id column in output.")
    csv_gr.add_argument("--output_delimiter", type=str, default=None,
                        help="Set the delimiter for output file.")
    misc_gr = parser.add_argument_group("Miscellaneous")
    misc_gr.add_argument("-y", "--yes", dest="prompt", action="store_true",
                         help="Always answer 'yes' for user prompts")
    misc_gr.add_argument("-n", "--no", dest="prompt", action="store_false",
                         help="Always answer 'no' for user prompts")
    misc_gr.add_argument(
        "--dry_run", dest="dry_run", action="store_true",
        help="Only read/chunk input data but dont send " "requests."
    )
    misc_gr.add_argument(
        "--stdout", action="store_true", dest="stdout", default=False,
        help="Send all log messages to stdout."
    )
    defaults = {
        "prompt": None,
        "out": "out.csv",
        "create_api_token": False,
        "timeout": 30,
        "n_samples": False,
        "n_concurrent": 4,
        "n_retry": 3,
        "resume": False,
        "fast": False,
        "stdout": False,
        "auto_sample": False,
    }
    # Merge config-file values into the defaults, then relax "required" on any
    # positional that the config file already satisfies.
    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if "--" + action.dest not in argv:
                action.nargs = "?"
    # Drop None values so "absent" is distinguishable with `in`/`get`.
    parsed_args = {k: v for k, v in vars(parser.parse_args(argv)).items()
                   if v is not None}
    loglevel = logging.DEBUG if parsed_args["verbose"] else logging.INFO
    stdout = parsed_args["stdout"]
    ui = UI(parsed_args.get("prompt"), loglevel, stdout)
    # Never echo the password back to the user.
    printed_args = copy.copy(parsed_args)
    printed_args.pop("password", None)
    ui.debug(printed_args)
    ui.info("platform: {} {}".format(sys.platform, sys.version))
    # parse args
    # NOTE(review): "host" is looked up unconditionally; if --host is omitted
    # and not supplied by a config file this raises KeyError — confirm intent.
    host = parsed_args["host"]
    pid = parsed_args["project_id"]
    lid = parsed_args["model_id"]
    n_retry = int(parsed_args["n_retry"])
    if parsed_args.get("keep_cols"):
        keep_cols = [s.strip() for s in parsed_args["keep_cols"].split(",")]
    else:
        keep_cols = None
    concurrent = int(parsed_args["n_concurrent"])
    dataset = parsed_args["dataset"]
    n_samples = int(parsed_args["n_samples"])
    delimiter = parsed_args.get("delimiter")
    resume = parsed_args["resume"]
    out_file = parsed_args["out"]
    datarobot_key = parsed_args.get("datarobot_key")
    timeout = int(parsed_args["timeout"])
    fast_mode = parsed_args["fast"]
    auto_sample = parsed_args["auto_sample"]
    if not n_samples:
        auto_sample = True
    encoding = parsed_args["encoding"]
    skip_dialect = parsed_args["skip_dialect"]
    skip_row_id = parsed_args["skip_row_id"]
    output_delimiter = parsed_args.get("output_delimiter")
    if "user" not in parsed_args:
        user = ui.prompt_user()
    else:
        user = parsed_args["user"].strip()
    if not os.path.exists(parsed_args["dataset"]):
        ui.fatal("file {} does not exist.".format(parsed_args["dataset"]))
    try:
        verify_objectid(pid)
        verify_objectid(lid)
    except ValueError as e:
        ui.fatal(str(e))
    if delimiter == "\\t" or delimiter == "tab":
        # NOTE: on bash you have to use Ctrl-V + TAB
        delimiter = "\t"
    if delimiter and delimiter not in VALID_DELIMITERS:
        ui.fatal('Delimiter "{}" is not a valid delimiter.'.format(delimiter))
    if output_delimiter == "\\t" or output_delimiter == "tab":
        # NOTE: on bash you have to use Ctrl-V + TAB
        output_delimiter = "\t"
    if output_delimiter and output_delimiter not in VALID_DELIMITERS:
        ui.fatal('Output delimiter "{}" is not a valid '
                 'delimiter.'.format(output_delimiter))
    api_token = parsed_args.get("api_token")
    create_api_token = parsed_args.get("create_api_token")
    pwd = parsed_args.get("password")
    pred_name = parsed_args.get("pred_name")
    dry_run = parsed_args.get("dry_run", False)
    base_url = parse_host(host, ui)
    base_headers = {}
    if datarobot_key:
        base_headers["datarobot-key"] = datarobot_key
    ui.debug("batch_scoring v{}".format(__version__))
    ui.info("connecting to {}".format(base_url))
    try:
        run_batch_predictions(
            base_url=base_url,
            base_headers=base_headers,
            user=user,
            pwd=pwd,
            api_token=api_token,
            create_api_token=create_api_token,
            pid=pid,
            lid=lid,
            n_retry=n_retry,
            concurrent=concurrent,
            resume=resume,
            n_samples=n_samples,
            out_file=out_file,
            keep_cols=keep_cols,
            delimiter=delimiter,
            dataset=dataset,
            pred_name=pred_name,
            timeout=timeout,
            ui=ui,
            fast_mode=fast_mode,
            auto_sample=auto_sample,
            dry_run=dry_run,
            encoding=encoding,
            skip_dialect=skip_dialect,
            skip_row_id=skip_row_id,
            output_delimiter=output_delimiter,
        )
    # NOTE(review): SystemError (not SystemExit) is silently swallowed here —
    # looks deliberate but confirm which exception was intended.
    except SystemError:
        pass
    except ShelveError as e:
        ui.error(str(e))
    except KeyboardInterrupt:
        ui.info("Keyboard interrupt")
    except Exception as e:
        ui.fatal(str(e))
    finally:
        ui.close()
def test_file_from_working_directory__has_priority(
    config_file_in_home_dir,
    config_file_in_current_dir,
):
    """When config files exist both in HOME and in the CWD, the CWD one wins."""
    found = get_config_file()
    assert found == config_file_in_current_dir
def test_file_from_working_directory(config_file_in_current_dir):
    """get_config_file() locates the config file created in the CWD fixture."""
    found = get_config_file()
    assert found == config_file_in_current_dir
def test_file_from_home_directory(config_file_in_home_dir):
    """get_config_file() locates the config file created in the HOME fixture."""
    found = get_config_file()
    assert found == config_file_in_home_dir
def parse_args(argv, standalone=False):
    """Build the CLI parser, merge config-file defaults, and parse ``argv``.

    Parameters
    ----------
    argv : list of str
        Command-line arguments (without the program name).
    standalone : bool
        Standalone-scoring mode: takes an ``import_id`` positional instead of
        project/model identifiers.

    Returns
    -------
    dict
        Parsed arguments with ``None`` values dropped.

    Only user-visible help-text typos were corrected ("CVS" -> "CSV",
    "throughout" -> "throughput", duplicated/missing words); option names,
    defaults and parsing behavior are unchanged.
    """
    defaults = {
        'prompt': None,
        'out': 'out.csv',
        'create_api_token': False,
        'timeout': None,
        'n_samples': False,
        'n_concurrent': 4,
        'n_retry': 3,
        'resume': None,
        'fast': False,
        'stdout': False,
        'auto_sample': False,
        'api_version': PRED_API_V10,
    }
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--verbose', '-v', action="store_true",
                        help='Provides status updates while '
                             'the script is running.')
    parser.add_argument('--version', action='version',
                        version=VERSION_TEMPLATE, help='Show version')
    dataset_gr = parser.add_argument_group('Dataset and server')
    dataset_gr.add_argument('--host', type=str,
                            help='Specifies the protocol (http or https) and '
                                 'hostname of the prediction API endpoint. '
                                 'E.g. "https://example.orm.datarobot.com"')
    dataset_gr.add_argument('--out', type=str, nargs='?',
                            default=defaults['out'],
                            help='Specifies the file name, '
                                 'and optionally path, '
                                 'to which the results are written. '
                                 'If not specified, '
                                 'the default file name is out.csv, '
                                 'written to the directory containing the '
                                 'script. (default: %(default)r)')
    if standalone:
        dataset_gr.add_argument('import_id', type=str,
                                help='Specifies the project '
                                     'identification string.')
    else:
        dataset_gr.add_argument('--api_version', type=str,
                                choices=RESPONSE_HANDLERS.keys(),
                                default=defaults['api_version'],
                                help='Specifies API version. '
                                     '(default: %(default)r)')
        dataset_gr.add_argument('project_id', type=str,
                                help='Specifies the project '
                                     'identification string.')
        dataset_gr.add_argument('model_id', type=str,
                                help='Specifies the model identification '
                                     'string.')
    auth_gr = parser.add_argument_group('Authentication parameters')
    auth_gr.add_argument('--user', type=str,
                         help='Specifies the username used to acquire '
                              'the api-token. '
                              'Use quotes if the name contains spaces.')
    auth_gr.add_argument('--password', type=str, nargs='?',
                         help='Specifies the password used to acquire '
                              'the api-token. '
                              'Use quotes if the name contains spaces.')
    auth_gr.add_argument('--api_token', type=str, nargs='?',
                         help='Specifies the api token for the requests; '
                              'if you do not have a token, '
                              'you must specify the password argument.')
    auth_gr.add_argument('--create_api_token', action="store_true",
                         default=defaults['create_api_token'],
                         help='Requests a new API token. To use this '
                              'option, you must specify the '
                              'password argument for this request '
                              '(not the api_token argument). '
                              '(default: %(default)r)')
    auth_gr.add_argument('--datarobot_key', type=str, nargs='?',
                         help='An additional datarobot_key '
                              'for dedicated prediction instances.')
    dataset_gr.add_argument('dataset', type=str,
                            help='Specifies the .csv input file that '
                                 'the script scores.')
    conn_gr = parser.add_argument_group('Connection control')
    conn_gr.add_argument('--timeout', type=int,
                         default=defaults['timeout'],
                         help='The timeout for each post request. '
                              '(default: %(default)r)')
    conn_gr.add_argument('--n_samples', type=int, nargs='?',
                         default=defaults['n_samples'],
                         help='Specifies the number of samples '
                              '(rows) to use per batch. If not defined the '
                              '"auto_sample" option will be used.')
    conn_gr.add_argument('--n_concurrent', type=int, nargs='?',
                         default=defaults['n_concurrent'],
                         help='Specifies the number of concurrent requests '
                              'to submit. (default: %(default)r)')
    conn_gr.add_argument('--n_retry', type=int,
                         default=defaults['n_retry'],
                         help='Specifies the number of times DataRobot '
                              'will retry if a request fails. '
                              'A value of -1 specifies an infinite '
                              'number of retries. (default: %(default)r)')
    # Help text fixed: duplicated word in "all the same all arguments".
    conn_gr.add_argument('--resume', dest='resume', action='store_true',
                         default=defaults['resume'],
                         help='Starts the prediction from the point at which '
                              'it was halted. '
                              'If the prediction stopped, for example due '
                              'to error or network connection issue, you can '
                              'run the same command with all the same '
                              'arguments plus this resume argument.')
    conn_gr.add_argument('--no-resume', dest='resume', action='store_false',
                         help='Starts the prediction from scratch disregarding'
                              ' previous run.')
    # Help text fixed: "throughout" -> "throughput".
    conn_gr.add_argument('--compress', action='store_true',
                         default=False,
                         help='Compress batch. This can improve throughput '
                              'when bandwidth is limited.')
    conn_gr.add_argument('--ca_bundle', dest='verify_ssl', metavar='PATH',
                         default=True,
                         help='Specifies the path to a CA_BUNDLE file or '
                              'directory with certificates of '
                              'trusted Certificate Authorities (CAs) '
                              'to be used for SSL verification. '
                              'By default the system\'s set of trusted '
                              'certificates will be used.')
    conn_gr.add_argument('--no_verify_ssl', action='store_false',
                         dest='verify_ssl',
                         help='Skip SSL certificates verification for HTTPS '
                              'endpoints. Using this flag will cause the '
                              'argument for ca_bundle to be ignored.')
    # Group name fixed: "CVS parameters" -> "CSV parameters".
    csv_gr = parser.add_argument_group('CSV parameters')
    csv_gr.add_argument('--keep_cols', type=str, nargs='?',
                        help='Specifies the column names to append '
                             'to the predictions. '
                             'Enter as a comma-separated list.')
    # Help text fixed: missing space after '"pipe"'.
    csv_gr.add_argument('--delimiter', type=str, nargs='?', default=None,
                        help='Specifies the delimiter to recognize in '
                             'the input .csv file. E.g. "--delimiter=,". '
                             'If not specified, the script tries to '
                             'automatically determine the delimiter. '
                             'The special keyword "tab" can be used to '
                             'indicate a tab delimited csv. "pipe" '
                             'can be used to indicate "|"')
    csv_gr.add_argument('--pred_name', type=str, nargs='?', default=None,
                        help='Specifies column name for prediction results, '
                             'empty name is used if not specified. For binary '
                             'predictions assumes last class in lexical order '
                             'as positive')
    csv_gr.add_argument('--fast', action='store_true',
                        default=defaults['fast'],
                        help='Experimental: faster CSV processor. '
                             'Note: does not support multiline csv. ')
    csv_gr.add_argument('--auto_sample', action='store_true',
                        default=defaults['auto_sample'],
                        help='Override "n_samples" and instead '
                             'use chunks of about 1.5 MB. This is recommended '
                             'and enabled by default if "n_samples" is not '
                             'defined.')
    csv_gr.add_argument('--encoding', type=str, default='',
                        help='Declare the dataset encoding. '
                             'If an encoding is not provided the '
                             'batch_scoring script attempts to detect it. '
                             'E.g "utf-8", "latin-1" '
                             'or "iso2022_jp". See the Python docs for a '
                             'list of valid encodings '
                             'https://docs.python.org/3/library/codecs.html'
                             '#standard-encodings')
    csv_gr.add_argument('--skip_dialect', action='store_true', default=False,
                        help='Tell the batch_scoring script '
                             'to skip csv dialect detection.')
    csv_gr.add_argument('--skip_row_id', action='store_true', default=False,
                        help='Skip the row_id column in output.')
    # Help text fixed: missing space after "file.".
    csv_gr.add_argument('--output_delimiter', type=str, default=None,
                        help='Set the delimiter for output file. The special '
                             'keyword "tab" can be used to indicate a tab '
                             'delimited csv. "pipe" can be used to indicate '
                             '"|"')
    csv_gr.add_argument('--field_size_limit', type=int, default=None,
                        help='Override the maximum field size. May be '
                             'necessary for datasets with very wide text '
                             'fields, but can lead to memory issues.')
    misc_gr = parser.add_argument_group('Miscellaneous')
    misc_gr.add_argument('-y', '--yes', dest='prompt', action='store_true',
                         help="Always answer 'yes' for user prompts")
    misc_gr.add_argument('-n', '--no', dest='prompt', action='store_false',
                         help="Always answer 'no' for user prompts")
    misc_gr.add_argument('--dry_run', dest='dry_run', action='store_true',
                         help="Only read/chunk input data but dont send "
                              "requests.")
    misc_gr.add_argument('--stdout', action='store_true', dest='stdout',
                         default=False,
                         help='Send all log messages to stdout.')
    # Merge config-file values into the defaults, then relax "required" on any
    # positional that the config file already satisfies.
    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if '--' + action.dest not in argv:
                action.nargs = '?'
    # Drop None values so "absent" is distinguishable with `in`/`get`.
    parsed_args = {k: v
                   for k, v in vars(parser.parse_args(argv)).items()
                   if v is not None}
    return parsed_args