Example no. 1
0
def test_empty_file_doesnt_error():
    with NamedTemporaryFile(suffix='.ini', delete=False) as test_file:
        pass
    try:
        assert parse_config_file(test_file.name) == {}
    finally:
        os.remove(test_file.name)
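These test snippets rely on imports declared at the top of the original test module. A likely set, reconstructed from the names used below, is sketched here; the import path of parse_config_file is an assumption based on the upstream batch_scoring project and may differ locally.

import os
import textwrap
from tempfile import NamedTemporaryFile

# Assumed location of the function under test; adjust to wherever
# parse_config_file is actually defined in your checkout.
from datarobot_batch_scoring.utils import parse_config_file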
Example no. 2
0
def test_empty_file_doesnt_error():
    with NamedTemporaryFile(suffix='.ini', delete=False) as test_file:
        pass
    try:
        assert parse_config_file(test_file.name) == {}
    finally:
        os.remove(test_file.name)
Example no. 3
0
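# str_value and bool_value are presumably supplied by a
# pytest.mark.parametrize decorator that is not shown in this snippet.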
def test_field_with_boolean_option(tmpdir, str_value, bool_value):
    tmpdir.join('test.ini').write_text(
        u'[batch_scoring]\nskip_row_id=%s' % str_value, 'utf-8')

    conf = parse_config_file(tmpdir.join('test.ini').strpath)

    assert isinstance(conf['skip_row_id'], bool)
    assert conf['skip_row_id'] == bool_value
Example no. 4
0
def test_field_with_boolean_option(tmpdir, str_value, bool_value):
    tmpdir.join('test.ini').write_text(
        u'[batch_scoring]\nskip_row_id=%s' % str_value, 'utf-8')

    conf = parse_config_file(tmpdir.join('test.ini').strpath)

    assert isinstance(conf['skip_row_id'], bool)
    assert conf['skip_row_id'] == bool_value
Example no. 5
0
def test_field_with_config_option():
    raw_data = ('[batch_scoring]\n' 'field_size_limit=12345678')
    with NamedTemporaryFile(suffix='.ini', delete=False) as test_file:
        test_file.write(str(raw_data).encode('utf-8'))

    try:
        parsed_result = parse_config_file(test_file.name)
        assert parsed_result['field_size_limit'] == 12345678
    finally:
        os.remove(test_file.name)
Example no. 6
0
def test_field_with_config_option():
    raw_data = (
        '[batch_scoring]\n'
        'field_size_limit=12345678'
    )
    with NamedTemporaryFile(suffix='.ini', delete=False) as test_file:
        test_file.write(str(raw_data).encode('utf-8'))

    try:
        parsed_result = parse_config_file(test_file.name)
        assert parsed_result['field_size_limit'] == 12345678
    finally:
        os.remove(test_file.name)
Example no. 7
0
def test_section_basic_with_username():
    raw_data = textwrap.dedent("""\
        [batch_scoring]
        host=file_host
        project_id=file_project_id
        model_id=file_model_id
        user=file_username
        password=file_password""")
    with NamedTemporaryFile(suffix='*.ini') as test_file:
        test_file.write(str(raw_data).encode('utf-8'))
        test_file.seek(0)
        parsed_result = parse_config_file(test_file.name)
        assert isinstance(parsed_result, dict)
        assert parsed_result['host'] == 'file_host'
        assert parsed_result['project_id'] == 'file_project_id'
        assert parsed_result['model_id'] == 'file_model_id'
        assert parsed_result['user'] == 'file_username'
        assert parsed_result['password'] == 'file_password'
Example no. 8
0
def test_section_basic_with_username():
    raw_data = textwrap.dedent("""\
        [batch_scoring]
        host=file_host
        project_id=aaaaaaaaaaaaaaaaaaaaaaaa
        model_id=aaaaaaaaaaaaaaaaaaaaaaaa
        user=file_username
        password=file_password""")
    with NamedTemporaryFile(suffix='.ini', delete=False) as test_file:
        test_file.write(str(raw_data).encode('utf-8'))

    try:
        parsed_result = parse_config_file(test_file.name)
        assert isinstance(parsed_result, dict)
        assert parsed_result['host'] == 'file_host'
        assert parsed_result['project_id'] == 'aaaaaaaaaaaaaaaaaaaaaaaa'
        assert parsed_result['model_id'] == 'aaaaaaaaaaaaaaaaaaaaaaaa'
        assert parsed_result['user'] == 'file_username'
        assert parsed_result['password'] == 'file_password'
    finally:
        os.remove(test_file.name)
Example no. 9
0
def test_section_basic_with_username():
    raw_data = textwrap.dedent("""\
        [batch_scoring]
        host=file_host
        project_id=aaaaaaaaaaaaaaaaaaaaaaaa
        model_id=aaaaaaaaaaaaaaaaaaaaaaaa
        user=file_username
        password=file_password""")
    with NamedTemporaryFile(suffix='.ini', delete=False) as test_file:
        test_file.write(str(raw_data).encode('utf-8'))

    try:
        parsed_result = parse_config_file(test_file.name)
        assert isinstance(parsed_result, dict)
        assert parsed_result['host'] == 'file_host'
        assert parsed_result['project_id'] == 'aaaaaaaaaaaaaaaaaaaaaaaa'
        assert parsed_result['model_id'] == 'aaaaaaaaaaaaaaaaaaaaaaaa'
        assert parsed_result['user'] == 'file_username'
        assert parsed_result['password'] == 'file_password'
    finally:
        os.remove(test_file.name)
Example no. 10
0
def test_empty_file_doesnt_error():
    with NamedTemporaryFile(suffix='*.ini') as test_file:
        assert parse_config_file(test_file.name) == {}
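For contrast, a minimal sketch of the same empty-file check written against pytest's tmp_path fixture, which avoids the delete=False and manual os.remove cleanup used in the NamedTemporaryFile variants above; the test name and file name are illustrative only.

def test_empty_file_doesnt_error_tmp_path(tmp_path):
    # tmp_path is a pathlib.Path pointing at a per-test temporary directory
    config_path = tmp_path / 'test.ini'
    config_path.write_text(u'')  # create an empty .ini file on disk
    assert parse_config_file(str(config_path)) == {}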
Example no. 11
0
def parse_args(argv, standalone=False, deployment_aware=False):
    both_set = standalone and deployment_aware
    assert not both_set, 'Both options cannot be used at the same time'
    defaults = {
        'prompt': None,
        'out': 'out.csv',
        'create_api_token': False,
        'timeout': None,
        'n_samples': False,
        'n_concurrent': 4,
        'n_retry': 3,
        'resume': None,
        'fast': False,
        'stdout': False,
        'auto_sample': False,
        'api_version': PRED_API_V10,
        'max_prediction_explanations': 0
    }
    parser = argparse.ArgumentParser(
        description=DESCRIPTION,
        epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--verbose',
                        '-v',
                        action="store_true",
                        help='Provides status updates while '
                        'the script is running.')
    parser.add_argument('--version',
                        action='version',
                        version=VERSION_TEMPLATE,
                        help='Show version')
    dataset_gr = parser.add_argument_group('Dataset and server')
    dataset_gr.add_argument('--host',
                            type=str,
                            help='Specifies the protocol (http or https) and '
                            'hostname of the prediction API endpoint. '
                            'E.g. "https://example.orm.datarobot.com"')
    dataset_gr.add_argument('--out',
                            type=str,
                            nargs='?',
                            default=defaults['out'],
                            help='Specifies the file name, '
                            'and optionally path, '
                            'to which the results are written. '
                            'If not specified, '
                            'the default file name is out.csv, '
                            'written to the directory containing the script. '
                            '(default: %(default)r)')
    if standalone:
        dataset_gr.add_argument('import_id',
                                type=str,
                                help='Specifies the project '
                                'identification string.')
    else:
        dataset_gr.add_argument('--api_version',
                                type=str,
                                choices=RESPONSE_HANDLERS.keys(),
                                default=defaults['api_version'],
                                help='Specifies API version. '
                                '(default: %(default)r)')
        if deployment_aware:
            dataset_gr.add_argument('deployment_id',
                                    type=str,
                                    help='Specifies the model deployment '
                                    'identification string.')
        else:
            dataset_gr.add_argument('project_id',
                                    type=str,
                                    help='Specifies the project '
                                    'identification string.')
            dataset_gr.add_argument('model_id',
                                    type=str,
                                    help='Specifies the model identification '
                                    'string.')
        auth_gr = parser.add_argument_group('Authentication parameters')
        auth_gr.add_argument('--user',
                             type=str,
                             help='Specifies the username used to acquire '
                             'the api-token. '
                             'Use quotes if the name contains spaces.')
        auth_gr.add_argument('--password',
                             type=str,
                             nargs='?',
                             help='Specifies the password used to acquire '
                             'the api-token. '
                             'Use quotes if the name contains spaces.')
        auth_gr.add_argument('--api_token',
                             type=str,
                             nargs='?',
                             help='Specifies the api token for the requests; '
                             'if you do not have a token, '
                             'you must specify the password argument.')
        auth_gr.add_argument('--create_api_token',
                             action="store_true",
                             default=defaults['create_api_token'],
                             help='Requests a new API token. To use this '
                             'option, you must specify the '
                             'password argument for this request '
                             '(not the api_token argument). '
                             '(default: %(default)r)')
        auth_gr.add_argument('--datarobot_key',
                             type=str,
                             nargs='?',
                             help='An additional datarobot_key '
                             'for dedicated prediction instances.')
    dataset_gr.add_argument('dataset',
                            type=str,
                            help='Specifies the .csv input file that '
                            'the script scores.')
    dataset_gr.add_argument('--max_prediction_explanations',
                            type=int,
                            default=defaults['max_prediction_explanations'],
                            help='The maximum number of prediction '
                            'explanations that will be generated for '
                            'each prediction. '
                            'Not compatible with api version `api/v1`.')

    conn_gr = parser.add_argument_group('Connection control')
    conn_gr.add_argument('--timeout',
                         type=int,
                         default=defaults['timeout'],
                         help='The timeout for each post request. '
                         '(default: %(default)r)')
    conn_gr.add_argument('--n_samples',
                         type=int,
                         nargs='?',
                         default=defaults['n_samples'],
                         help='Specifies the number of samples '
                         '(rows) to use per batch. If not defined the '
                         '"auto_sample" option will be used.')
    conn_gr.add_argument('--n_concurrent',
                         type=int,
                         nargs='?',
                         default=defaults['n_concurrent'],
                         help='Specifies the number of concurrent requests '
                         'to submit. (default: %(default)r)')
    conn_gr.add_argument('--n_retry',
                         type=int,
                         default=defaults['n_retry'],
                         help='Specifies the number of times DataRobot '
                         'will retry if a request fails. '
                         'A value of -1 specifies an infinite '
                         'number of retries. (default: %(default)r)')
    conn_gr.add_argument('--resume',
                         dest='resume',
                         action='store_true',
                         default=defaults['resume'],
                         help='Starts the prediction from the point at which '
                         'it was halted. '
                         'If the prediction stopped, for example due '
                         'to an error or network connection issue, you can '
                         'run the same command with all the same '
                         'arguments plus this resume argument.')
    conn_gr.add_argument('--no-resume',
                         dest='resume',
                         action='store_false',
                         help='Starts the prediction from scratch, '
                         'disregarding the previous run.')
    conn_gr.add_argument('--compress',
                         action='store_true',
                         default=False,
                         help='Compress batch. This can improve throughput '
                         'when bandwidth is limited.')
    conn_gr.add_argument('--ca_bundle',
                         dest='verify_ssl',
                         metavar='PATH',
                         default=True,
                         help='Specifies the path to a CA_BUNDLE file or '
                         'directory with certificates of '
                         'trusted Certificate Authorities (CAs) '
                         'to be used for SSL verification. '
                         'By default the system\'s set of trusted '
                         'certificates will be used.')
    conn_gr.add_argument('--no_verify_ssl',
                         action='store_false',
                         dest='verify_ssl',
                         help='Skip SSL certificates verification for HTTPS '
                         'endpoints. Using this flag will cause the '
                         'argument for ca_bundle to be ignored.')
    csv_gr = parser.add_argument_group('CSV parameters')
    csv_gr.add_argument('--keep_cols',
                        type=str,
                        nargs='?',
                        help='Specifies the column names to append '
                        'to the predictions. '
                        'Enter as a comma-separated list.')
    csv_gr.add_argument('--delimiter',
                        type=str,
                        nargs='?',
                        default=None,
                        help='Specifies the delimiter to recognize in '
                        'the input .csv file. E.g. "--delimiter=,". '
                        'If not specified, the script tries to automatically '
                        'determine the delimiter. The special keyword "tab" '
                        'can be used to indicate a tab delimited csv. "pipe" '
                        'can be used to indicate "|".')
    csv_gr.add_argument('--pred_name',
                        type=str,
                        nargs='?',
                        default=None,
                        help='Specifies column name for prediction results, '
                        'empty name is used if not specified. For binary '
                        'predictions assumes last class in lexical order '
                        'as positive')
    csv_gr.add_argument('--pred_threshold',
                        type=str,
                        nargs='?',
                        default=None,
                        help='Specifies column name for prediction threshold '
                        'for binary classification. Column will not be '
                        'included if not specified')
    csv_gr.add_argument('--pred_decision',
                        type=str,
                        nargs='?',
                        default=None,
                        help='Specifies column name for prediction decision, '
                        'the value predicted by the model (class label '
                        'for classification)')
    csv_gr.add_argument('--fast',
                        action='store_true',
                        default=defaults['fast'],
                        help='Experimental: faster CSV processor. '
                        'Note: does not support multiline csv. ')
    csv_gr.add_argument('--auto_sample',
                        action='store_true',
                        default=defaults['auto_sample'],
                        help='Override "n_samples" and instead '
                        'use chunks of about 1.5 MB. This is recommended and '
                        'enabled by default if "n_samples" is not defined.')
    csv_gr.add_argument('--encoding',
                        type=str,
                        default='',
                        help='Declare the dataset encoding. '
                        'If an encoding is not provided the batch_scoring '
                        'script attempts to detect it. E.g "utf-8", "latin-1" '
                        'or "iso2022_jp". See the Python docs for a list of '
                        'valid encodings '
                        'https://docs.python.org/3/library/codecs.html'
                        '#standard-encodings')
    csv_gr.add_argument('--skip_dialect',
                        action='store_true',
                        default=False,
                        help='Tell the batch_scoring script '
                        'to skip csv dialect detection.')
    csv_gr.add_argument('--skip_row_id',
                        action='store_true',
                        default=False,
                        help='Skip the row_id column in output.')
    csv_gr.add_argument('--output_delimiter',
                        type=str,
                        default=None,
                        help='Set the delimiter for the output file. '
                        'The special keyword "tab" can be used to indicate '
                        'a tab delimited csv. "pipe" can be used to '
                        'indicate "|".')
    csv_gr.add_argument('--field_size_limit',
                        type=int,
                        default=None,
                        help='Override the maximum field size. May be '
                        'necessary for datasets with very wide text '
                        'fields, but can lead to memory issues.')
    misc_gr = parser.add_argument_group('Miscellaneous')
    misc_gr.add_argument('-y',
                         '--yes',
                         dest='prompt',
                         action='store_true',
                         help="Always answer 'yes' for user prompts")
    misc_gr.add_argument('-n',
                         '--no',
                         dest='prompt',
                         action='store_false',
                         help="Always answer 'no' for user prompts")
    misc_gr.add_argument('--dry_run',
                         dest='dry_run',
                         action='store_true',
                         help="Only read/chunk input data but dont send "
                         "requests.")
    misc_gr.add_argument('--stdout',
                         action='store_true',
                         dest='stdout',
                         default=False,
                         help='Send all log messages to stdout.')

    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if '--' + action.dest not in argv:
                action.nargs = '?'
    parsed_args = {
        k: v
        for k, v in vars(parser.parse_args(argv)).items() if v is not None
    }
    return parsed_args
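As a usage sketch only: a hypothetical invocation of the parse_args above, with placeholder host, credentials, and IDs, assuming no batch_scoring config file is picked up by get_config_file(), so the hard-coded defaults apply.

argv = [
    '--host', 'https://example.orm.datarobot.com',
    '--user', 'user@example.com',
    '--api_token', 'not-a-real-token',
    '--n_concurrent', '8',
    'aaaaaaaaaaaaaaaaaaaaaaaa',  # project_id (positional)
    'aaaaaaaaaaaaaaaaaaaaaaaa',  # model_id (positional)
    'data.csv',                  # dataset (positional)
]
args = parse_args(argv)
assert args['n_concurrent'] == 8   # CLI value, converted by type=int
assert args['out'] == 'out.csv'    # falls back to the defaults dict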
Example no. 12
0
def main(argv=sys.argv[1:]):
    global ui  # global variable hack, will get rid of a bit later
    warnings.simplefilter("ignore")
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Provides status updates while " "the script is running."
    )
    parser.add_argument("--version", action="version", version=VERSION_TEMPLATE, help="Show version")
    dataset_gr = parser.add_argument_group("Dataset and server")
    dataset_gr.add_argument(
        "--host",
        type=str,
        help="Specifies the protocol (http or https) and "
        "hostname of the prediction API endpoint. "
        'E.g. "https://example.orm.datarobot.com"',
    )
    dataset_gr.add_argument("project_id", type=str, help="Specifies the project " "identification string.")
    dataset_gr.add_argument("model_id", type=str, help="Specifies the model identification string.")
    dataset_gr.add_argument("dataset", type=str, help="Specifies the .csv input file that " "the script scores.")
    dataset_gr.add_argument(
        "--out",
        type=str,
        nargs="?",
        default="out.csv",
        help="Specifies the file name, "
        "and optionally path, "
        "to which the results are written. "
        "If not specified, "
        "the default file name is out.csv, "
        "written to the directory containing the script. "
        "(default: %(default)r)",
    )
    auth_gr = parser.add_argument_group("Authentication parameters")
    auth_gr.add_argument(
        "--user",
        type=str,
        help="Specifies the username used to acquire " "the api-token. " "Use quotes if the name contains spaces.",
    )
    auth_gr.add_argument(
        "--password",
        type=str,
        nargs="?",
        help="Specifies the password used to acquire " "the api-token. " "Use quotes if the name contains spaces.",
    )
    auth_gr.add_argument(
        "--api_token",
        type=str,
        nargs="?",
        help="Specifies the api token for the requests; "
        "if you do not have a token, "
        "you must specify the password argument.",
    )
    auth_gr.add_argument(
        "--create_api_token",
        action="store_true",
        default=False,
        help="Requests a new API token. To use this option, "
        "you must specify the "
        "password argument for this request "
        "(not the api_token argument). "
        "(default: %(default)r)",
    )
    auth_gr.add_argument(
        "--datarobot_key",
        type=str,
        nargs="?",
        help="An additional datarobot_key " "for dedicated prediction instances.",
    )
    conn_gr = parser.add_argument_group("Connection control")
    conn_gr.add_argument(
        "--timeout", type=int, default=30, help="The timeout for each post request. " "(default: %(default)r)"
    )
    conn_gr.add_argument(
        "--n_samples",
        type=int,
        nargs="?",
        default=False,
        help="Specifies the number of samples (rows) to use "
        'per batch. If not defined the "auto_sample" option '
        "will be used.",
    )
    conn_gr.add_argument(
        "--n_concurrent",
        type=int,
        nargs="?",
        default=4,
        help="Specifies the number of concurrent requests " "to submit. (default: %(default)r)",
    )
    conn_gr.add_argument(
        "--n_retry",
        type=int,
        default=3,
        help="Specifies the number of times DataRobot "
        "will retry if a request fails. "
        "A value of -1, the default, specifies "
        "an infinite number of retries."
        "(default: %(default)r)",
    )
    conn_gr.add_argument(
        "--resume",
        action="store_true",
        default=False,
        help="Starts the prediction from the point at which "
        "it was halted. "
        "If the prediction stopped, for example due "
        "to error or network connection issue, you can run "
        "the same command with all the same "
        "all arguments plus this resume argument.",
    )
    csv_gr = parser.add_argument_group("CVS parameters")
    csv_gr.add_argument(
        "--keep_cols",
        type=str,
        nargs="?",
        help="Specifies the column names to append " "to the predictions. " "Enter as a comma-separated list.",
    )
    csv_gr.add_argument(
        "--delimiter",
        type=str,
        nargs="?",
        default=None,
        help="Specifies the delimiter to recognize in "
        'the input .csv file. E.g. "--delimiter=,". '
        "If not specified, the script tries to automatically "
        'determine the delimiter. The special keyword "tab" '
        "can be used to indicate a tab delimited csv.",
    )
    csv_gr.add_argument(
        "--pred_name",
        type=str,
        nargs="?",
        default=None,
        help="Specifies column name for prediction results, "
        "empty name is used if not specified. For binary "
        "predictions assumes last class in lexical order "
        "as positive",
    )
    csv_gr.add_argument(
        "--fast",
        action="store_true",
        default=False,
        help="Experimental: faster CSV processor. " "Note: does not support multiline csv. ",
    )
    csv_gr.add_argument(
        "--auto_sample",
        action="store_true",
        default=False,
        help='Override "n_samples" and instead '
        "use chunks of about 1.5 MB. This is recommended and "
        'enabled by default if "n_samples" is not defined.',
    )
    csv_gr.add_argument(
        "--encoding",
        type=str,
        default="",
        help="Declare the dataset encoding. "
        "If an encoding is not provided the batch_scoring "
        'script attempts to detect it. E.g "utf-8", "latin-1" '
        'or "iso2022_jp". See the Python docs for a list of '
        "valid encodings "
        "https://docs.python.org/3/library/codecs.html"
        "#standard-encodings",
    )
    csv_gr.add_argument(
        "--skip_dialect",
        action="store_true",
        default=False,
        help="Tell the batch_scoring script " "to skip csv dialect detection.",
    )
    csv_gr.add_argument("--skip_row_id", action="store_true", default=False, help="Skip the row_id column in output.")
    csv_gr.add_argument("--output_delimiter", type=str, default=None, help="Set the delimiter for output file.")
    misc_gr = parser.add_argument_group("Miscellaneous")
    misc_gr.add_argument("-y", "--yes", dest="prompt", action="store_true", help="Always answer 'yes' for user prompts")
    misc_gr.add_argument("-n", "--no", dest="prompt", action="store_false", help="Always answer 'no' for user prompts")
    misc_gr.add_argument(
        "--dry_run", dest="dry_run", action="store_true", help="Only read/chunk input data but dont send " "requests."
    )
    misc_gr.add_argument(
        "--stdout", action="store_true", dest="stdout", default=False, help="Send all log messages to stdout."
    )

    defaults = {
        "prompt": None,
        "out": "out.csv",
        "create_api_token": False,
        "timeout": 30,
        "n_samples": False,
        "n_concurrent": 4,
        "n_retry": 3,
        "resume": False,
        "fast": False,
        "stdout": False,
        "auto_sample": False,
    }

    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if "--" + action.dest not in argv:
                action.nargs = "?"
    parsed_args = {k: v for k, v in vars(parser.parse_args(argv)).items() if v is not None}
    loglevel = logging.DEBUG if parsed_args["verbose"] else logging.INFO
    stdout = parsed_args["stdout"]
    ui = UI(parsed_args.get("prompt"), loglevel, stdout)
    printed_args = copy.copy(parsed_args)
    printed_args.pop("password", None)
    ui.debug(printed_args)
    ui.info("platform: {} {}".format(sys.platform, sys.version))

    # parse args
    host = parsed_args["host"]
    pid = parsed_args["project_id"]
    lid = parsed_args["model_id"]
    n_retry = int(parsed_args["n_retry"])
    if parsed_args.get("keep_cols"):
        keep_cols = [s.strip() for s in parsed_args["keep_cols"].split(",")]
    else:
        keep_cols = None
    concurrent = int(parsed_args["n_concurrent"])
    dataset = parsed_args["dataset"]
    n_samples = int(parsed_args["n_samples"])
    delimiter = parsed_args.get("delimiter")
    resume = parsed_args["resume"]
    out_file = parsed_args["out"]
    datarobot_key = parsed_args.get("datarobot_key")
    timeout = int(parsed_args["timeout"])
    fast_mode = parsed_args["fast"]
    auto_sample = parsed_args["auto_sample"]
    if not n_samples:
        auto_sample = True
    encoding = parsed_args["encoding"]
    skip_dialect = parsed_args["skip_dialect"]
    skip_row_id = parsed_args["skip_row_id"]
    output_delimiter = parsed_args.get("output_delimiter")

    if "user" not in parsed_args:
        user = ui.prompt_user()
    else:
        user = parsed_args["user"].strip()

    if not os.path.exists(parsed_args["dataset"]):
        ui.fatal("file {} does not exist.".format(parsed_args["dataset"]))

    try:
        verify_objectid(pid)
        verify_objectid(lid)
    except ValueError as e:
        ui.fatal(str(e))

    if delimiter == "\\t" or delimiter == "tab":
        # NOTE: on bash you have to use Ctrl-V + TAB
        delimiter = "\t"

    if delimiter and delimiter not in VALID_DELIMITERS:
        ui.fatal('Delimiter "{}" is not a valid delimiter.'.format(delimiter))

    if output_delimiter == "\\t" or output_delimiter == "tab":
        # NOTE: on bash you have to use Ctrl-V + TAB
        output_delimiter = "\t"

    if output_delimiter and output_delimiter not in VALID_DELIMITERS:
        ui.fatal('Output delimiter "{}" is not a valid delimiter.'.format(output_delimiter))

    api_token = parsed_args.get("api_token")
    create_api_token = parsed_args.get("create_api_token")
    pwd = parsed_args.get("password")
    pred_name = parsed_args.get("pred_name")
    dry_run = parsed_args.get("dry_run", False)

    base_url = parse_host(host, ui)

    base_headers = {}
    if datarobot_key:
        base_headers["datarobot-key"] = datarobot_key

    ui.debug("batch_scoring v{}".format(__version__))
    ui.info("connecting to {}".format(base_url))

    try:
        run_batch_predictions(
            base_url=base_url,
            base_headers=base_headers,
            user=user,
            pwd=pwd,
            api_token=api_token,
            create_api_token=create_api_token,
            pid=pid,
            lid=lid,
            n_retry=n_retry,
            concurrent=concurrent,
            resume=resume,
            n_samples=n_samples,
            out_file=out_file,
            keep_cols=keep_cols,
            delimiter=delimiter,
            dataset=dataset,
            pred_name=pred_name,
            timeout=timeout,
            ui=ui,
            fast_mode=fast_mode,
            auto_sample=auto_sample,
            dry_run=dry_run,
            encoding=encoding,
            skip_dialect=skip_dialect,
            skip_row_id=skip_row_id,
            output_delimiter=output_delimiter,
        )
    except SystemError:
        pass
    except ShelveError as e:
        ui.error(str(e))
    except KeyboardInterrupt:
        ui.info("Keyboard interrupt")
    except Exception as e:
        ui.fatal(str(e))
    finally:
        ui.close()
Example no. 13
0
def parse_args(argv, standalone=False):
    defaults = {
        'prompt': None,
        'out': 'out.csv',
        'create_api_token': False,
        'timeout': None,
        'n_samples': False,
        'n_concurrent': 4,
        'n_retry': 3,
        'resume': None,
        'fast': False,
        'stdout': False,
        'auto_sample': False,
        'api_version': PRED_API_V10,
    }
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--verbose', '-v', action="store_true",
                        help='Provides status updates while '
                        'the script is running.')
    parser.add_argument('--version', action='version',
                        version=VERSION_TEMPLATE, help='Show version')
    dataset_gr = parser.add_argument_group('Dataset and server')
    dataset_gr.add_argument('--host', type=str,
                            help='Specifies the protocol (http or https) and '
                                 'hostname of the prediction API endpoint. '
                                 'E.g. "https://example.orm.datarobot.com"')
    dataset_gr.add_argument('--out', type=str,
                            nargs='?', default=defaults['out'],
                            help='Specifies the file name, '
                            'and optionally path, '
                            'to which the results are written. '
                            'If not specified, '
                            'the default file name is out.csv, '
                            'written to the directory containing the script. '
                            '(default: %(default)r)')
    if standalone:
        dataset_gr.add_argument('import_id', type=str,
                                help='Specifies the project '
                                'identification string.')
    else:
        dataset_gr.add_argument('--api_version', type=str,
                                choices=RESPONSE_HANDLERS.keys(),
                                default=defaults['api_version'],
                                help='Specifies API version. '
                                     '(default: %(default)r)')
        dataset_gr.add_argument('project_id', type=str,
                                help='Specifies the project '
                                'identification string.')
        dataset_gr.add_argument('model_id', type=str,
                                help='Specifies the model identification '
                                     'string.')
        auth_gr = parser.add_argument_group('Authentication parameters')
        auth_gr.add_argument('--user', type=str,
                             help='Specifies the username used to acquire '
                             'the api-token. '
                             'Use quotes if the name contains spaces.')
        auth_gr.add_argument('--password', type=str, nargs='?',
                             help='Specifies the password used to acquire '
                             'the api-token. '
                             'Use quotes if the name contains spaces.')
        auth_gr.add_argument('--api_token', type=str, nargs='?',
                             help='Specifies the api token for the requests; '
                             'if you do not have a token, '
                             'you must specify the password argument.')
        auth_gr.add_argument('--create_api_token', action="store_true",
                             default=defaults['create_api_token'],
                             help='Requests a new API token. To use this '
                                  'option, you must specify the '
                                  'password argument for this request '
                                  '(not the api_token argument). '
                                  '(default: %(default)r)')
        auth_gr.add_argument('--datarobot_key', type=str,
                             nargs='?',
                             help='An additional datarobot_key '
                             'for dedicated prediction instances.')
    dataset_gr.add_argument('dataset', type=str,
                            help='Specifies the .csv input file that '
                            'the script scores.')

    conn_gr = parser.add_argument_group('Connection control')
    conn_gr.add_argument('--timeout', type=int,
                         default=defaults['timeout'],
                         help='The timeout for each post request. '
                         '(default: %(default)r)')
    conn_gr.add_argument('--n_samples', type=int,
                         nargs='?',
                         default=defaults['n_samples'],
                         help='Specifies the number of samples '
                              '(rows) to use per batch. If not defined the '
                              '"auto_sample" option will be used.')
    conn_gr.add_argument('--n_concurrent', type=int,
                         nargs='?',
                         default=defaults['n_concurrent'],
                         help='Specifies the number of concurrent requests '
                         'to submit. (default: %(default)r)')
    conn_gr.add_argument('--n_retry', type=int,
                         default=defaults['n_retry'],
                         help='Specifies the number of times DataRobot '
                         'will retry if a request fails. '
                         'A value of -1 specifies an infinite '
                         'number of retries. (default: %(default)r)')
    conn_gr.add_argument('--resume', dest='resume', action='store_true',
                         default=defaults['resume'],
                         help='Starts the prediction from the point at which '
                         'it was halted. '
                         'If the prediction stopped, for example due '
                         'to an error or network connection issue, you can '
                         'run the same command with all the same '
                         'arguments plus this resume argument.')
    conn_gr.add_argument('--no-resume', dest='resume', action='store_false',
                         help='Starts the prediction from scratch, '
                         'disregarding the previous run.')
    conn_gr.add_argument('--compress', action='store_true',
                         default=False,
                         help='Compress batch. This can improve throughput '
                              'when bandwidth is limited.')
    conn_gr.add_argument('--ca_bundle',
                         dest='verify_ssl',
                         metavar='PATH',
                         default=True,
                         help='Specifies the path to a CA_BUNDLE file or '
                              'directory with certificates of '
                              'trusted Certificate Authorities (CAs) '
                              'to be used for SSL verification. '
                              'By default the system\'s set of trusted '
                              'certificates will be used.')
    conn_gr.add_argument('--no_verify_ssl',
                         action='store_false',
                         dest='verify_ssl',
                         help='Skip SSL certificates verification for HTTPS '
                              'endpoints. Using this flag will cause the '
                              'argument for ca_bundle to be ignored.')
    csv_gr = parser.add_argument_group('CSV parameters')
    csv_gr.add_argument('--keep_cols', type=str,
                        nargs='?',
                        help='Specifies the column names to append '
                        'to the predictions. '
                        'Enter as a comma-separated list.')
    csv_gr.add_argument('--delimiter', type=str,
                        nargs='?', default=None,
                        help='Specifies the delimiter to recognize in '
                        'the input .csv file. E.g. "--delimiter=,". '
                        'If not specified, the script tries to automatically '
                        'determine the delimiter. The special keyword "tab" '
                        'can be used to indicate a tab delimited csv. "pipe" '
                        'can be used to indicate "|".')
    csv_gr.add_argument('--pred_name', type=str,
                        nargs='?', default=None,
                        help='Specifies column name for prediction results, '
                        'empty name is used if not specified. For binary '
                        'predictions assumes last class in lexical order '
                        'as positive')
    csv_gr.add_argument('--fast', action='store_true',
                        default=defaults['fast'],
                        help='Experimental: faster CSV processor. '
                        'Note: does not support multiline csv. ')
    csv_gr.add_argument('--auto_sample', action='store_true',
                        default=defaults['auto_sample'],
                        help='Override "n_samples" and instead '
                        'use chunks of about 1.5 MB. This is recommended and '
                        'enabled by default if "n_samples" is not defined.')
    csv_gr.add_argument('--encoding', type=str,
                        default='', help='Declare the dataset encoding. '
                        'If an encoding is not provided the batch_scoring '
                        'script attempts to detect it. E.g "utf-8", "latin-1" '
                        'or "iso2022_jp". See the Python docs for a list of '
                        'valid encodings '
                        'https://docs.python.org/3/library/codecs.html'
                        '#standard-encodings')
    csv_gr.add_argument('--skip_dialect', action='store_true',
                        default=False, help='Tell the batch_scoring script '
                        'to skip csv dialect detection.')
    csv_gr.add_argument('--skip_row_id', action='store_true', default=False,
                        help='Skip the row_id column in output.')
    csv_gr.add_argument('--output_delimiter', type=str, default=None,
                        help='Set the delimiter for the output file. '
                             'The special keyword "tab" can be used to '
                             'indicate a tab delimited csv. "pipe" can be '
                             'used to indicate "|".')
    csv_gr.add_argument('--field_size_limit', type=int, default=None,
                        help='Override the maximum field size. May be '
                             'necessary for datasets with very wide text '
                             'fields, but can lead to memory issues.')
    misc_gr = parser.add_argument_group('Miscellaneous')
    misc_gr.add_argument('-y', '--yes', dest='prompt', action='store_true',
                         help="Always answer 'yes' for user prompts")
    misc_gr.add_argument('-n', '--no', dest='prompt', action='store_false',
                         help="Always answer 'no' for user prompts")
    misc_gr.add_argument('--dry_run', dest='dry_run', action='store_true',
                         help="Only read/chunk input data but dont send "
                         "requests.")
    misc_gr.add_argument('--stdout', action='store_true', dest='stdout',
                         default=False,
                         help='Send all log messages to stdout.')

    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if '--' + action.dest not in argv:
                action.nargs = '?'
    parsed_args = {k: v
                   for k, v in vars(parser.parse_args(argv)).items()
                   if v is not None}
    return parsed_args
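The interesting mechanism shared by both parse_args variants sits at the end: defaults loaded from a config file are injected with parser.set_defaults(), and any required argument the config already satisfies is relaxed so it may be omitted on the command line. A minimal, self-contained sketch of that pattern, with illustrative names only:

import argparse


def parse_with_config(argv, file_args):
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', type=str)
    parser.add_argument('--out', type=str, default='out.csv')

    defaults = {'out': 'out.csv'}
    defaults.update(file_args)   # config file wins over hard-coded defaults
    parser.set_defaults(**defaults)

    # Same trick as parse_args() above: a positional that is already covered
    # by the config file no longer has to appear on the command line.
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if '--' + action.dest not in argv:
                action.nargs = '?'

    return {k: v for k, v in vars(parser.parse_args(argv)).items()
            if v is not None}


# 'dataset' comes from the (simulated) config file, so argv may omit it:
print(parse_with_config([], {'dataset': 'data.csv'}))
# -> {'dataset': 'data.csv', 'out': 'out.csv'}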