예제 #1
0
 def test_getpass(self):
     """``UI.getpass`` hands back whatever ``getpass.getpass`` returns."""
     target = 'datarobot_batch_scoring.utils.getpass.getpass'
     ui = UI(None, logging.DEBUG)
     with mock.patch(target) as fake_getpass:
         fake_getpass.return_value = 'passwd'
         entered = ui.getpass()
         assert entered == 'passwd'
         # The prompt text shown to the user is part of the contract.
         fake_getpass.assert_called_with('password> ')
예제 #2
0
 def test_prompt_yesno_user_input_invalid(self):
     """An unrecognised answer is re-prompted until 'yes' is typed."""
     ui = UI(None, logging.DEBUG)
     expected_prompts = [
         mock.call('msg (Yes/No)> '),
         mock.call('Please type (Yes/No)> '),
     ]
     with mock.patch('datarobot_batch_scoring.utils.input') as fake_input:
         # First reply is rejected, second one is accepted.
         fake_input.side_effect = ['invalid', 'yes']
         answer = ui.prompt_yesno('msg')
         assert answer
         fake_input.assert_has_calls(expected_prompts)
예제 #3
0
 def test_getpass_noninteractive(self):
     """``UI(True, ...)`` raises from ``getpass`` and never prompts."""
     target = 'datarobot_batch_scoring.utils.getpass.getpass'
     ui = UI(True, logging.DEBUG)
     with mock.patch(target) as fake_getpass:
         with pytest.raises(RuntimeError) as raised:
             ui.getpass()
         assert "Non-interactive session" == str(raised.value)
         # The real password prompt must not have been reached.
         assert not fake_getpass.called
예제 #4
0
 def test_error(self):
     """``UI.error`` forwards the text to both ``logger`` and ``root_logger``."""
     prefix = 'datarobot_batch_scoring.utils.'
     ui = UI(None, logging.DEBUG)
     with mock.patch(prefix + 'logger') as m_log, \
             mock.patch(prefix + 'root_logger') as m_root:
         ui.error('text')
         m_log.error.assert_called_with('text')
         # The root logger additionally receives exc_info=False.
         m_root.error.assert_called_with('text', exc_info=False)
예제 #5
0
 def test_fatal(self):
     """``UI.fatal`` logs the support message to both loggers and exits 1."""
     prefix = 'datarobot_batch_scoring.utils.'
     ui = UI(None, logging.DEBUG)
     # NOTE(review): the address in this literal looks mangled by e-mail
     # obfuscation -- confirm it against the message built by UI.fatal.
     template = ('{}\nIf you need assistance please send the log \n'
                 'file {} to [email protected] .')
     msg = template.format('text', ui.root_logger_filename)
     with mock.patch(prefix + 'logger') as m_log, \
             mock.patch(prefix + 'root_logger') as m_root:
         # sys.exit is replaced only for the duration of the fatal() call.
         with mock.patch(prefix + 'sys.exit') as m_exit:
             ui.fatal('text')
         m_log.error.assert_called_with(msg)
         m_root.error.assert_called_with(
             msg, exc_info=(None, None, None))
         m_exit.assert_called_with(1)
예제 #6
0
 def test_getpass(self):
     """``UI.getpass`` hands back whatever ``getpass.getpass`` returns."""
     target = 'datarobot_batch_scoring.utils.getpass.getpass'
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch(target) as fake_getpass:
             fake_getpass.return_value = 'passwd'
             entered = ui.getpass()
             assert entered == 'passwd'
             # The prompt text shown to the user is part of the contract.
             fake_getpass.assert_called_with('password> ')
예제 #7
0
 def test_prompt_yesno_user_input_invalid(self):
     """An unrecognised answer is re-prompted until 'yes' is typed."""
     expected_prompts = [
         mock.call('msg (Yes/No)> '),
         mock.call('Please type (Yes/No)> '),
     ]
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch('datarobot_batch_scoring.utils.input') as fake_input:
             # First reply is rejected, second one is accepted.
             fake_input.side_effect = ['invalid', 'yes']
             answer = ui.prompt_yesno('msg')
             assert answer
             fake_input.assert_has_calls(expected_prompts)
예제 #8
0
 def test_prompt_user(self):
     """``UI.prompt_user`` returns the typed name, asked via 'user name> '."""
     target = 'datarobot_batch_scoring.utils.input'
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch(target) as fake_input:
             fake_input.return_value = 'Andrew'
             typed_name = ui.prompt_user()
             assert typed_name == 'Andrew'
             fake_input.assert_called_with('user name> ')
예제 #9
0
 def test_getpass_noninteractive(self):
     """``UI(True, ...)`` raises from ``getpass`` and never prompts."""
     target = 'datarobot_batch_scoring.utils.getpass.getpass'
     with UI(True, logging.DEBUG, stdout=False) as ui:
         with mock.patch(target) as fake_getpass:
             with pytest.raises(RuntimeError) as raised:
                 ui.getpass()
             assert "Non-interactive session" == str(raised.value)
             # The real password prompt must not have been reached.
             assert not fake_getpass.called
예제 #10
0
 def test_error(self):
     """``UI.error`` forwards the text to both ``logger`` and ``root_logger``."""
     prefix = 'datarobot_batch_scoring.utils.'
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch(prefix + 'logger') as m_log, \
                 mock.patch(prefix + 'root_logger') as m_root:
             ui.error('text')
             m_log.error.assert_called_with('text')
             # The root logger additionally receives exc_info=False.
             m_root.error.assert_called_with('text', exc_info=False)
예제 #11
0
def test_investigate_encoding_and_dialect():
    """Encoding and CSV dialect are detected for the Windows-encoded fixture."""
    fixture = 'tests/fixtures/windows_encoded.csv'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        detected = investigate_encoding_and_dialect(fixture, None, ui)
        # The dialect is looked up by name in the csv module's registry.
        registered = csv.get_dialect('dataset_dialect')
        assert detected == 'iso-8859-2'
        assert registered.lineterminator == '\r\n'
        assert registered.quotechar == '"'
        assert registered.delimiter == ','
예제 #12
0
def test_parse_host_no_protocol_fatal():
    """A host without a protocol prefix is reported through ``UI.fatal``."""
    host = '57a2a9eac808914f2fb8f717.com/api'
    expected = ('Cannot parse "--host" argument. Host address must start '
                'with a protocol such as "http://" or "https://".'
                ' Value given: {}').format(host)
    with UI(None, logging.INFO, stdout=False) as ui:
        # fatal() is mocked, so the assertion below can inspect its argument.
        with mock.patch('datarobot_batch_scoring.utils.UI.fatal') as fatal:
            parse_host(host, ui)
            fatal.assert_called_with(expected)
예제 #13
0
def test_investigate_encoding_and_dialect_submit_encoding():
    """An explicitly supplied encoding is returned without calling chardet."""
    fixture = 'tests/fixtures/windows_encoded.csv'
    detect_target = 'datarobot_batch_scoring.reader.chardet.detect'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        with mock.patch(detect_target) as detect:
            result = investigate_encoding_and_dialect(
                fixture, None, ui,
                fast=False,
                encoding='iso-8859-2',
                skip_dialect=False)
        assert result == 'iso-8859-2'
        assert not detect.called
예제 #14
0
def test_investigate_encoding_and_dialect_skip_dialect():
    """With ``skip_dialect=True`` the Sniffer is unused; delimiter is ','."""
    fixture = 'tests/fixtures/windows_encoded.csv'
    sniffer_target = 'datarobot_batch_scoring.reader.csv.Sniffer'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        with mock.patch(sniffer_target) as sniffer:
            result = investigate_encoding_and_dialect(
                fixture, None, ui,
                fast=False,
                encoding='',
                skip_dialect=True)
        # An empty encoding argument still yields the detected encoding.
        assert result == 'iso-8859-2'
        assert not sniffer.called
        registered = csv.get_dialect('dataset_dialect')
        assert registered.delimiter == ','
예제 #15
0
def test_investigate_encoding_and_dialect_substitute_delimiter():
    """A caller-supplied delimiter ends up in the registered dialect."""
    fixture = 'tests/fixtures/windows_encoded.csv'
    sniffer_target = 'datarobot_batch_scoring.reader.csv.Sniffer'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        with mock.patch(sniffer_target) as sniffer:
            result = investigate_encoding_and_dialect(
                fixture, '|', ui,
                fast=False,
                encoding='utf-8',
                skip_dialect=True)
        assert result == 'utf-8'  # Intentionally wrong
        assert not sniffer.called
        registered = csv.get_dialect('dataset_dialect')
        assert registered.delimiter == '|'
예제 #16
0
def test_stdout_logging_and_csv_module_fail(capsys):
    """With ``stdout=True`` the dialect-detection failure is printed to stdout.

    The unparsable fixture makes ``investigate_encoding_and_dialect`` raise
    ``csv.Error`` (``sys.exit`` is mocked) and the support message must
    appear in the captured standard output.
    """
    fixture = 'tests/fixtures/unparsable.csv'
    exit_target = 'datarobot_batch_scoring.utils.sys.exit'
    with UI(None, logging.DEBUG, stdout=True) as ui:
        exc = str("[ERROR] The csv module failed to detect the CSV "
                  "dialect. Try giving hints with the --delimiter "
                  "argument, E.g  --delimiter=','")
        # NOTE(review): the address in this literal looks mangled by e-mail
        # obfuscation -- confirm it against the message the UI really prints.
        template = ('{}\nIf you need assistance please send the output of '
                    'this script to [email protected].')
        msg = template.format(exc)
        with mock.patch(exit_target) as m_exit:
            with pytest.raises(csv.Error):
                investigate_encoding_and_dialect(fixture, None, ui)
            m_exit.assert_called_with(1)
        out, _ = capsys.readouterr()
        assert msg in out.strip('\n')
예제 #17
0
 def test_fatal(self):
     """``UI.fatal`` logs the support message to both loggers and exits 1."""
     prefix = 'datarobot_batch_scoring.utils.'
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch(prefix + 'logger') as m_log, \
                 mock.patch(prefix + 'root_logger') as m_root, \
                 mock.patch(prefix + 'sys.exit') as m_exit:
             # NOTE(review): the address in this literal looks mangled by
             # e-mail obfuscation -- confirm it against UI.fatal.
             template = ('{}\nIf you need assistance please send the '
                         'log file/s:\n{}to [email protected].')
             msg = template.format('text', ui.get_all_logfiles())
             ui.fatal('text')
             m_log.error.assert_called_with(msg)
             m_root.error.assert_called_with(
                 msg, exc_info=(None, None, None))
             m_exit.assert_called_with(1)
예제 #18
0
 def test_prompt_yesno_always_no(self):
     """``UI(False, ...)`` answers 'no' without asking for input."""
     target = 'datarobot_batch_scoring.utils.input'
     ui = UI(False, logging.DEBUG)
     with mock.patch(target) as fake_input:
         answer = ui.prompt_yesno('msg')
         assert not answer
         assert not fake_input.called
예제 #19
0
 def test_prompt_yesno_user_input_n(self):
     """Typing 'n' at the yes/no prompt yields a falsy answer."""
     target = 'datarobot_batch_scoring.utils.input'
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch(target) as fake_input:
             fake_input.return_value = 'n'
             answer = ui.prompt_yesno('msg')
             assert not answer
             fake_input.assert_called_with('msg (Yes/No)> ')
예제 #20
0
 def test_prompt_yesno_user_input_n(self):
     """Typing 'n' at the yes/no prompt yields a falsy answer."""
     target = 'datarobot_batch_scoring.utils.input'
     ui = UI(None, logging.DEBUG)
     with mock.patch(target) as fake_input:
         fake_input.return_value = 'n'
         answer = ui.prompt_yesno('msg')
         assert not answer
         fake_input.assert_called_with('msg (Yes/No)> ')
예제 #21
0
def ui():
    '''Yield a ``UI(True, 'DEBUG', False)`` instance and close it afterwards.

    NOTE(review): presumably registered as a pytest fixture by a decorator
    that is not visible here -- confirm.
    '''
    session_ui = UI(True, 'DEBUG', False)
    yield session_ui
    # Runs once the consumer resumes the generator after the yield.
    session_ui.close()
예제 #22
0
def test_parse_host(input, expected):
    """``parse_host`` maps the given host string to the expected value.

    NOTE(review): ``input``/``expected`` are presumably supplied by a
    parametrize decorator outside this view -- confirm.
    """
    with UI(None, logging.DEBUG, stdout=False) as ui:
        parsed = parse_host(input, ui)
        assert parsed == expected
예제 #23
0
 def test_prompt_yesno_always_no(self):
     """``UI(False, ...)`` answers 'no' without asking for input."""
     target = 'datarobot_batch_scoring.utils.input'
     with UI(False, logging.DEBUG, stdout=False) as ui:
         with mock.patch(target) as fake_input:
             answer = ui.prompt_yesno('msg')
             assert not answer
             assert not fake_input.called
예제 #24
0
def test_auto_small_dataset():
    """``auto_sampler`` returns 500 for the small gzipped regression fixture."""
    fixture = 'tests/fixtures/regression_jp.csv.gz'
    with UI(None, logging.DEBUG, stdout=False) as ui:
        detected = investigate_encoding_and_dialect(fixture, None, ui)
        batch_size = auto_sampler(fixture, detected, ui)
        assert batch_size == 500
예제 #25
0
def test_auto_sample():
    """``auto_sampler`` returns 14980 for the gzipped criteo fixture."""
    # The ``with`` block is relied on to close the UI, as in the sibling
    # tests; the extra explicit ``ui.close()`` inside it was redundant.
    # NOTE(review): assumes UI.__exit__ closes the instance -- confirm.
    with UI(None, logging.DEBUG, stdout=False) as ui:
        data = 'tests/fixtures/criteo_top30_1m.csv.gz'
        encoding = investigate_encoding_and_dialect(data, None, ui)
        assert auto_sampler(data, encoding, ui) == 14980
예제 #26
0
def main(argv=None):
    """Command-line entry point: parse the arguments and run the predictions.

    Builds the argument parser, overlays defaults from the config file
    returned by ``get_config_file()`` (when there is one), validates the
    inputs and hands everything to ``run_batch_predictions``.

    ``ShelveError`` is reported through ``ui.error``, a keyboard interrupt is
    logged, any other ``Exception`` goes to ``ui.fatal`` and ``ui.close()``
    always runs after the prediction run.

    Args:
        argv: Command-line arguments without the program name. When ``None``
            (the default) ``sys.argv[1:]`` is read at call time.
    """
    global ui  # global variable hack, will get rid of a bit later
    if argv is None:
        # Resolved here rather than in the signature: a ``sys.argv[1:]``
        # default would be evaluated once, when the module is imported.
        argv = sys.argv[1:]
    warnings.simplefilter("ignore")
    parser = argparse.ArgumentParser(
        description=DESCRIPTION, epilog=EPILOG, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--verbose", "-v", action="store_true", help="Provides status updates while " "the script is running."
    )
    parser.add_argument("--version", action="version", version=VERSION_TEMPLATE, help="Show version")
    dataset_gr = parser.add_argument_group("Dataset and server")
    dataset_gr.add_argument(
        "--host",
        type=str,
        help="Specifies the protocol (http or https) and "
        "hostname of the prediction API endpoint. "
        'E.g. "https://example.orm.datarobot.com"',
    )
    dataset_gr.add_argument("project_id", type=str, help="Specifies the project " "identification string.")
    dataset_gr.add_argument("model_id", type=str, help="Specifies the model identification string.")
    dataset_gr.add_argument("dataset", type=str, help="Specifies the .csv input file that " "the script scores.")
    dataset_gr.add_argument(
        "--out",
        type=str,
        nargs="?",
        default="out.csv",
        help="Specifies the file name, "
        "and optionally path, "
        "to which the results are written. "
        "If not specified, "
        "the default file name is out.csv, "
        "written to the directory containing the script. "
        "(default: %(default)r)",
    )
    auth_gr = parser.add_argument_group("Authentication parameters")
    auth_gr.add_argument(
        "--user",
        type=str,
        help="Specifies the username used to acquire " "the api-token. " "Use quotes if the name contains spaces.",
    )
    auth_gr.add_argument(
        "--password",
        type=str,
        nargs="?",
        help="Specifies the password used to acquire "
        "the api-token. "
        "Use quotes if the password contains spaces.",
    )
    auth_gr.add_argument(
        "--api_token",
        type=str,
        nargs="?",
        help="Specifies the api token for the requests; "
        "if you do not have a token, "
        "you must specify the password argument.",
    )
    auth_gr.add_argument(
        "--create_api_token",
        action="store_true",
        default=False,
        help="Requests a new API token. To use this option, "
        "you must specify the "
        "password argument for this request "
        "(not the api_token argument). "
        "(default: %(default)r)",
    )
    auth_gr.add_argument(
        "--datarobot_key",
        type=str,
        nargs="?",
        help="An additional datarobot_key " "for dedicated prediction instances.",
    )
    conn_gr = parser.add_argument_group("Connection control")
    conn_gr.add_argument(
        "--timeout", type=int, default=30, help="The timeout for each post request. " "(default: %(default)r)"
    )
    conn_gr.add_argument(
        "--n_samples",
        type=int,
        nargs="?",
        default=False,
        help="Specifies the number of samples (rows) to use "
        'per batch. If not defined the "auto_sample" option '
        "will be used.",
    )
    conn_gr.add_argument(
        "--n_concurrent",
        type=int,
        nargs="?",
        default=4,
        help="Specifies the number of concurrent requests " "to submit. (default: %(default)r)",
    )
    conn_gr.add_argument(
        "--n_retry",
        type=int,
        default=3,
        help="Specifies the number of times DataRobot "
        "will retry if a request fails. "
        "A value of -1 specifies "
        "an infinite number of retries. "
        "(default: %(default)r)",
    )
    conn_gr.add_argument(
        "--resume",
        action="store_true",
        default=False,
        help="Starts the prediction from the point at which "
        "it was halted. "
        "If the prediction stopped, for example due "
        "to error or network connection issue, you can run "
        "the same command with all the same "
        "arguments plus this resume argument.",
    )
    csv_gr = parser.add_argument_group("CSV parameters")
    csv_gr.add_argument(
        "--keep_cols",
        type=str,
        nargs="?",
        help="Specifies the column names to append " "to the predictions. " "Enter as a comma-separated list.",
    )
    csv_gr.add_argument(
        "--delimiter",
        type=str,
        nargs="?",
        default=None,
        help="Specifies the delimiter to recognize in "
        'the input .csv file. E.g. "--delimiter=,". '
        "If not specified, the script tries to automatically "
        'determine the delimiter. The special keyword "tab" '
        "can be used to indicate a tab delimited csv.",
    )
    csv_gr.add_argument(
        "--pred_name",
        type=str,
        nargs="?",
        default=None,
        help="Specifies column name for prediction results, "
        "empty name is used if not specified. For binary "
        "predictions assumes last class in lexical order "
        "as positive",
    )
    csv_gr.add_argument(
        "--fast",
        action="store_true",
        default=False,
        help="Experimental: faster CSV processor. " "Note: does not support multiline csv. ",
    )
    csv_gr.add_argument(
        "--auto_sample",
        action="store_true",
        default=False,
        help='Override "n_samples" and instead '
        "use chunks of about 1.5 MB. This is recommended and "
        'enabled by default if "n_samples" is not defined.',
    )
    csv_gr.add_argument(
        "--encoding",
        type=str,
        default="",
        help="Declare the dataset encoding. "
        "If an encoding is not provided the batch_scoring "
        'script attempts to detect it. E.g "utf-8", "latin-1" '
        'or "iso2022_jp". See the Python docs for a list of '
        "valid encodings "
        "https://docs.python.org/3/library/codecs.html"
        "#standard-encodings",
    )
    csv_gr.add_argument(
        "--skip_dialect",
        action="store_true",
        default=False,
        help="Tell the batch_scoring script " "to skip csv dialect detection.",
    )
    csv_gr.add_argument("--skip_row_id", action="store_true", default=False, help="Skip the row_id column in output.")
    csv_gr.add_argument("--output_delimiter", type=str, default=None, help="Set the delimiter for output file.")
    misc_gr = parser.add_argument_group("Miscellaneous")
    misc_gr.add_argument("-y", "--yes", dest="prompt", action="store_true", help="Always answer 'yes' for user prompts")
    misc_gr.add_argument("-n", "--no", dest="prompt", action="store_false", help="Always answer 'no' for user prompts")
    misc_gr.add_argument(
        "--dry_run", dest="dry_run", action="store_true", help="Only read/chunk input data but don't send " "requests."
    )
    misc_gr.add_argument(
        "--stdout", action="store_true", dest="stdout", default=False, help="Send all log messages to stdout."
    )

    defaults = {
        "prompt": None,
        "out": "out.csv",
        "create_api_token": False,
        "timeout": 30,
        "n_samples": False,
        "n_concurrent": 4,
        "n_retry": 3,
        "resume": False,
        "fast": False,
        "stdout": False,
        "auto_sample": False,
    }

    # Values from the config file take precedence over the built-in defaults.
    conf_file = get_config_file()
    if conf_file:
        file_args = parse_config_file(conf_file)
        defaults.update(file_args)
    parser.set_defaults(**defaults)
    # An argument that now has a default must no longer be required; unless
    # its "--name" flag is on the command line its value becomes optional.
    for action in parser._actions:
        if action.dest in defaults and action.required:
            action.required = False
            if "--" + action.dest not in argv:
                action.nargs = "?"
    # Drop options that ended up as None so the ``in`` / ``.get`` checks
    # below only see values that were actually supplied.
    parsed_args = {k: v for k, v in vars(parser.parse_args(argv)).items() if v is not None}
    loglevel = logging.DEBUG if parsed_args["verbose"] else logging.INFO
    stdout = parsed_args["stdout"]
    ui = UI(parsed_args.get("prompt"), loglevel, stdout)
    # The password is removed from the copy that gets logged.
    printed_args = copy.copy(parsed_args)
    printed_args.pop("password", None)
    ui.debug(printed_args)
    ui.info("platform: {} {}".format(sys.platform, sys.version))

    # parse args
    host = parsed_args["host"]
    pid = parsed_args["project_id"]
    lid = parsed_args["model_id"]
    n_retry = int(parsed_args["n_retry"])
    if parsed_args.get("keep_cols"):
        keep_cols = [s.strip() for s in parsed_args["keep_cols"].split(",")]
    else:
        keep_cols = None
    concurrent = int(parsed_args["n_concurrent"])
    dataset = parsed_args["dataset"]
    n_samples = int(parsed_args["n_samples"])
    delimiter = parsed_args.get("delimiter")
    resume = parsed_args["resume"]
    out_file = parsed_args["out"]
    datarobot_key = parsed_args.get("datarobot_key")
    timeout = int(parsed_args["timeout"])
    fast_mode = parsed_args["fast"]
    auto_sample = parsed_args["auto_sample"]
    # Without an explicit batch size, automatic sampling is switched on.
    if not n_samples:
        auto_sample = True
    encoding = parsed_args["encoding"]
    skip_dialect = parsed_args["skip_dialect"]
    skip_row_id = parsed_args["skip_row_id"]
    output_delimiter = parsed_args.get("output_delimiter")

    if "user" not in parsed_args:
        user = ui.prompt_user()
    else:
        user = parsed_args["user"].strip()

    if not os.path.exists(parsed_args["dataset"]):
        ui.fatal("file {} does not exist.".format(parsed_args["dataset"]))

    try:
        verify_objectid(pid)
        verify_objectid(lid)
    except ValueError as e:
        ui.fatal(str(e))

    if delimiter == "\\t" or delimiter == "tab":
        # NOTE: on bash you have to use Ctrl-V + TAB
        delimiter = "\t"

    if delimiter and delimiter not in VALID_DELIMITERS:
        ui.fatal('Delimiter "{}" is not a valid delimiter.'.format(delimiter))

    if output_delimiter == "\\t" or output_delimiter == "tab":
        # NOTE: on bash you have to use Ctrl-V + TAB
        output_delimiter = "\t"

    if output_delimiter and output_delimiter not in VALID_DELIMITERS:
        ui.fatal('Output delimiter "{}" is not a valid delimiter.'.format(output_delimiter))

    api_token = parsed_args.get("api_token")
    create_api_token = parsed_args.get("create_api_token")
    pwd = parsed_args.get("password")
    pred_name = parsed_args.get("pred_name")
    dry_run = parsed_args.get("dry_run", False)

    base_url = parse_host(host, ui)

    base_headers = {}
    if datarobot_key:
        base_headers["datarobot-key"] = datarobot_key

    ui.debug("batch_scoring v{}".format(__version__))
    ui.info("connecting to {}".format(base_url))

    try:
        run_batch_predictions(
            base_url=base_url,
            base_headers=base_headers,
            user=user,
            pwd=pwd,
            api_token=api_token,
            create_api_token=create_api_token,
            pid=pid,
            lid=lid,
            n_retry=n_retry,
            concurrent=concurrent,
            resume=resume,
            n_samples=n_samples,
            out_file=out_file,
            keep_cols=keep_cols,
            delimiter=delimiter,
            dataset=dataset,
            pred_name=pred_name,
            timeout=timeout,
            ui=ui,
            fast_mode=fast_mode,
            auto_sample=auto_sample,
            dry_run=dry_run,
            encoding=encoding,
            skip_dialect=skip_dialect,
            skip_row_id=skip_row_id,
            output_delimiter=output_delimiter,
        )
    except SystemError:
        # NOTE(review): SystemError is swallowed silently here; confirm that
        # this (and not SystemExit) is what was intended.
        pass
    except ShelveError as e:
        ui.error(str(e))
    except KeyboardInterrupt:
        ui.info("Keyboard interrupt")
    except Exception as e:
        ui.fatal(str(e))
    finally:
        ui.close()
예제 #27
0
 def test_warning(self):
     """``UI.warning`` forwards the text to ``logger.warning``."""
     target = 'datarobot_batch_scoring.utils.logger'
     with UI(None, logging.DEBUG, stdout=False) as ui:
         with mock.patch(target) as fake_logger:
             ui.warning('text')
             fake_logger.warning.assert_called_with('text')
예제 #28
0
def parse_generic_options(parsed_args):
    """Turn the parsed argument mapping into the generic run options.

    Creates the module-level ``ui``, logs the arguments (without the
    password), the version and the platform, translates the delimiter
    aliases and returns the collected options as a dict.

    ``ui.fatal`` is called for an invalid input or output delimiter, for a
    dataset path that does not exist, and when prediction explanations are
    requested together with the ``API_V1`` api version.
    """
    global ui
    if parsed_args['verbose']:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    ui = UI(parsed_args.get('prompt'), loglevel, parsed_args['stdout'])

    # Log everything that was passed in except the password.
    printed_args = copy.copy(parsed_args)
    printed_args.pop('password', None)
    ui.debug(printed_args)
    ui.info('version: {}'.format(__version__))
    ui.info('platform: {} {}'.format(sys.platform, sys.version))

    n_retry = int(parsed_args['n_retry'])
    raw_keep_cols = parsed_args.get('keep_cols')
    keep_cols = None
    if raw_keep_cols:
        keep_cols = [name.strip() for name in raw_keep_cols.split(',')]
    concurrent = int(parsed_args['n_concurrent'])

    resume = parsed_args.get('resume')
    compression = parsed_args['compress']
    out_file = parsed_args['out']
    raw_timeout = parsed_args.get('timeout')
    timeout = int(raw_timeout) if raw_timeout is not None else None
    fast_mode = parsed_args['fast']
    encoding = parsed_args['encoding']
    skip_dialect = parsed_args['skip_dialect']
    skip_row_id = parsed_args['skip_row_id']
    field_size_limit = parsed_args.get('field_size_limit')
    pred_name = parsed_args.get('pred_name')
    pred_threshold_name = parsed_args.get('pred_threshold')
    pred_decision_name = parsed_args.get('pred_decision')
    dry_run = parsed_args.get('dry_run', False)

    n_samples = int(parsed_args['n_samples'])
    auto_sample = parsed_args['auto_sample']
    # Without an explicit batch size, automatic sampling is switched on.
    auto_sample = auto_sample if n_samples else True

    # Spelled-out aliases for delimiters that are awkward on a command line.
    # NOTE: on bash you have to use Ctrl-V + TAB for a literal tab; using
    # the | char has issues on Windows for some reason.
    aliases = {'\\t': '\t', 'tab': '\t', 'pipe': '|'}

    delimiter = parsed_args.get('delimiter')
    delimiter = aliases.get(delimiter, delimiter)
    if delimiter and delimiter not in VALID_DELIMITERS:
        ui.fatal('Delimiter "{}" is not a valid delimiter.'.format(delimiter))

    output_delimiter = parsed_args.get('output_delimiter')
    output_delimiter = aliases.get(output_delimiter, output_delimiter)
    if output_delimiter and output_delimiter not in VALID_DELIMITERS:
        ui.fatal('Output delimiter "{}" is not a valid delimiter.'.format(
            output_delimiter))

    dataset = parsed_args['dataset']
    dataset_found = os.path.exists(dataset)
    if not dataset_found:
        ui.fatal('file {} does not exist.'.format(dataset))
    api_version = parsed_args['api_version']
    max_prediction_explanations = parsed_args['max_prediction_explanations']
    if max_prediction_explanations > 0 and api_version == API_V1:
        ui.fatal('Prediction explanation is not available for '
                 'api_version `api/v1` please use the '
                 '`predApi/v1.0` or deployments endpoint')

    ui.debug('batch_scoring v{}'.format(__version__))

    return {
        'auto_sample': auto_sample,
        'compression': compression,
        'concurrent': concurrent,
        'dataset': dataset,
        'delimiter': delimiter,
        'dry_run': dry_run,
        'encoding': encoding,
        'fast_mode': fast_mode,
        'field_size_limit': field_size_limit,
        'keep_cols': keep_cols,
        'n_retry': n_retry,
        'n_samples': n_samples,
        'out_file': out_file,
        'output_delimiter': output_delimiter,
        'pred_name': pred_name,
        'pred_threshold_name': pred_threshold_name,
        'pred_decision_name': pred_decision_name,
        'resume': resume,
        'skip_dialect': skip_dialect,
        'skip_row_id': skip_row_id,
        'timeout': timeout,
        'verify_ssl': parsed_args['verify_ssl'],
        'max_prediction_explanations': max_prediction_explanations,
    }
예제 #29
0
 def test_prompt_user(self):
     """``UI.prompt_user`` returns the typed name, asked via 'user name> '."""
     target = 'datarobot_batch_scoring.utils.input'
     ui = UI(None, logging.DEBUG)
     with mock.patch(target) as fake_input:
         fake_input.return_value = 'Andrew'
         typed_name = ui.prompt_user()
         assert typed_name == 'Andrew'
         fake_input.assert_called_with('user name> ')
예제 #30
0
def parse_generic_options(parsed_args):
    """Turn the parsed argument mapping into the generic run options.

    Creates the module-level ``ui``, logs the arguments (without the
    password) and the platform, translates the delimiter aliases, resolves
    the base URL through ``parse_host`` unless this is a dry run, and
    returns the collected options as a dict.

    ``ui.fatal`` is called for an invalid input or output delimiter and for
    a dataset path that does not exist.
    """
    global ui
    if parsed_args['verbose']:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    ui = UI(parsed_args.get('prompt'), loglevel, parsed_args['stdout'])

    # Log everything that was passed in except the password.
    printed_args = copy.copy(parsed_args)
    printed_args.pop('password', None)
    ui.debug(printed_args)
    ui.info('platform: {} {}'.format(sys.platform, sys.version))

    n_retry = int(parsed_args['n_retry'])
    raw_keep_cols = parsed_args.get('keep_cols')
    keep_cols = None
    if raw_keep_cols:
        keep_cols = [name.strip() for name in raw_keep_cols.split(',')]
    concurrent = int(parsed_args['n_concurrent'])

    resume = parsed_args['resume']
    compression = parsed_args['compress']
    out_file = parsed_args['out']
    timeout = int(parsed_args['timeout'])
    fast_mode = parsed_args['fast']
    encoding = parsed_args['encoding']
    skip_dialect = parsed_args['skip_dialect']
    skip_row_id = parsed_args['skip_row_id']
    host = parsed_args.get('host')
    pred_name = parsed_args.get('pred_name')
    dry_run = parsed_args.get('dry_run', False)

    n_samples = int(parsed_args['n_samples'])
    auto_sample = parsed_args['auto_sample']
    # Without an explicit batch size, automatic sampling is switched on.
    auto_sample = auto_sample if n_samples else True

    # Spelled-out aliases for delimiters that are awkward on a command line.
    # NOTE: on bash you have to use Ctrl-V + TAB for a literal tab; using
    # the | char has issues on Windows for some reason.
    aliases = {'\\t': '\t', 'tab': '\t', 'pipe': '|'}

    delimiter = parsed_args.get('delimiter')
    delimiter = aliases.get(delimiter, delimiter)
    if delimiter and delimiter not in VALID_DELIMITERS:
        ui.fatal('Delimiter "{}" is not a valid delimiter.'.format(delimiter))

    output_delimiter = parsed_args.get('output_delimiter')
    output_delimiter = aliases.get(output_delimiter, output_delimiter)
    if output_delimiter and output_delimiter not in VALID_DELIMITERS:
        ui.fatal('Output delimiter "{}" is not a valid delimiter.'.format(
            output_delimiter))

    dataset = parsed_args['dataset']
    dataset_found = os.path.exists(dataset)
    if not dataset_found:
        ui.fatal('file {} does not exist.'.format(dataset))

    # A dry run never resolves the host; the base URL stays empty.
    base_url = "" if dry_run else parse_host(host, ui)

    ui.debug('batch_scoring v{}'.format(__version__))
    ui.info('connecting to {}'.format(base_url))

    return {
        'auto_sample': auto_sample,
        'base_url': base_url,
        'compression': compression,
        'concurrent': concurrent,
        'dataset': dataset,
        'delimiter': delimiter,
        'dry_run': dry_run,
        'encoding': encoding,
        'fast_mode': fast_mode,
        'keep_cols': keep_cols,
        'n_retry': n_retry,
        'n_samples': n_samples,
        'out_file': out_file,
        'output_delimiter': output_delimiter,
        'pred_name': pred_name,
        'resume': resume,
        'skip_dialect': skip_dialect,
        'skip_row_id': skip_row_id,
        'timeout': timeout,
    }
예제 #31
0
def parse_generic_options(parsed_args):
    """Turn the parsed argument mapping into the generic run options.

    Creates the module-level ``ui``, logs the arguments (without the
    password), the version and the platform, translates the delimiter
    aliases and returns the collected options as a dict.

    ``ui.fatal`` is called for an invalid input or output delimiter and for
    a dataset path that does not exist.
    """
    global ui
    if parsed_args['verbose']:
        loglevel = logging.DEBUG
    else:
        loglevel = logging.INFO
    ui = UI(parsed_args.get('prompt'), loglevel, parsed_args['stdout'])

    # Log everything that was passed in except the password.
    printed_args = copy.copy(parsed_args)
    printed_args.pop('password', None)
    ui.debug(printed_args)
    ui.info('version: {}'.format(__version__))
    ui.info('platform: {} {}'.format(sys.platform, sys.version))

    n_retry = int(parsed_args['n_retry'])
    raw_keep_cols = parsed_args.get('keep_cols')
    keep_cols = None
    if raw_keep_cols:
        keep_cols = [name.strip() for name in raw_keep_cols.split(',')]
    concurrent = int(parsed_args['n_concurrent'])

    resume = parsed_args.get('resume')
    compression = parsed_args['compress']
    out_file = parsed_args['out']
    raw_timeout = parsed_args.get('timeout')
    timeout = int(raw_timeout) if raw_timeout is not None else None
    fast_mode = parsed_args['fast']
    encoding = parsed_args['encoding']
    skip_dialect = parsed_args['skip_dialect']
    skip_row_id = parsed_args['skip_row_id']
    field_size_limit = parsed_args.get('field_size_limit')
    pred_name = parsed_args.get('pred_name')
    dry_run = parsed_args.get('dry_run', False)

    n_samples = int(parsed_args['n_samples'])
    auto_sample = parsed_args['auto_sample']
    # Without an explicit batch size, automatic sampling is switched on.
    auto_sample = auto_sample if n_samples else True

    # Spelled-out aliases for delimiters that are awkward on a command line.
    # NOTE: on bash you have to use Ctrl-V + TAB for a literal tab; using
    # the | char has issues on Windows for some reason.
    aliases = {'\\t': '\t', 'tab': '\t', 'pipe': '|'}

    delimiter = parsed_args.get('delimiter')
    delimiter = aliases.get(delimiter, delimiter)
    if delimiter and delimiter not in VALID_DELIMITERS:
        ui.fatal('Delimiter "{}" is not a valid delimiter.'
                 .format(delimiter))

    output_delimiter = parsed_args.get('output_delimiter')
    output_delimiter = aliases.get(output_delimiter, output_delimiter)
    if output_delimiter and output_delimiter not in VALID_DELIMITERS:
        ui.fatal('Output delimiter "{}" is not a valid delimiter.'
                 .format(output_delimiter))

    dataset = parsed_args['dataset']
    dataset_found = os.path.exists(dataset)
    if not dataset_found:
        ui.fatal('file {} does not exist.'.format(dataset))

    ui.debug('batch_scoring v{}'.format(__version__))

    return {
        'auto_sample': auto_sample,
        'compression': compression,
        'concurrent': concurrent,
        'dataset': dataset,
        'delimiter': delimiter,
        'dry_run': dry_run,
        'encoding': encoding,
        'fast_mode': fast_mode,
        'field_size_limit': field_size_limit,
        'keep_cols': keep_cols,
        'n_retry': n_retry,
        'n_samples': n_samples,
        'out_file': out_file,
        'output_delimiter': output_delimiter,
        'pred_name': pred_name,
        'resume': resume,
        'skip_dialect': skip_dialect,
        'skip_row_id': skip_row_id,
        'timeout': timeout,
        'verify_ssl': parsed_args['verify_ssl'],
    }
예제 #32
0
 def test_warning(self):
     """``UI.warning`` forwards the text to ``logger.warning``."""
     target = 'datarobot_batch_scoring.utils.logger'
     ui = UI(None, logging.DEBUG)
     with mock.patch(target) as fake_logger:
         ui.warning('text')
         fake_logger.warning.assert_called_with('text')