Exemple #1
0
def test_header_only(live_server):
    """Expect a ValueError reporting the header_only.csv fixture as empty."""
    mock_ui = PickableMock()
    settings = dict(
        base_url='{webhost}/api/v1/'.format(webhost=live_server.url()),
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93d',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file='out.csv',
        keep_cols=None,
        delimiter=',',
        dataset='tests/fixtures/header_only.csv',
        pred_name=None,
        timeout=30,
        ui=mock_ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
    )

    with pytest.raises(ValueError) as excinfo:
        run_batch_predictions(**settings)

    message = str(excinfo.value)
    assert message == ("Input file 'tests/fixtures/header_only.csv' "
                       "is empty.")
Exemple #2
0
def test_explicit_delimiter_gzip(live_server):
    """Run against the gzipped temperatura fixture with delimiter=','.

    The run is expected to return None.
    """
    mock_ui = PickableMock()
    settings = dict(
        base_url='{webhost}/api/v1/'.format(webhost=live_server.url()),
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93d',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file='out.csv',
        keep_cols=None,
        delimiter=',',
        dataset='tests/fixtures/temperatura_predict.csv.gz',
        pred_name=None,
        timeout=30,
        ui=mock_ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
    )

    outcome = run_batch_predictions(**settings)

    assert outcome is None
Exemple #3
0
def test_bad_newline(live_server):
    """Score the diabetes_bad_newline.csv fixture and check the output size.

    The output file is expected to hold 5 lines, and the UI mock must have
    received the warning about empty rows being discarded.
    """
    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())

    run_batch_predictions(base_url=base_url,
                          base_headers={},
                          user='******',
                          pwd='password',
                          api_token=None,
                          create_api_token=False,
                          pid='56dd9570018e213242dfa93c',
                          lid='56dd9570018e213242dfa93d',
                          import_id=None,
                          n_retry=3,
                          concurrent=1,
                          resume=False,
                          n_samples=10,
                          out_file='out.csv',
                          keep_cols=None,
                          delimiter=',',
                          dataset='tests/fixtures/diabetes_bad_newline.csv',
                          pred_name=None,
                          timeout=None,
                          ui=ui,
                          auto_sample=False,
                          fast_mode=False,
                          dry_run=False,
                          encoding='',
                          skip_dialect=False)

    # Read through a context manager so the handle is closed deterministically
    # instead of being left to the garbage collector.
    with open('out.csv', 'rb') as result:
        lines = len(result.readlines())

    assert lines == 5
    ui.warning.assert_any_call('Detected empty rows in the CSV file. '
                               'These rows will be discarded.')
def test_create_and_acquire_api_token(live_server):
    """Acquire a token with create_api_token=True and check the UI calls."""
    mock_ui = PickableMock()
    api_root = '{webhost}/api/v1/'.format(webhost=live_server.url())

    token = acquire_api_token(api_root, {}, 'username', 'password', True,
                              mock_ui)

    assert token == 'Som3tok3n'
    # Both the info message and the debug echo of the token are expected.
    mock_ui.info.assert_called_with('api-token acquired')
    mock_ui.debug.assert_called_with('api-token: Som3tok3n')
def test_acquire_api_token_bad_status(live_server):
    """Expect a ValueError naming status code 500 for user 'bad_status'.

    Also checks that nothing was reported through ui.info / ui.debug.
    """
    ui = PickableMock()
    base_url = '{webhost}/api/v1/'.format(webhost=live_server.url())
    with pytest.raises(ValueError) as ctx:
        acquire_api_token(base_url, {}, 'bad_status', 'passwd', False, ui)
    # These checks live outside the `with` block on purpose: anything placed
    # after the raising call inside the block would never be executed.
    assert not ui.info.called
    assert not ui.debug.called
    assert str(ctx.value) == 'api_token request returned status code 500'
def test_acquire_api_token_unauthorized(live_server):
    """Expect a 'wrong credentials' ValueError for user 'unknown'.

    Also checks that nothing was reported through ui.info / ui.debug.
    """
    ui = PickableMock()
    base_url = '{webhost}/api/v1/'.format(webhost=live_server.url())
    with pytest.raises(ValueError) as ctx:
        acquire_api_token(base_url, {}, 'unknown', 'passwd', False, ui)
    # These checks live outside the `with` block on purpose: anything placed
    # after the raising call inside the block would never be executed.
    assert not ui.info.called
    assert not ui.debug.called
    assert str(ctx.value) == 'wrong credentials'
def test_acquire_api_token_no_token2(live_server):
    """Expect the 'no api-token registered' ValueError for user 'no_token2'."""
    mock_ui = PickableMock()
    api_root = '{webhost}/api/v1/'.format(webhost=live_server.url())
    expected_message = ('no api-token registered; '
                        'please run with --create_api_token flag.')

    with pytest.raises(ValueError) as excinfo:
        acquire_api_token(api_root, {}, 'no_token2', 'passwd', False, mock_ui)
        # NOTE(review): the two asserts below are not reached when the call
        # above raises, so they currently verify nothing. Whether they would
        # hold if moved after the block is not visible from this test --
        # confirm against acquire_api_token before relocating them.
        assert not mock_ui.info.called
        assert not mock_ui.debug.called

    assert str(excinfo.value) == expected_message
def test_quotechar_in_keep_cols(live_server):
    """Score a generated CSV with quoted values, keeping columns b and c.

    The input is assembled in a temporary file: the header fixture, then the
    'first part' fixture repeated until at least DETECT_SAMPLE_SIZE_SLOW
    bytes of it are written, then the 'bad part' fixture (presumably so the
    quoted rows fall outside the sampled region -- confirm). The last output
    line must start with the expected quoted value.
    """
    import os

    def read_fixture(path):
        # Close each fixture as soon as it is read.
        with open(path, "rb") as src:
            return src.read()

    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ui = PickableMock()

    head = read_fixture("tests/fixtures/quotes_input_head.csv")
    body_1 = read_fixture("tests/fixtures/quotes_input_first_part.csv")
    body_2 = read_fixture("tests/fixtures/quotes_input_bad_part.csv")

    # delete=False: the file is reopened by name after this block closes it.
    with tempfile.NamedTemporaryFile(prefix='test_',
                                     suffix='.csv',
                                     delete=False) as fd:
        fd.write(head)
        size = 0
        while size < DETECT_SAMPLE_SIZE_SLOW:
            fd.write(body_1)
            size += len(body_1)
        fd.write(body_2)

    try:
        ret = run_batch_predictions(base_url=base_url,
                                    base_headers={},
                                    user='******',
                                    pwd='password',
                                    api_token=None,
                                    create_api_token=False,
                                    pid='56dd9570018e213242dfa93c',
                                    lid='56dd9570018e213242dfa93d',
                                    import_id=None,
                                    n_retry=3,
                                    concurrent=1,
                                    resume=False,
                                    n_samples=10,
                                    out_file='out.csv',
                                    keep_cols=["b", "c"],
                                    delimiter=None,
                                    dataset=fd.name,
                                    pred_name=None,
                                    timeout=None,
                                    ui=ui,
                                    auto_sample=True,
                                    fast_mode=False,
                                    dry_run=False,
                                    encoding='',
                                    skip_dialect=False)
        assert ret is None

        with open("out.csv", "rb") as result:
            last_line = result.readlines()[-1]
        expected_last_line = b'1044,2,"eeeeeeee ""eeeeee"" eeeeeeeeeeee'
        # Only the prefix is compared; the rest of the line is not pinned.
        assert last_line[:len(expected_last_line)] == expected_last_line
    finally:
        # The temporary input was created with delete=False, so remove it
        # explicitly to avoid leaving a large file behind after each run.
        os.remove(fd.name)
Exemple #9
0
def check_regression_jp(live_server, tmpdir, fast_mode, gzipped):
    """Use utf8 encoded input data.

    Scores tests/fixtures/regression_jp.csv (with a '.gz' suffix when
    ``gzipped`` is true) and compares the produced file with
    tests/fixtures/regression_output_jp.csv.

    :param live_server: fixture whose ``url()`` gives the server address.
    :param tmpdir: temporary directory fixture receiving the output file.
    :param fast_mode: forwarded to run_batch_predictions; also selects the
        output file name ('out_fast.csv' or 'out.csv').
    :param gzipped: whether to use the gzipped variant of the dataset.
    """
    import io

    if fast_mode:
        out_fname = 'out_fast.csv'
    else:
        out_fname = 'out.csv'
    out = tmpdir.join(out_fname)

    dataset_suffix = '.gz' if gzipped else ''

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93e',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=500,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/regression_jp.csv' + dataset_suffix,
        pred_name='new_name',
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=fast_mode,
        dry_run=False,
        encoding='',
        skip_dialect=False,
        compression=True
    )
    assert ret is None

    actual = out.read_text('utf-8')

    # The 'rU' mode was removed in Python 3.11. io.open translates newlines
    # by default, and the explicit encoding matches how `actual` is decoded.
    with io.open('tests/fixtures/regression_output_jp.csv', 'r',
                 encoding='utf-8') as f:
        assert actual == f.read()
Exemple #10
0
def test_pred_threshold_classification(live_server, tmpdir, func_params):
    """Score temperatura_predict.csv with pred_threshold_name='threshold'.

    The produced file must equal
    tests/fixtures/temperatura_output_healthy_threshold.csv.

    :param func_params: mapping providing 'deployment_id', 'pid' and 'lid'.
    """
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        deployment_id=func_params['deployment_id'],
        pid=func_params['pid'],
        lid=func_params['lid'],
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/temperatura_predict.csv',
        pred_name='healthy',
        pred_threshold_name='threshold',
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )

    assert ret is None

    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11. io.open translates newlines
    # by default, and the explicit encoding matches how `actual` is decoded.
    with io.open(
        'tests/fixtures/temperatura_output_healthy_threshold.csv', 'r',
        encoding='utf-8'
    ) as f:
        expected = f.read()
    # On failure, show the output that was actually produced.
    assert actual == expected, actual
Exemple #11
0
def test_prediction_explanations_keepcols(live_server, tmpdir):
    """Score 10kDiabetes.csv with 5 prediction explanations and keep_cols.

    The 'medical_specialty' and 'number_diagnoses' columns are kept, and the
    produced file must equal
    tests/fixtures/10kDiabetes_5explanations_keepcols.csv.
    """
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='5afb150782c7dd45fcc03951',
        lid='5b2cad28aa1d12847310acf4',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=['medical_specialty', 'number_diagnoses'],
        delimiter=None,
        dataset='tests/fixtures/10kDiabetes.csv',
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
        max_prediction_explanations=5
    )

    assert ret is None
    actual = out.read_text('utf-8')
    file_path = 'tests/fixtures/10kDiabetes_5explanations_keepcols.csv'
    # The 'rU' mode was removed in Python 3.11. io.open translates newlines
    # by default, and the explicit encoding matches how `actual` is decoded.
    with io.open(file_path, 'r', encoding='utf-8') as f:
        expected = f.read()
    assert str(actual) == str(expected), expected
Exemple #12
0
def test_simple_transferable(live_server, tmpdir):
    """Score regression_predict.csv via import_id, with pid and lid unset.

    The produced file must equal tests/fixtures/regression_output.csv.
    """
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        import_id='0ec5bcea7f0f45918fa88257bfe42c09',
        pid=None,
        lid=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/regression_predict.csv',
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )

    assert ret is None
    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11. io.open translates newlines
    # by default, and the explicit encoding matches how `actual` is decoded.
    with io.open('tests/fixtures/regression_output.csv', 'r',
                 encoding='utf-8') as f:
        expected = f.read()
    assert str(actual) == str(expected), expected
Exemple #13
0
def test_simple_with_unicode(live_server, tmpdir, func_params, dataset_name):
    """Score tests/fixtures/<dataset_name> and compare with the reference.

    The produced file must equal tests/fixtures/jpReview_books_reg_out.csv.

    :param func_params: mapping providing 'deployment_id', 'pid' and 'lid'.
    :param dataset_name: file name of the input dataset under tests/fixtures.
    """
    import io

    out = tmpdir.join('out.csv')
    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        deployment_id=func_params['deployment_id'],
        pid=func_params['pid'],
        lid=func_params['lid'],
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/{}'.format(dataset_name),
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False)

    assert ret is None
    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11. io.open translates newlines
    # by default, and the explicit encoding matches how `actual` is decoded.
    with io.open('tests/fixtures/jpReview_books_reg_out.csv', 'r',
                 encoding='utf-8') as f:
        expected = f.read()
    assert str(actual) == str(expected), expected
def test_simple_api_v1(live_server, tmpdir):
    """Score the gzipped temperatura fixture through the /api/v1/ base URL.

    The produced file must equal
    tests/fixtures/temperatura_api_v1_output.csv.
    """
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/api/v1/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93f',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/temperatura_predict.csv.gz',
        pred_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )

    assert ret is None
    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11. io.open translates newlines
    # by default, and the explicit encoding matches how `actual` is decoded.
    with io.open('tests/fixtures/temperatura_api_v1_output.csv', 'r',
                 encoding='utf-8') as f:
        expected = f.read()
    assert str(actual) == str(expected), expected
Exemple #15
0
def test_simple_with_wrong_encoding(live_server, tmpdir, func_params):
    """Expect a UnicodeDecodeError when the dataset is read as cp932.

    :param func_params: mapping providing 'deployment_id', 'pid' and 'lid'.
    """
    out_path = tmpdir.join('out.csv')
    mock_ui = PickableMock()
    settings = dict(
        base_url='{webhost}/predApi/v1.0/'.format(webhost=live_server.url()),
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        deployment_id=func_params['deployment_id'],
        pid=func_params['pid'],
        lid=func_params['lid'],
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out_path),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/jpReview_books_reg.csv',
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=mock_ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='cp932',
        skip_dialect=False,
    )

    with pytest.raises(UnicodeDecodeError) as excinfo:
        run_batch_predictions(**settings)

    # The fixture dataset is utf-8 encoded, yet we ask for it to be decoded
    # as cp932, so the codec error must name cp932.
    message = str(excinfo.value)
    assert "'cp932' codec can't decode byte" in message