Exemple #1
0
    def test_process_cleaning_results(self, _m_get_api_spec):
        """Check the values returned for one finished cleaning run."""
        job_id, run_id, file_id = 42, 1776, 312

        # A future which is already resolved, standing in for the cleaning job.
        done_future = civis.futures.CivisFuture(
            poller=lambda j, r: (j, r),
            poller_args=(job_id, run_id),
            poll_on_creation=False,
        )
        done_future.set_result(Response({'state': 'success'}))

        run_output = Response({'object_id': file_id})
        self.mock_client.jobs.list_runs_outputs.return_value = [run_output]

        # Detected settings reported by the mocked files endpoint.
        columns = [{'name': 'a', 'sql_type': 'INT'},
                   {'name': 'column', 'sql_type': 'INT'}]
        compression = 'gzip'
        headers = True
        delimiter = ','
        self.mock_client.files.get.return_value.detected_info = {
            'tableColumns': columns,
            'compression': compression,
            'includeHeader': headers,
            'columnDelimiter': delimiter
        }

        actual = civis.io._tables._process_cleaning_results(
            [done_future], self.mock_client, None, True, None
        )
        expected = ([file_id], headers, compression, delimiter, columns)
        assert actual == expected
Exemple #2
0
def create_client_mock_for_container_tests(script_id=-10,
                                           run_id=100,
                                           state='succeeded',
                                           run_outputs=None,
                                           log_outputs=None):
    """Build a mocked API client for tests which run container scripts.

    The returned mock has canned responses for creating a container,
    starting a run, polling that run, listing run outputs, and listing
    run logs. Calling ``scripts.post_cancel`` switches the polled run's
    state to "cancelled". If `state` is 'failed', the polled run also
    carries an ``error`` entry.

    Parameters
    ----------
    script_id: int
        ID reported by the container returned from `post_containers`
        and used as the ``container_id`` of the mocked runs.
    run_id: int
        ID reported by the mocked runs.
    state: str, optional
        State reported by the run returned from `get_containers_runs`
    run_outputs: list, optional
        Response objects to return as the run outputs
    log_outputs : list, optional
        Response objects to return as the run logs

    Returns
    -------
    The mock produced by `create_client_mock`
        With scripts endpoints `post_containers`, `post_containers_runs`,
        `post_cancel`, and `get_containers_runs` set up.
    """
    client = create_client_mock()
    scripts = client.scripts

    scripts.post_containers.return_value = Response({'id': script_id})

    # A freshly posted run reports "queued"; polling it reports `state`.
    queued_run = Response({
        'id': run_id,
        'container_id': script_id,
        'state': 'queued'
    })
    polled_run = Response({
        'id': run_id,
        'container_id': script_id,
        'state': state
    })
    if state == 'failed':
        polled_run['error'] = 'None'

    scripts.post_containers_runs.return_value = queued_run
    scripts.get_containers_runs.return_value = polled_run
    scripts.list_containers_runs_outputs.return_value = (run_outputs or [])
    client.jobs.list_runs_logs.return_value = (log_outputs or [])

    def _cancel_run(script_id):
        # Cancelling mutates the shared run so that later polls see it.
        polled_run.state = "cancelled"
        return polled_run

    scripts.post_cancel.side_effect = _cancel_run

    return client
def setup_client_mock(script_id=-10,
                      run_id=100,
                      state='succeeded',
                      run_outputs=None):
    """Build a Mock API client for testing code which runs container scripts

    The mock's class is set to `APIClient`. Calling ``scripts.post_cancel``
    switches the polled run's state to "cancelled". The ``channels``
    attribute is deleted from the mock.

    Parameters
    ----------
    script_id: int
        ID reported by the container returned from `post_containers`
        and used as the ``container_id`` of the mocked runs.
    run_id: int
        ID reported by the mocked runs.
    state: str, optional
        State reported by the run returned from `get_containers_runs`
    run_outputs: list, optional
        Response objects to return as the run outputs

    Returns
    -------
    `unittest.mock.Mock`
        With scripts endpoints `post_containers`, `post_containers_runs`,
        `post_cancel`, and `get_containers_runs` set up.
    """
    client = mock.Mock()
    client.__class__ = APIClient
    scripts = client.scripts

    scripts.post_containers.return_value = Response({'id': script_id})

    # A freshly posted run reports "queued"; polling it reports `state`.
    queued_run = Response({
        'id': run_id,
        'container_id': script_id,
        'state': 'queued'
    })
    polled_run = Response({
        'id': run_id,
        'container_id': script_id,
        'state': state
    })
    scripts.post_containers_runs.return_value = queued_run
    scripts.get_containers_runs.return_value = polled_run
    scripts.list_containers_runs_outputs.return_value = (run_outputs or [])
    scripts.list_containers_runs_logs.return_value = []

    def _cancel_run(script_id):
        # Cancelling mutates the shared run so that later polls see it.
        polled_run.state = "cancelled"
        return polled_run

    scripts.post_cancel.side_effect = _cancel_run

    # Avoid channels endpoint while testing here
    del client.channels

    return client
Exemple #4
0
    def test_process_cleaning_results_raises_imports(self, _m_get_api_spec):
        """Cleaning runs which disagree on the delimiter raise an error."""
        job_id, run_id, file_id = 42, 1776, 312

        # Two already-resolved futures for the same (job, run) pair.
        cleaning_futures = []
        for _ in range(2):
            future = civis.futures.CivisFuture(
                poller=lambda j, r: (j, r),
                poller_args=(job_id, run_id),
                poll_on_creation=False,
            )
            future.set_result(Response({'state': 'success'}))
            cleaning_futures.append(future)

        self.mock_client.jobs.list_runs_outputs.return_value = [
            Response({'object_id': file_id})
        ]

        compression = 'gzip'
        headers = True
        columns = [{'name': 'a', 'sql_type': 'INT'},
                   {'name': 'column', 'sql_type': 'INT'}]

        def detected_with(delimiter):
            # File response whose detected info differs only by delimiter.
            return Response({
                'detected_info': {
                    'tableColumns': columns,
                    'compression': compression,
                    'includeHeader': headers,
                    'columnDelimiter': delimiter
                }
            })

        # Successive `files.get` calls report "," and then "|".
        self.mock_client.files.get.side_effect = [detected_with(','),
                                                  detected_with('|')]

        # NOTE(review): `match` is a regular expression and the "|" below is
        # unescaped, so it acts as alternation (either half may match).
        # Confirm the intended message before tightening this pattern.
        with pytest.raises(CivisImportError,
                           match='Provided delimiter "|" does not match '
                           'detected delimiter'):
            civis.io._tables._process_cleaning_results(
                cleaning_futures, self.mock_client, None, True, None)
Exemple #5
0
def test_file_id_from_run_output_approximate_multiple():
    """With several regex matches, the first matching output is used."""
    # Fuzzy name matching with multiple matches should return the first
    outputs = [
        Response({'name': name, 'object_id': object_id,
                  'object_type': 'File'})
        for name, object_id in (('spam.csv.gz', 2013), ('eggs.csv.gz', 2014))
    ]
    client = mock.Mock()
    client.scripts.list_containers_runs_outputs.return_value = outputs

    found_id = civis.io.file_id_from_run_output('.csv', 17, 13, regex=True,
                                                client=client)
    assert found_id == 2013
Exemple #6
0
def _create_share_model_client_mock(run_ids):
    """Return a mock client with canned share, run and run-output responses.

    `run_ids` gives the IDs of the runs returned by
    ``scripts.list_containers_runs``.
    """
    client = create_client_mock()
    scripts = client.scripts

    # Each share endpoint returns a distinct marker string.
    scripts.put_containers_shares_users.return_value = 'usershare'
    scripts.put_containers_shares_groups.return_value = 'groupshare'
    scripts.delete_containers_shares_users.return_value = 'userdel'
    scripts.delete_containers_shares_groups.return_value = 'groupdel'

    runs = []
    for _id in run_ids:
        runs.append(Response({'id': _id}))
    scripts.list_containers_runs.return_value = runs

    # One run output of each of three object types.
    file_output = Response(
        {'object_id': 117, 'object_type': 'File', 'name': 'fname'})
    project_output = Response({'object_id': 31, 'object_type': 'Project'})
    json_output = Response({'object_id': 37, 'object_type': 'JSONValue'})
    scripts.list_containers_runs_outputs.return_value = [
        file_output, project_output, json_output]
    return client
def test_infer_from_custom_job(mock_make_factory):
    """`infer_backend_factory` works inside a custom job made from a template.

    The custom job (ID 42) points at template 999, which is backed by
    script 171. Values for the keys in `KEYS_TO_INFER` are expected to
    come from that backing script.
    """
    custom_job = Response(dict(from_template_id=999, id=42,
                               required_resources=None,
                               params=[{'name': 'spam'}],
                               arguments={'spam': 'eggs'},
                               docker_image_name='image_name',
                               docker_image_tag='tag',
                               repo_http_uri='cabbage', repo_ref='servant'))
    backing_script = mock_job()
    template = Response(dict(id=999, script_id=171))
    containers_by_id = {42: custom_job, 171: backing_script}

    def _get_container(job_id):
        container = containers_by_id.get(int(job_id))
        if container is None:
            raise ValueError("Got job_id {}".format(job_id))
        return container

    client = mock.MagicMock()
    client.scripts.get_containers.side_effect = _get_container
    client.templates.get_scripts.return_value = template

    fake_env = {'CIVIS_JOB_ID': "42", 'CIVIS_RUN_ID': "test_run"}
    with mock.patch.dict('os.environ', fake_env):
        civis.parallel.infer_backend_factory(client=client)

    # `get_containers` should have been called twice: first for the
    # container we're running in, then for the container which backs
    # the template this job was created from. The backing script has
    # settings which aren't visible from the container created from it.
    assert client.scripts.get_containers.call_count == 2
    client.templates.get_scripts.assert_called_once_with(999)

    expected_kwargs = {'required_resources': {'cpu': 11},
                       'params': [{'name': 'spam'}],
                       'arguments': {'spam': 'eggs'},
                       'client': mock.ANY,
                       'polling_interval': mock.ANY,
                       'setup_cmd': None,
                       'max_submit_retries': mock.ANY,
                       'max_job_retries': mock.ANY,
                       'hidden': True,
                       'remote_backend': 'civis'}
    expected_kwargs.update(
        {key: backing_script[key] for key in civis.parallel.KEYS_TO_INFER})
    mock_make_factory.assert_called_once_with(**expected_kwargs)
Exemple #8
0
def test_sql_script():
    """`_sql_script` posts a SQL script and then starts a run of it."""
    query = "SELECT SPECIAL SQL QUERY"
    job_id, db_id, cred_id = 32, 29, 3920

    client = create_client_mock()
    client.scripts.post_sql.return_value = Response({'id': job_id})
    client.get_database_id.return_value = db_id
    client.default_credential = cred_id

    # No credential or CSV settings are supplied by the caller.
    civis.io._tables._sql_script(client=client,
                                 sql=query,
                                 database='fake-db',
                                 job_name='My job',
                                 credential_id=None,
                                 hidden=False,
                                 csv_settings=None)

    # The script is posted with the client's database ID and default
    # credential, and with an empty dict of CSV settings.
    client.scripts.post_sql.assert_called_once_with(
        'My job',
        remote_host_id=db_id,
        credential_id=cred_id,
        sql=query,
        hidden=False,
        csv_settings={})
    client.scripts.post_sql_runs.assert_called_once_with(job_id)
def create_mock_client_with_job():
    """Return a mock client whose custom-script endpoints report success.

    ``post_custom``, ``post_custom_runs`` and ``get_custom_runs`` on
    ``scripts`` each return a canned Response; the polled run is in the
    "succeeded" state.
    """
    client = create_client_mock()

    created_job = Response({"id": 1, "name": "test"})
    started_run = Response({"id": 1})
    run_id = started_run.id
    polled_run = Response({
        "id": run_id,
        "state": "succeeded",
        "is_cancel_requested": False,
        "error": None,
        "custom_id": run_id,
    })

    scripts = client.scripts
    scripts.post_custom.return_value = created_job
    scripts.post_custom_runs.return_value = started_run
    scripts.get_custom_runs.return_value = polled_run
    return client
def mock_job():
    """Return a Response with fixed params/arguments plus _MOCK_JOB_KWARGS."""
    params = [{'name': 'spam'}]
    arguments = {'spam': 'eggs'}
    job_fields = dict(params=params, arguments=arguments, **_MOCK_JOB_KWARGS)
    return Response(job_fields)
Exemple #11
0
def test_file_to_dataframe_infer_gzip():
    """A ".gz" file name makes `compression='infer'` resolve to gzip."""
    m_client = mock.Mock()
    m_client.files.get.return_value = Response({'name': 'spam.csv.gz',
                                                'file_url': 'url'})
    with mock.patch.object(civis.io._files.pd, 'read_csv') as mock_read_csv:
        civis.io.file_to_dataframe(121, compression='infer', client=m_client)
        # Use a real mock assertion. `called_once_with` is not an assertion
        # method, so `assert mock.called_once_with(...)` verified nothing.
        # `read_csv` is given the file's URL rather than the file ID.
        mock_read_csv.assert_called_once_with('url', compression='gzip')
def test_modelpipeline_classmethod_constructor(mock_future,
                                               container_response_stub):
    """`ModelPipeline.from_existing` rebuilds a pipeline from a container.

    Each attribute checked below is compared against the corresponding
    field of the stubbed container response.
    """
    container = container_response_stub
    mock_client = mock.Mock()
    mock_client.scripts.get_containers.return_value = container
    mock_client.credentials.get.return_value = Response({'name': 'Token'})

    resources = {
        'REQUIRED_CPU': 1000,
        'REQUIRED_MEMORY': 9999,
        'REQUIRED_DISK_SPACE': -20
    }

    # test everything is working fine
    mp = _model.ModelPipeline.from_existing(1, 1, client=mock_client)
    assert isinstance(mp, _model.ModelPipeline)
    assert mp.dependent_variable == [container.arguments['TARGET_COLUMN']]
    assert mp.primary_key == container.arguments['PRIMARY_KEY']
    excluded = container.arguments.get('EXCLUDE_COLS', None)
    # The conditional must be parenthesized: `assert a == b if c else None`
    # parses as `assert ((a == b) if c else None)`, which asserts `None`
    # whenever the argument is absent.
    assert mp.excluded_columns == (excluded.split() if excluded else None)
    assert mp.model == container.arguments['MODEL']
    assert mp.calibration == container.arguments['CALIBRATION']
    assert mp.cv_params == json.loads(container.arguments['CVPARAMS'])
    assert mp.parameters == json.loads(container.arguments['PARAMS'])
    assert mp.job_resources == resources
    # The model name is the container name without its last six characters.
    assert mp.model_name == container.name[:-6]
    assert mp.notifications == {
        camel_to_snake(key): val
        for key, val in container.notifications.items()
    }
    deps = container.arguments.get('DEPENDENCIES', None)
    # Parenthesized for the same precedence reason as `excluded` above.
    assert mp.dependencies == (deps.split() if deps else None)
    assert mp.git_token_name == 'Token'
Exemple #13
0
def _container_response_stub(from_template_id=8387):
    """Return a Response describing a stubbed model-training container.

    `from_template_id` is stored in the response's ``from_template_id``
    field; every other field has a fixed value.
    """
    arguments = dict(
        MODEL='sparse_logistic',
        TARGET_COLUMN='brushes_teeth_much',
        PRIMARY_KEY='voterbase_id',
        CALIBRATION='sigmoid',
        EXCLUDE_COLS='dog cat lizard frog',
        CVPARAMS='{}',
        PARAMS='{}',
        REQUIRED_CPU=1000,
        REQUIRED_MEMORY=9999,
        REQUIRED_DISK_SPACE=-20,
        DEPENDENCIES='A B C D',
        GIT_CRED=9876,
    )
    notifications = dict(
        urls=[],
        failureEmailAddresses=[],
        failureOn=True,
        stallWarningMinutes=None,
        successEmailAddresses=[],
        successEmailBody=None,
        successEmailSubject=None,
        successOn=True,
    )
    container_fields = {
        'arguments': arguments,
        'notifications': notifications,
        'required_resources': {},
        'docker_image_tag': None,
        'docker_command': None,
        'repo_http_uri': None,
        'repo_ref': None,
        'name': 'Civis Model Train',
        'from_template_id': from_template_id,
    }
    return Response(container_fields)
Exemple #14
0
 def test_poller_returns_none(self):
     """The polling thread keeps polling when the poller returns None."""
     # Two empty polls followed by a finished state.
     poll_results = [None, None, Response({'state': 'success'})]
     poll_fn = mock.Mock(side_effect=poll_results)
     thread = _ResultPollingThread(poll_fn, (), polling_interval=0.01)
     thread.run()
     assert poll_fn.call_count == 3
Exemple #15
0
def test_file_id_from_run_output_exact():
    """An exact output name resolves to that output's object ID."""
    spam_output = Response({'name': 'spam', 'object_id': 2013,
                            'object_type': 'File'})
    client = mock.Mock()
    client.scripts.list_containers_runs_outputs.return_value = [spam_output]

    found_id = civis.io.file_id_from_run_output('spam', 17, 13, client=client)
    assert found_id == 2013
def test_set_model_exception_no_exception(mock_f2j):
    """A succeeded run leaves the ModelFuture without an exception."""
    # If nothing went wrong, we shouldn't set an exception
    run_outputs = [
        Response({'name': name, 'object_id': object_id,
                  'object_type': 'File'})
        for name, object_id in (('model_info.json', 137),
                                ('metrics.json', 139))
    ]
    client = setup_client_mock(1, 2, state='succeeded',
                               run_outputs=run_outputs)
    future = _model.ModelFuture(1, 2, client=client)
    assert future.exception() is None
Exemple #17
0
 def _set_api_exception(self, exc, result=None):
     """Record `exc` on this object along with a result, then clean up.

     When `result` is None, a Response whose state is ``FAILED[0]`` is
     stored instead. All updates happen while holding ``self._condition``.
     """
     with self._condition:
         self._result = (Response({"state": FAILED[0]})
                         if result is None else result)
         self._last_result = self._result
         self.set_exception(exc)
         self.cleanup()
 def test_poll_on_creation(self):
     """With `poll_on_creation=False` the first status check does not poll."""
     running = Response({"state": "running"})
     poll_fn = mock.Mock(return_value=running)
     result = PollableResult(poll_fn, (),
                             polling_interval=0.01,
                             poll_on_creation=False)
     # Check status once to start the polling thread
     result.done()
     assert poll_fn.call_count == 0
     # Wait slightly longer than one polling interval.
     time.sleep(0.015)
     assert poll_fn.call_count == 1
Exemple #19
0
def test_repeated_polling():
    """The poller is called once up front and once per elapsed interval."""
    # Verify that we poll the expected number of times.
    poll_interval = 0.2
    running = Response({"state": "running"})
    poll_fn = mock.Mock(return_value=running)
    result = PollableResult(poll_fn, (), polling_interval=poll_interval)
    # Check status once to start the polling thread
    result.done()
    assert poll_fn.call_count == 1, "Poll once on the first status check"
    time.sleep(2.2 * poll_interval)
    assert poll_fn.call_count == 3, "After waiting 2.2x the polling interval"
Exemple #20
0
def test_file_to_dataframe_kwargs():
    """Explicit compression and extra kwargs are forwarded to `read_csv`."""
    m_client = mock.Mock()
    m_client.files.get.return_value = Response({'name': 'spam.csv',
                                                'file_url': 'url'})
    with mock.patch.object(civis.io._files.pd, 'read_csv') as mock_read_csv:
        civis.io.file_to_dataframe(121, compression='special', client=m_client,
                                   delimiter='|', nrows=10)
        # Use a real mock assertion. `called_once_with` is not an assertion
        # method, so `assert mock.called_once_with(...)` verified nothing.
        # `read_csv` is given the file's URL rather than the file ID.
        mock_read_csv.assert_called_once_with('url', compression='special',
                                              delimiter='|', nrows=10)
Exemple #21
0
def test_result_callback_no_get(mock_civis):
    """The completion callback fires without anyone calling `get`."""
    # Test that the completed callback happens even if we don't call `get`
    on_done = mock.MagicMock()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock('spam')

    # A container future which is already resolved.
    future = ContainerFuture(1, 2, client=mock.MagicMock())
    future.set_result(Response({'state': 'success'}))

    civis.parallel._CivisBackendResult(future, on_done)
    assert on_done.call_count == 1
Exemple #22
0
 def test_poll_on_creation(self):
     """With `poll_on_creation=False`, `repr` alone does not poll."""
     # NOTE(review): the Response is passed as `side_effect` rather than
     # `return_value`; confirm that this is intended.
     running = Response({"state": "running"})
     poll_fn = mock.Mock(side_effect=running)
     result = PollableResult(poll_fn, (),
                             polling_interval=0.01,
                             poll_on_creation=False)
     repr(result)
     assert poll_fn.call_count == 0
     # Wait for two polling intervals.
     time.sleep(0.02)
     assert poll_fn.call_count == 1
Exemple #23
0
def test_file_to_dataframe_infer():
    """A plain ".csv" name leaves `compression='infer'` untouched."""
    file_url = 'url'
    file_info = Response({'name': 'spam.csv', 'file_url': file_url})
    client = mock.Mock()
    client.files.get.return_value = file_info

    patched_read_csv = mock.patch.object(civis.io._files.pd, 'read_csv',
                                         autospec=True)
    with patched_read_csv as read_csv:
        civis.io.file_to_dataframe(121, compression='infer', client=client)
        read_csv.assert_called_once_with(file_url, compression='infer')
Exemple #24
0
def test_file_id_from_run_output_approximate():
    """A regex pattern resolves to the matching output's object ID."""
    # Test fuzzy name matching
    gz_output = Response({'name': 'spam.csv.gz', 'object_id': 2013,
                          'object_type': 'File'})
    client = mock.Mock()
    client.scripts.list_containers_runs_outputs.return_value = [gz_output]

    found_id = civis.io.file_id_from_run_output('^spam', 17, 13, regex=True,
                                                client=client)
    assert found_id == 2013
Exemple #25
0
def test_result_success(mock_civis):
    """`get` returns the downloaded value for a succeeded job."""
    # Test that we can get a result back from a succeeded job.
    on_done = mock.MagicMock()
    mock_civis.io.civis_to_file.side_effect = make_to_file_mock('spam')

    # A container future which is already resolved.
    future = ContainerFuture(1, 2, client=mock.MagicMock())
    future.set_result(Response({'state': 'success'}))

    backend_result = civis.parallel._CivisBackendResult(future, on_done)

    assert backend_result.get() == 'spam'
    assert on_done.call_count == 1
Exemple #26
0
def test_file_id_from_run_output_no_file():
    """Asking for an output name which is absent raises FileNotFoundError."""
    # Get an IOError if we request a file which doesn't exist
    spam_output = Response({'name': 'spam', 'object_id': 2013,
                            'object_type': 'File'})
    client = mock.Mock()
    client.scripts.list_containers_runs_outputs.return_value = [spam_output]

    with pytest.raises(FileNotFoundError) as exc_info:
        civis.io.file_id_from_run_output('eggs', 17, 13, client=client)
    message = str(exc_info.value)
    assert 'not an output' in message
Exemple #27
0
 def test_timeout(self):
     """`result` raises TimeoutError when the timeout elapses first."""
     # Note: Something about the test framework seems to prevent the
     # Pollable result from being destroyed while the polling
     # thread is running. The test will hang if the PollableResult
     # never completes. I haven't seen the same problem in
     # the interpreter.
     # The first poll reports "running"; the second poll raises.
     poll_fn = mock.Mock(side_effect=[Response({"state": "running"}),
                                      ValueError()])
     result = PollableResult(poll_fn, (), polling_interval=0.1)
     with pytest.raises(futures.TimeoutError):
         result.result(timeout=0.05)
def test_modelpipeline_init_err():
    """Constructing a ModelPipeline with `etl` raises NotImplementedError.

    The mocked client's ``templates.get_scripts`` raises a CivisAPIError.
    """
    api_error = CivisAPIError(
        Response({'content': None, 'status_code': 9999, 'reason': None}))
    client = mock.MagicMock()
    client.templates.get_scripts.side_effect = api_error

    with pytest.raises(NotImplementedError):
        _model.ModelPipeline(LogisticRegression(), 'test',
                             etl=LogisticRegression(), client=client)
    # clean up
    del _model._NEWEST_CIVISML_VERSION
Exemple #29
0
def test_file_to_dataframe_expired():
    """A file response without a URL raises EmptyResultError."""
    file_info = Response({
        'name': 'spam.csv',
        'file_url': None
    })
    client = mock.Mock()
    client.files.get.return_value = file_info

    expected_message = ('Unable to locate file 121. If it previously '
                        'existed, it may have expired.')
    with pytest.raises(EmptyResultError, match=expected_message):
        civis.io.file_to_dataframe(121, client=client)
def test_modelpipeline_classmethod_constructor_defaults(
        mock_future, container_response_stub):
    """Missing PARAMS / CVPARAMS arguments default to empty dicts."""
    for missing_arg in ('PARAMS', 'CVPARAMS'):
        del container_response_stub.arguments[missing_arg]

    client = mock.Mock()
    client.scripts.get_containers.return_value = container_response_stub
    client.credentials.get.return_value = Response({'name': 'Token'})

    # test everything is working fine
    pipeline = _model.ModelPipeline.from_existing(1, 1, client=client)
    assert pipeline.cv_params == {}
    assert pipeline.parameters == {}