def test_run_cleaning(self, m_run_job, fids):
    """Check that ``_run_cleaning`` starts one cleaning job per file ID.

    ``m_run_job`` and ``fids`` are supplied from outside this block
    (presumably by patch/parametrize decorators -- confirm against the
    decorators above this method).
    """
    def mock_preprocess(*args, **kwargs):
        # The assertion below expects the endpoint to be called with
        # keyword arguments, so take the file ID by keyword (falling back
        # to the first positional argument) and ignore the other options.
        fid = kwargs['file_id'] if 'file_id' in kwargs else args[0]
        # Echo the file ID back as the ID of the fake cleaning job.
        return Response({'id': fid})

    preprocess = self.mock_client.files.post_preprocess_csv
    preprocess.side_effect = mock_preprocess

    res = civis.io._tables._run_cleaning(
        fids, self.mock_client, True, True, 'comma', True)

    # We should have one cleaning job per provided file id
    assert len(res) == len(fids)

    preprocess.assert_has_calls([
        mock.call(
            file_id=fid,
            in_place=False,
            detect_table_columns=True,
            force_character_set_conversion=True,
            include_header=True,
            column_delimiter='comma',
            hidden=True)
        for fid in fids
    ])
    # The stub reuses each file ID as the job ID, so the expected
    # run-job calls are keyed by the same values.
    m_run_job.assert_has_calls([
        mock.call(jid, client=self.mock_client, polling_interval=None)
        for jid in fids
    ])
def test_template_submit(mock_file, mock_result, mock_pool):
    """Verify that creating child jobs from a template looks like we expect.

    The three mock arguments are injected from outside this block.
    ``mock_file`` supplies the file ID that each submitted job is expected
    to receive as ``JOBLIB_FUNC_FILE_ID``.
    """
    file_id = 17
    mock_client = mock.Mock()
    mock_file.return_value = file_id
    factory = civis.parallel.make_backend_template_factory(
        from_template_id=1234, client=mock_client)
    n_calls = 3
    register_parallel_backend('civis', factory)
    with parallel_backend('civis'):
        # NB: joblib >v0.11 relies on callbacks from the result object to
        # decide when it's done consuming inputs. The result object is
        # mocked here, so those callbacks never fire; Parallel must be
        # called either with n_jobs=1 or pre_dispatch='all' to consume
        # the inputs all at once.
        parallel = Parallel(n_jobs=1, pre_dispatch='n_jobs')
        parallel(delayed(sqrt)(i**2) for i in range(n_calls))

    # Tie the expected counts to n_calls rather than repeating the literal.
    assert mock_file.call_count == n_calls, "Upload 3 functions to run"
    assert mock_pool().submit.call_count == n_calls, "Run 3 functions"
    for this_call in mock_pool().submit.call_args_list:
        assert this_call == mock.call(JOBLIB_FUNC_FILE_ID=file_id)
    assert mock_result.call_count == n_calls, "Create 3 results"
def test_template_submit(mock_file, mock_result, mock_pool):
    """Check the jobs submitted when running joblib from a template.

    Runs three ``sqrt`` tasks through the 'civis' backend built from a
    template factory. It then checks that one function file is uploaded,
    one job is submitted and one result is created per task, with every
    submission carrying the uploaded file ID.
    """
    n_calls = 3
    file_id = 17
    mock_file.return_value = file_id
    mock_client = mock.Mock()

    backend_factory = civis.parallel.make_backend_template_factory(
        from_template_id=1234, client=mock_client)
    register_parallel_backend('civis', backend_factory)

    with parallel_backend('civis'):
        # NB: joblib >v0.11 relies on callbacks from the result object to
        # decide when it's done consuming inputs. We've mocked the result
        # object here, so Parallel must be called either with n_jobs=1 or
        # pre_dispatch='all' to consume the inputs all at once.
        runner = Parallel(n_jobs=1, pre_dispatch='n_jobs')
        runner(delayed(sqrt)(i**2) for i in range(n_calls))

    assert mock_file.call_count == 3, "Upload 3 functions to run"
    assert mock_pool().submit.call_count == n_calls, "Run 3 functions"

    # Every child job must be handed the ID of the uploaded function file.
    expected_submit = mock.call(JOBLIB_FUNC_FILE_ID=file_id)
    for submitted in mock_pool().submit.call_args_list:
        assert submitted == expected_submit

    assert mock_result.call_count == 3, "Create 3 results"