def test_adls2_file_manager_write(storage_account, file_system):
    file_mock = mock.MagicMock()
    adls2_mock = mock.MagicMock()
    adls2_mock.get_file_client.return_value = file_mock
    adls2_mock.account_name = storage_account

    file_manager = ADLS2FileManager(adls2_mock, file_system, 'some-key')
    foo_bytes = 'foo'.encode()

    file_handle = file_manager.write_data(foo_bytes)
    assert isinstance(file_handle, ADLS2FileHandle)
    assert file_handle.account == storage_account
    assert file_handle.file_system == file_system
    assert file_handle.key.startswith('some-key/')
    assert file_mock.upload_data.call_count == 1

    file_handle = file_manager.write_data(foo_bytes, ext='foo')
    assert isinstance(file_handle, ADLS2FileHandle)
    assert file_handle.account == storage_account
    assert file_handle.file_system == file_system
    assert file_handle.key.startswith('some-key/')
    assert file_handle.key[-4:] == '.foo'
    assert file_mock.upload_data.call_count == 2

def connect_with_fetchall_returning(value):
    cursor_mock = mock.MagicMock()
    cursor_mock.fetchall.return_value = value
    snowflake_connect = mock.MagicMock()
    snowflake_connect.cursor.return_value = cursor_mock
    m = mock.Mock()
    m.return_value = snowflake_connect
    return m

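# Hedged usage sketch (not part of the original suite): the factory above returns a callable
# that stands in for a DB-API style connect(); calling it yields a connection whose
# cursor().fetchall() returns the canned value. The test name below is illustrative.
def test_connect_with_fetchall_returning_sketch():
    connect = connect_with_fetchall_returning([('a-row',)])
    conn = connect('ignored-connection-string')
    assert conn.cursor().fetchall() == [('a-row',)]
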
def mock_execute_query_conn(*_args, **_kwargs):
    cursor_mock = mock.MagicMock(rowcount=1)
    cursor_mock.fetchall.return_value = QUERY_RESULT
    conn = mock.MagicMock(is_conn='yup')
    conn.cursor.return_value.__enter__.return_value = cursor_mock
    m = mock.MagicMock()
    m.return_value.__enter__.return_value = conn
    m.return_value = conn
    return m

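# Hedged usage sketch: because the final assignment above makes a plain call return `conn`
# directly, a caller can do `mock_execute_query_conn()()` and then use the cursor as a
# context manager. QUERY_RESULT is the module-level constant referenced above; the test
# name is illustrative.
def test_mock_execute_query_conn_sketch():
    conn = mock_execute_query_conn()()
    assert conn.is_conn == 'yup'
    with conn.cursor() as cursor:
        assert cursor.rowcount == 1
        assert cursor.fetchall() == QUERY_RESULT
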
def create_mocked_client(batch_api=None, core_api=None, logger=None, sleeper=None, timer=None):
    return DagsterKubernetesClient(
        batch_api=batch_api or mock.MagicMock(),
        core_api=core_api or mock.MagicMock(),
        logger=logger or mock.MagicMock(),
        sleeper=sleeper or mock.MagicMock(),
        timer=timer or time.time,
    )

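# Hedged usage sketch: every dependency defaults to a MagicMock, so a test only overrides the
# collaborator it cares about (the tests below do this with `core_api`). The assertion on the
# attribute name assumes DagsterKubernetesClient exposes the injected API object directly, as
# the tests below rely on.
def _sketch_client_with_fake_core_api():
    fake_core_api = mock.MagicMock()
    client = create_mocked_client(core_api=fake_core_api)
    assert client.core_api is fake_core_api
    return client
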
def test_cache_file_from_s3_overwrite():
    with tempfile.TemporaryDirectory() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_one),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_two),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download did not occur because file is already there
        assert s3_session_two.download_file.call_count == 0

def test_cache_file_from_s3_overwrite():
    with get_temp_dir() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_one)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    # wrap the second session the same way as the first so the assertion below
                    # actually observes the session's download_file calls
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_two)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download did not occur because file is already there
        assert s3_session_two.download_file.call_count == 0

def test_resolve_memoized_execution_plan_yes_stored_results():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={(versioned_pipeline.name, step_output_handle): "some_address"}
    )

    memoized_execution_plan = instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default"
    )

    assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input.compute"]

    expected_handle = StepOutputHandle(
        step_key="versioned_solid_no_input.compute", output_name="result"
    )

    assert (
        memoized_execution_plan.step_dict["versioned_solid_takes_input.compute"]
        .step_input_dict["intput"]
        .source.step_output_handle
        == expected_handle
    )

def test_wait_for_ready_but_terminated_unsuccessfully():
    mock_client = create_mocked_client()

    single_not_ready_running_pod = _pod_list_for_container_status(
        _create_status(state=V1ContainerState(running=V1ContainerStateRunning()), ready=False)
    )

    single_pod_terminated_unsuccessful = _pod_list_for_container_status(
        _create_status(
            state=V1ContainerState(
                terminated=V1ContainerStateTerminated(exit_code=1, message="error_message")
            ),
            ready=False,
        )
    )

    mock_client.core_api.list_namespaced_pod.side_effect = [
        single_not_ready_running_pod,
        single_pod_terminated_unsuccessful,
    ]

    retrieve_pod_logs_mock = mock.MagicMock()
    retrieve_pod_logs_mock.side_effect = ["raw_logs_ret_val"]
    mock_client.retrieve_pod_logs = retrieve_pod_logs_mock

    pod_name = "a_pod"
    with pytest.raises(DagsterK8sError) as exc_info:
        mock_client.wait_for_pod(pod_name=pod_name, namespace="namespace")

    assert str(exc_info.value) == (
        'Pod did not exit successfully. Failed with message: "error_message" '
        'and pod logs: "raw_logs_ret_val"'
    )

def test_cache_file_from_s3_step_two_use_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        'boto3.client', new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(resource_defs={'file_cache': fs_file_cache}),
            environment_dict={
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
            },
        )

        assert boto_s3.download_file.call_count == 1
        assert os.path.exists(os.path.join(temp_dir, 'some-key'))

def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}},
                        'config': {'file_key': 'specified-file-key'},
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert 'specified-file-key' in solid_result.output_value().path_desc

def test_gcs_file_manager_write():
    gcs_mock = mock.MagicMock()
    file_manager = GCSFileManager(storage.client.Client(), 'some-bucket', 'some-key')
    file_manager._client = gcs_mock  # pylint:disable=protected-access

    foo_bytes = 'foo'.encode()

    file_handle = file_manager.write_data(foo_bytes)
    assert isinstance(file_handle, GCSFileHandle)
    assert file_handle.gcs_bucket == 'some-bucket'
    assert file_handle.gcs_key.startswith('some-key/')
    assert gcs_mock.get_bucket().blob().upload_from_file.call_count == 1

    file_handle = file_manager.write_data(foo_bytes, ext='foo')
    assert isinstance(file_handle, GCSFileHandle)
    assert file_handle.gcs_bucket == 'some-bucket'
    assert file_handle.gcs_key.startswith('some-key/')
    assert file_handle.gcs_key[-4:] == '.foo'
    assert gcs_mock.get_bucket().blob().upload_from_file.call_count == 2

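# Hedged note on the assertion pattern above: a MagicMock returns the same child mock for a
# given call chain regardless of the arguments passed, which is why `get_bucket().blob()` in
# the assertions refers to the very blob object the file manager wrote to. A minimal sketch
# of that unittest.mock behavior:
def test_magicmock_child_identity_sketch():
    m = mock.MagicMock()
    assert m.get_bucket('a').blob('x') is m.get_bucket('b').blob('y')
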
def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with tempfile.TemporaryDirectory() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}},
                        "config": {"file_key": "specified-file-key"},
                    }
                },
                "resources": {"file_cache": {"config": {"target_folder": temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "specified-file-key" in solid_result.output_value().path_desc

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'db_info': ResourceDefinition.hardcoded_resource(db_info_mock),
                    'pyspark': pyspark_resource,
                    'pyspark_step_launcher': no_step_launcher,
                }
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        run_config={
            'solids': {'load_data_to_database_from_spark': {'config': {'table_name': 'foo'}}}
        },
    ).result_for_solid('load_data_to_database_from_spark')

    assert solid_result.success

    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url="url",
        jdbc_url="url",
        dialect="dialect",
        load_table=mock.MagicMock(),
        host="host",
        db_name="db_name",
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "db_info": ResourceDefinition.hardcoded_resource(db_info_mock),
                    "pyspark": pyspark_resource,
                    "pyspark_step_launcher": no_step_launcher,
                }
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        run_config={
            "solids": {"load_data_to_database_from_spark": {"config": {"table_name": "foo"}}}
        },
    ).result_for_solid("load_data_to_database_from_spark")

    assert solid_result.success

    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries["Host"].entry_data.text == "host"
    assert entries["Db"].entry_data.text == "db_name"

def intercept_spark_submit(*args, **kwargs):
    if args[0] == ['spark-submit', '--master', '', '--name', 'airflow-spark', 'some_path.py']:
        m = mock.MagicMock()
        m.stdout.readline.return_value = ''
        m.wait.return_value = 0
        return m
    else:
        return subprocess.Popen(*args, **kwargs)

def test_defensive_pipelines_cannot_decompress():
    mock_logger = mock.MagicMock()
    assert defensively_unpack_pipeline_snapshot_query(mock_logger, ["notbytes".encode()]) is None
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: Could not decompress bytes stored in snapshot table."
    )

def test_defensive_pipelines_cannot_parse_json():
    mock_logger = mock.MagicMock()
    assert (
        defensively_unpack_pipeline_snapshot_query(mock_logger, [zlib.compress(b"notjson")])
        is None
    )
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: Could not parse json in snapshot table."
    )

def intercept_spark_submit(*args, **kwargs):
    if args[0] == ["spark-submit", "--master", "", "--name", "airflow-spark", "some_path.py"]:
        m = mock.MagicMock()
        m.stdout.readline.return_value = ""
        m.wait.return_value = 0
        return m
    else:
        return subprocess.Popen(*args, **kwargs)

def test_temporary_error_or_deletion_after_instance_check():
    instance = mock.MagicMock()
    instance.has_historical_pipeline.return_value = True
    instance.get_historical_pipeline.return_value = None

    with pytest.raises(UserFacingGraphQLError):
        _get_pipeline_snapshot_from_instance(instance, "kjdkfjd")

def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin,
    )
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        pipeline_name = "demo_pipeline"
        run = create_run_for_test(instance, pipeline_name=pipeline_name, tags=tags)
        k8s_run_launcher.initialize(instance)
        k8s_run_launcher.launch_run(None, run, fake_external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    mock_method_calls = mock_k8s_client_batch_api.method_calls
    assert len(mock_method_calls) > 0
    method_name, _args, kwargs = mock_method_calls[0]
    assert method_name == "create_namespaced_job"
    job_resources = kwargs["body"].spec.template.spec.containers[0].resources
    assert job_resources == expected_resources

def test_defensive_pipeline_not_a_string():
    mock_logger = mock.MagicMock()
    assert defensively_unpack_pipeline_snapshot_query(mock_logger, [234]) is None
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: First entry in row is not a binary type."
    )

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    pipeline_def = PipelineDefinition(
        name='load_df_test',
        solid_defs=[load_data_to_database_from_spark],
        mode_definitions=[
            ModeDefinition(
                resources={'db_info': ResourceDefinition.hardcoded_resource(db_info_mock)}
            )
        ],
    )

    solid_result = execute_solid(
        pipeline_def,
        'load_data_to_database_from_spark',
        inputs={'data_frame': mock.MagicMock(spec=DataFrame)},
        environment_dict={
            'solids': {'load_data_to_database_from_spark': {'config': {'table_name': 'foo'}}}
        },
    )

    assert solid_result.success

    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'

def create_timing_out_timer(num_good_ticks):
    mock_timer = mock.MagicMock()
    times = [1593697070.443257]  # fixed time on 7/2/2020
    i = 0
    while i < num_good_ticks:
        times.append(times[-1] + 1)
        i += 1
    times.append(times[-1] + TIMEOUT_GAP)
    mock_timer.side_effect = times
    return mock_timer

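# Hedged sketch of the values the helper above produces: after the first tick it yields
# `num_good_ticks` timestamps spaced one second apart, then a final one that jumps by the
# module-level TIMEOUT_GAP, so polling code eventually observes a timeout. The test name is
# illustrative.
def test_create_timing_out_timer_sketch():
    timer = create_timing_out_timer(num_good_ticks=2)
    start = timer()
    assert timer() - start == 1
    assert timer() - start == 2
    assert timer() - start >= TIMEOUT_GAP
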
def test_defensive_pipelines_cannot_decode_post_decompress():
    mock_logger = mock.MagicMock()
    # guarantee that we cannot decode by double compressing bytes.
    assert (
        defensively_unpack_pipeline_snapshot_query(
            mock_logger, [zlib.compress(zlib.compress(b"notbytes"))]
        )
        is None
    )
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: Could not unicode decode decompressed bytes "
        "stored in snapshot table."
    )

def test_resolve_memoized_execution_plan_partial_versioning():
    speculative_execution_plan = create_execution_plan(partially_versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={(partially_versioned_pipeline.name, step_output_handle): "some_address"}
    )

    assert instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default"
    ).step_keys_to_execute == ["solid_takes_input.compute"]

def test_cache_file_from_s3_step_three_mock():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3_session),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        assert s3_session.download_file.call_count == 1
        assert os.path.exists(os.path.join(temp_dir, 'some-key'))

def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        pipeline_result = execute_solid_with_resources(
            cache_file_from_s3,
            resources={
                'file_cache': fs_file_cache,
                's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
            },
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'bucket_data': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1

        assert pipeline_result.success

        solid_result = pipeline_result.result_for_solid('cache_file_from_s3')
        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == 'file_handle_exists'
        path_in_metadata = expectation_result.metadata_entries[0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.result_value(), LocalFileHandle)
        assert 'some-key' in solid_result.result_value().path_desc

def test_resolve_unmemoized_steps_no_stored_results():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(return_value={})

    assert set(
        instance.resolve_unmemoized_steps(
            speculative_execution_plan, run_config={}, mode="default"
        )
    ) == {
        "versioned_solid_no_input.compute",
        "versioned_solid_takes_input.compute",
    }

def test_cache_file_from_s3_step_two_skip_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        "boto3.client", new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition.from_resources({"file_cache": FSFileCache(temp_dir)}),
            input_values={"s3_coord": {"bucket": "some-bucket", "key": "some-key"}},
        )

        assert boto_s3.download_file.call_count == 1
        assert os.path.exists(os.path.join(temp_dir, "some-key"))

def test_s3_file_manager_write():
    s3_mock = mock.MagicMock()
    file_manager = S3FileManager(s3_mock, 'some-bucket', 'some-key')
    foo_bytes = 'foo'.encode()

    file_handle = file_manager.write_data(foo_bytes)
    assert isinstance(file_handle, S3FileHandle)
    assert file_handle.s3_bucket == 'some-bucket'
    assert file_handle.s3_key.startswith('some-key/')
    assert s3_mock.put_object.call_count == 1