Example #1
def test_adls2_file_manager_write(storage_account, file_system):
    file_mock = mock.MagicMock()
    adls2_mock = mock.MagicMock()
    adls2_mock.get_file_client.return_value = file_mock
    adls2_mock.account_name = storage_account
    file_manager = ADLS2FileManager(adls2_mock, file_system, 'some-key')

    foo_bytes = 'foo'.encode()

    file_handle = file_manager.write_data(foo_bytes)

    assert isinstance(file_handle, ADLS2FileHandle)

    assert file_handle.account == storage_account
    assert file_handle.file_system == file_system
    assert file_handle.key.startswith('some-key/')

    assert file_mock.upload_data.call_count == 1

    file_handle = file_manager.write_data(foo_bytes, ext='foo')

    assert isinstance(file_handle, ADLS2FileHandle)

    assert file_handle.account == storage_account
    assert file_handle.file_system == file_system
    assert file_handle.key.startswith('some-key/')
    assert file_handle.key[-4:] == '.foo'

    assert file_mock.upload_data.call_count == 2
Example #2
def connect_with_fetchall_returning(value):
    cursor_mock = mock.MagicMock()
    cursor_mock.fetchall.return_value = value
    snowflake_connect = mock.MagicMock()
    snowflake_connect.cursor.return_value = cursor_mock
    m = mock.Mock()
    m.return_value = snowflake_connect
    return m
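A minimal, self-contained sketch of how a factory like connect_with_fetchall_returning might be exercised; the run_query helper and the query text below are hypothetical and not taken from the source:

def run_query(connect, sql):
    # Hypothetical code under test: open a connection, run the query, fetch rows.
    conn = connect()
    cursor = conn.cursor()
    cursor.execute(sql)
    return cursor.fetchall()


def test_run_query_returns_stubbed_rows():
    fake_connect = connect_with_fetchall_returning([(1,), (2,)])
    assert run_query(fake_connect, "SELECT 1") == [(1,), (2,)]
    # The factory returns a mock.Mock, so the call itself can be asserted on.
    fake_connect.assert_called_once()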
Example #3
def mock_execute_query_conn(*_args, **_kwargs):
    cursor_mock = mock.MagicMock(rowcount=1)
    cursor_mock.fetchall.return_value = QUERY_RESULT
    conn = mock.MagicMock(is_conn='yup')
    conn.cursor.return_value.__enter__.return_value = cursor_mock
    m = mock.MagicMock()
    m.return_value.__enter__.return_value = conn
    # Reassigning return_value means m(...) hands back the connection directly.
    m.return_value = conn
    return m
Example #4
def create_mocked_client(batch_api=None,
                         core_api=None,
                         logger=None,
                         sleeper=None,
                         timer=None):
    return DagsterKubernetesClient(
        batch_api=batch_api or mock.MagicMock(),
        core_api=core_api or mock.MagicMock(),
        logger=logger or mock.MagicMock(),
        sleeper=sleeper or mock.MagicMock(),
        timer=timer or time.time,
    )
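A short usage sketch, assuming (as Example #8 below also relies on) that DagsterKubernetesClient keeps the injected core_api as an attribute; the namespace value is illustrative only:

from unittest import mock

core_api = mock.MagicMock()
client = create_mocked_client(core_api=core_api)

# Because the collaborator is a MagicMock, every call made through it is
# recorded and can be asserted on after the fact.
client.core_api.list_namespaced_pod(namespace="default")
core_api.list_namespaced_pod.assert_called_once_with(namespace="default")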
Example #5
def test_cache_file_from_s3_overwrite():
    with tempfile.TemporaryDirectory() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_one),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_two),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download did not occur because file is already there
        assert s3_session_two.download_file.call_count == 0
Example #6
def test_cache_file_from_s3_overwrite():
    with get_temp_dir() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_one)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(s3_session_two),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download did not occur because file is already there
        assert s3_session_two.download_file.call_count == 0
Example #7
def test_resolve_memoized_execution_plan_yes_stored_results():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={(versioned_pipeline.name, step_output_handle): "some_address"}
    )

    memoized_execution_plan = instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default"
    )

    assert memoized_execution_plan.step_keys_to_execute == ["versioned_solid_takes_input.compute"]

    expected_handle = StepOutputHandle(
        step_key="versioned_solid_no_input.compute", output_name="result"
    )

    assert (
        memoized_execution_plan.step_dict["versioned_solid_takes_input.compute"]
        .step_input_dict["intput"]
        .source.step_output_handle
        == expected_handle
    )
Example #8
def test_wait_for_ready_but_terminated_unsuccessfully():
    mock_client = create_mocked_client()

    single_not_ready_running_pod = _pod_list_for_container_status(
        _create_status(
            state=V1ContainerState(running=V1ContainerStateRunning()),
            ready=False))

    single_pod_terminated_unsuccessful = _pod_list_for_container_status(
        _create_status(
            state=V1ContainerState(terminated=V1ContainerStateTerminated(
                exit_code=1, message="error_message")),
            ready=False,
        ))

    mock_client.core_api.list_namespaced_pod.side_effect = [
        single_not_ready_running_pod,
        single_pod_terminated_unsuccessful,
    ]

    retrieve_pod_logs_mock = mock.MagicMock()
    retrieve_pod_logs_mock.side_effect = ["raw_logs_ret_val"]
    mock_client.retrieve_pod_logs = retrieve_pod_logs_mock

    pod_name = "a_pod"

    with pytest.raises(DagsterK8sError) as exc_info:
        mock_client.wait_for_pod(pod_name=pod_name, namespace="namespace")

    assert str(exc_info.value) == (
        'Pod did not exit successfully. Failed with message: "error_message" '
        'and pod logs: "raw_logs_ret_val"')
Example #9
def test_cache_file_from_s3_step_two_use_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
            'boto3.client', new=lambda *_args, **_kwargs: boto_s3):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(resource_defs={'file_cache': fs_file_cache}),
            environment_dict={
                'resources': {
                    'file_cache': {
                        'config': {
                            'target_folder': temp_dir
                        }
                    }
                },
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {
                            's3_coord': {
                                'bucket': 'some-bucket',
                                'key': 'some-key'
                            }
                        }
                    }
                },
            },
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #10
def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}},
                        'config': {'file_key': 'specified-file-key'},
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert 'specified-file-key' in solid_result.output_value().path_desc
Example #11
def test_gcs_file_manager_write():
    gcs_mock = mock.MagicMock()
    file_manager = GCSFileManager(storage.client.Client(), 'some-bucket',
                                  'some-key')
    file_manager._client = gcs_mock  # pylint:disable=protected-access

    foo_bytes = 'foo'.encode()

    file_handle = file_manager.write_data(foo_bytes)

    assert isinstance(file_handle, GCSFileHandle)

    assert file_handle.gcs_bucket == 'some-bucket'
    assert file_handle.gcs_key.startswith('some-key/')

    assert gcs_mock.get_bucket().blob().upload_from_file.call_count == 1

    file_handle = file_manager.write_data(foo_bytes, ext='foo')

    assert isinstance(file_handle, GCSFileHandle)

    assert file_handle.gcs_bucket == 'some-bucket'
    assert file_handle.gcs_key.startswith('some-key/')
    assert file_handle.gcs_key[-4:] == '.foo'

    assert gcs_mock.get_bucket().blob().upload_from_file.call_count == 2
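The call-count assertions above lean on a MagicMock property worth spelling out: chained calls such as get_bucket(...).blob(...) always resolve to the same child mock no matter what arguments are passed, so the counts accumulate across distinct buckets and keys. A standalone sketch (the names are illustrative):

from unittest import mock

client = mock.MagicMock()
client.get_bucket("a").blob("x").upload_from_file("f1")
client.get_bucket("b").blob("y").upload_from_file("f2")

# get_bucket() and blob() return the same return_value regardless of their
# arguments, so both uploads land on a single child mock.
assert client.get_bucket().blob().upload_from_file.call_count == 2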
Example #12
def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with tempfile.TemporaryDirectory() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}},
                        "config": {"file_key": "specified-file-key"},
                    }
                },
                "resources": {"file_cache": {"config": {"target_folder": temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "specified-file-key" in solid_result.output_value().path_desc
Example #13
def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                'db_info': ResourceDefinition.hardcoded_resource(db_info_mock),
                'pyspark': pyspark_resource,
                'pyspark_step_launcher': no_step_launcher,
            })
    ])
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        run_config={
            'solids': {
                'load_data_to_database_from_spark': {
                    'config': {
                        'table_name': 'foo'
                    }
                }
            }
        },
    ).result_for_solid('load_data_to_database_from_spark')

    assert solid_result.success
    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'
Example #14
def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url="url",
        jdbc_url="url",
        dialect="dialect",
        load_table=mock.MagicMock(),
        host="host",
        db_name="db_name",
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                "db_info": ResourceDefinition.hardcoded_resource(db_info_mock),
                "pyspark": pyspark_resource,
                "pyspark_step_launcher": no_step_launcher,
            })
    ])
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        run_config={
            "solids": {
                "load_data_to_database_from_spark": {
                    "config": {
                        "table_name": "foo"
                    }
                }
            }
        },
    ).result_for_solid("load_data_to_database_from_spark")

    assert solid_result.success
    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries["Host"].entry_data.text == "host"
    assert entries["Db"].entry_data.text == "db_name"
Example #15
def intercept_spark_submit(*args, **kwargs):
    if args[0] == ['spark-submit', '--master', '', '--name', 'airflow-spark', 'some_path.py']:
        m = mock.MagicMock()
        m.stdout.readline.return_value = ''
        m.wait.return_value = 0
        return m
    else:
        return subprocess.Popen(*args, **kwargs)
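A hedged sketch of how an interceptor like this might be wired up with mock.patch; the patch target and the spark-submit argument list are assumptions for illustration, not taken from the source:

import subprocess
from unittest import mock

with mock.patch("subprocess.Popen", side_effect=intercept_spark_submit):
    proc = subprocess.Popen(
        ["spark-submit", "--master", "", "--name", "airflow-spark", "some_path.py"]
    )
    # The matching invocation receives the fake process, which reports success
    # without actually launching Spark.
    assert proc.wait() == 0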
Example #16
def test_defensive_pipelines_cannot_decompress():
    mock_logger = mock.MagicMock()

    assert defensively_unpack_pipeline_snapshot_query(mock_logger, ["notbytes".encode()]) is None
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: Could not decompress bytes stored in snapshot table."
    )
Example #17
def test_defensive_pipelines_cannot_parse_json():
    mock_logger = mock.MagicMock()

    assert (defensively_unpack_pipeline_snapshot_query(
        mock_logger, [zlib.compress(b"notjson")]) is None)
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: Could not parse json in snapshot table.")
Example #18
def intercept_spark_submit(*args, **kwargs):
    if args[0] == ["spark-submit", "--master", "", "--name", "airflow-spark", "some_path.py"]:
        m = mock.MagicMock()
        m.stdout.readline.return_value = ""
        m.wait.return_value = 0
        return m
    else:
        return subprocess.Popen(*args, **kwargs)
Example #19
def test_temporary_error_or_deletion_after_instance_check():
    instance = mock.MagicMock()

    instance.has_historical_pipeline.return_value = True
    instance.get_historical_pipeline.return_value = None

    with pytest.raises(UserFacingGraphQLError):
        _get_pipeline_snapshot_from_instance(instance, "kjdkfjd")
Example #20
def test_user_defined_k8s_config_in_run_tags(kubeconfig_file):
    # Construct a K8s run launcher in a fake k8s environment.
    mock_k8s_client_batch_api = mock.MagicMock()
    k8s_run_launcher = K8sRunLauncher(
        service_account_name="dagit-admin",
        instance_config_map="dagster-instance",
        postgres_password_secret="dagster-postgresql-secret",
        dagster_home="/opt/dagster/dagster_home",
        job_image="fake_job_image",
        load_incluster_config=False,
        kubeconfig_file=kubeconfig_file,
        k8s_client_batch_api=mock_k8s_client_batch_api,
    )

    # Construct Dagster run tags with user defined k8s config.
    expected_resources = {
        "requests": {"cpu": "250m", "memory": "64Mi"},
        "limits": {"cpu": "500m", "memory": "2560Mi"},
    }
    user_defined_k8s_config = UserDefinedDagsterK8sConfig(
        container_config={"resources": expected_resources},
    )
    user_defined_k8s_config_json = json.dumps(user_defined_k8s_config.to_dict())
    tags = {"dagster-k8s/config": user_defined_k8s_config_json}

    # Create fake external pipeline.
    recon_pipeline = reconstructable(fake_pipeline)
    recon_repo = recon_pipeline.repository
    repo_def = recon_repo.get_definition()
    location_origin = InProcessRepositoryLocationOrigin(recon_repo)
    location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
        location_origin,
    )
    repo_handle = RepositoryHandle(
        repository_name=repo_def.name,
        repository_location_handle=location_handle,
    )
    fake_external_pipeline = external_pipeline_from_recon_pipeline(
        recon_pipeline,
        solid_selection=None,
        repository_handle=repo_handle,
    )

    # Launch the run in a fake Dagster instance.
    with instance_for_test() as instance:
        pipeline_name = "demo_pipeline"
        run = create_run_for_test(instance, pipeline_name=pipeline_name, tags=tags)
        k8s_run_launcher.initialize(instance)
        k8s_run_launcher.launch_run(None, run, fake_external_pipeline)

    # Check that user defined k8s config was passed down to the k8s job.
    mock_method_calls = mock_k8s_client_batch_api.method_calls
    assert len(mock_method_calls) > 0
    method_name, _args, kwargs = mock_method_calls[0]
    assert method_name == "create_namespaced_job"
    job_resources = kwargs["body"].spec.template.spec.containers[0].resources
    assert job_resources == expected_resources
Example #21
def test_defensive_pipeline_not_a_string():
    mock_logger = mock.MagicMock()

    assert defensively_unpack_pipeline_snapshot_query(mock_logger,
                                                      [234]) is None
    assert mock_logger.warning.call_count == 1

    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: First entry in row is not a binary type.")
Example #22
def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )
    pipeline_def = PipelineDefinition(
        name='load_df_test',
        solid_defs=[load_data_to_database_from_spark],
        mode_definitions=[
            ModeDefinition(resources={
                'db_info':
                ResourceDefinition.hardcoded_resource(db_info_mock)
            })
        ],
    )

    solid_result = execute_solid(
        pipeline_def,
        'load_data_to_database_from_spark',
        inputs={'data_frame': mock.MagicMock(spec=DataFrame)},
        environment_dict={
            'solids': {
                'load_data_to_database_from_spark': {
                    'config': {
                        'table_name': 'foo'
                    }
                }
            }
        },
    )
    assert solid_result.success
    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'
Example #23
def create_timing_out_timer(num_good_ticks):
    mock_timer = mock.MagicMock()
    times = [1593697070.443257]  # fixed time on 7/2/2020
    i = 0
    while i < num_good_ticks:
        times.append(times[-1] + 1)
        i += 1
    times.append(times[-1] + TIMEOUT_GAP)
    mock_timer.side_effect = times
    return mock_timer
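Because side_effect is set to a list, each call to the returned mock yields the next timestamp in sequence. A small sketch of that behaviour, assuming TIMEOUT_GAP (defined in the surrounding test module) is larger than one second:

timer = create_timing_out_timer(num_good_ticks=3)
readings = [timer() for _ in range(5)]  # 1 start time + 3 good ticks + 1 late tick

assert all(b - a == 1 for a, b in zip(readings, readings[1:4]))  # good ticks advance by 1s
assert readings[4] - readings[3] > 1  # the final reading jumps past the timeout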
Example #24
def test_defensive_pipelines_cannot_decode_post_decompress():
    mock_logger = mock.MagicMock()

    # guarantee that we cannot decode by double compressing bytes.
    assert (defensively_unpack_pipeline_snapshot_query(
        mock_logger, [zlib.compress(zlib.compress(b"notbytes"))]) is None)
    assert mock_logger.warning.call_count == 1
    mock_logger.warning.assert_called_with(
        "get-pipeline-snapshot: Could not unicode decode decompressed bytes "
        "stored in snapshot table.")
Example #25
def test_resolve_memoized_execution_plan_partial_versioning():
    speculative_execution_plan = create_execution_plan(partially_versioned_pipeline)
    step_output_handle = StepOutputHandle("versioned_solid_no_input.compute", "result")

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={(partially_versioned_pipeline.name, step_output_handle): "some_address"}
    )

    assert instance.resolve_memoized_execution_plan(
        speculative_execution_plan, run_config={}, mode="default"
    ).step_keys_to_execute == ["solid_takes_input.compute"]
Example #26
def test_cache_file_from_s3_step_three_mock():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        execute_solid(
            cache_file_from_s3,
            unittest_for_local_mode_def(temp_dir, s3_session),
            input_values={'s3_coord': {'bucket': 'some-bucket', 'key': 'some-key'}},
        )

        assert s3_session.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, 'some-key'))
Example #27
def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        pipeline_result = execute_solid_with_resources(
            cache_file_from_s3,
            resources={
                'file_cache':
                fs_file_cache,
                's3':
                ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
            },
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {
                            'bucket_data': {
                                'bucket': 'some-bucket',
                                'key': 'some-key'
                            }
                        }
                    }
                },
                'resources': {
                    'file_cache': {
                        'config': {
                            'target_folder': temp_dir
                        }
                    }
                },
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1

        assert pipeline_result.success

        solid_result = pipeline_result.result_for_solid('cache_file_from_s3')

        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == 'file_handle_exists'
        path_in_metadata = expectation_result.metadata_entries[
            0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.result_value(), LocalFileHandle)
        assert 'some-key' in solid_result.result_value().path_desc
Example #28
def test_resolve_unmemoized_steps_no_stored_results():
    speculative_execution_plan = create_execution_plan(versioned_pipeline)

    instance = DagsterInstance.ephemeral()
    instance.get_addresses_for_step_output_versions = mock.MagicMock(
        return_value={})

    assert set(
        instance.resolve_unmemoized_steps(
            speculative_execution_plan, run_config={}, mode="default")) == {
                "versioned_solid_no_input.compute",
                "versioned_solid_takes_input.compute"
            }
Example #29
def test_cache_file_from_s3_step_two_skip_config():
    boto_s3 = mock.MagicMock()
    with get_temp_dir() as temp_dir, mock.patch(
        "boto3.client", new=lambda *_args, **_kwargs: boto_s3
    ):
        execute_solid(
            cache_file_from_s3,
            ModeDefinition.from_resources({"file_cache": FSFileCache(temp_dir)}),
            input_values={"s3_coord": {"bucket": "some-bucket", "key": "some-key"}},
        )

        assert boto_s3.download_file.call_count == 1

        assert os.path.exists(os.path.join(temp_dir, "some-key"))
Example #30
def test_s3_file_manager_write():
    s3_mock = mock.MagicMock()
    file_manager = S3FileManager(s3_mock, 'some-bucket', 'some-key')

    foo_bytes = 'foo'.encode()

    file_handle = file_manager.write_data(foo_bytes)

    assert isinstance(file_handle, S3FileHandle)

    assert file_handle.s3_bucket == 'some-bucket'
    assert file_handle.s3_key.startswith('some-key/')

    assert s3_mock.put_object.call_count == 1