Example 1: test_dagster_telemetry_enabled verifies the UPDATE_REPO_STATS metadata logged when telemetry is enabled, using the instance_for_test helper.
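These snippets are excerpted from Dagster's telemetry tests and run-creation helpers, so they rely on module-level imports and constants that the excerpts omit. A best-effort sketch of the shared preamble follows; the dagster import paths are assumptions that shift between releases, so they are left commented out:

import json
import os
import tempfile
from difflib import SequenceMatcher
from unittest import mock

import yaml
from click.testing import CliRunner

# Assumed locations (these vary across Dagster releases):
# from dagster.core.telemetry import UPDATE_REPO_STATS, hash_name
# from dagster.core.test_utils import environ, instance_for_test
# from dagster.utils import file_relative_path, pushd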
def test_dagster_telemetry_enabled(caplog):
    with instance_for_test(overrides={"telemetry": {"enabled": True}}):
        runner = CliRunner()
        with pushd(path_to_file("")):
            pipeline_attribute = "foo_pipeline"
            pipeline_name = "foo"
            result = runner.invoke(
                pipeline_execute_command,
                [
                    "-f",
                    path_to_file("test_cli_commands.py"),
                    "-a",
                    pipeline_attribute,
                ],
            )

            for record in caplog.records:
                message = json.loads(record.getMessage())
                if message.get("action") == UPDATE_REPO_STATS:
                    metadata = message.get("metadata")
                    assert metadata.get("pipeline_name_hash") == hash_name(pipeline_name)
                    assert metadata.get("num_pipelines_in_repo") == str(1)
                    assert metadata.get("repo_hash") == hash_name(
                        get_ephemeral_repository_name(pipeline_name)
                    )
                assert set(message.keys()) == EXPECTED_KEYS
            assert len(caplog.records) == 5
            assert result.exit_code == 0
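Several tests in this set compare each telemetry message against a module-level EXPECTED_KEYS constant that the excerpts do not show. A plausible sketch, assuming it matches the key set that the test_dagit_logs examples below spell out inline:

# Assumed definition; the real constant lives in the test module and is
# inferred here from the inline key sets in Examples 6, 11, and 13.
EXPECTED_KEYS = set(
    [
        "action",
        "client_time",
        "elapsed_time",
        "event_id",
        "instance_id",
        "pipeline_name_hash",
        "num_pipelines_in_repo",
        "repo_hash",
        "python_version",
        "metadata",
        "version",
    ]
)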
Example 2: test_repo_stats verifies repo-stats telemetry for a repository containing four pipelines.
def test_repo_stats(caplog):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test(temp_dir=temp_dir, overrides={"telemetry": {"enabled": True}}):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_name = "multi_mode_with_resources"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        file_relative_path(__file__, "../../general_tests/test_repository.py"),
                        "-a",
                        "dagster_test_repository",
                        "-p",
                        pipeline_name,
                        "--preset",
                        "add",
                        "--tags",
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        metadata = message.get("metadata")
                        assert metadata.get("pipeline_name_hash") == hash_name(pipeline_name)
                        assert metadata.get("num_pipelines_in_repo") == str(4)
                        assert metadata.get("repo_hash") == hash_name("dagster_test_repository")
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example 3: an older test_dagster_telemetry_enabled variant that sets the instance up by hand with seven.TemporaryDirectory and DagsterInstance.local_temp.
def test_dagster_telemetry_enabled(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump(
                    {'telemetry': {'enabled': True}},
                    fd,
                    default_flow_style=False,
                )

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_file('')):
                pipeline_attribute = 'foo_pipeline'
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        '-f',
                        path_to_file('test_cli_commands.py'),
                        '-a',
                        pipeline_attribute,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == hash_name(
                            'foo')
                        assert message.get('num_pipelines_in_repo') == str(1)
                        assert message.get('repo_hash') == hash_name(
                            EPHEMERAL_NAME)
                    assert set(message.keys()) == EXPECTED_KEYS
                assert len(caplog.records) == 5
                assert result.exit_code == 0
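Examples 3 and 9 through 13 predate the instance_for_test helper and wire the instance up by hand: a temporary DAGSTER_HOME, a hand-written dagster.yaml, and DagsterInstance.local_temp. Judging by Example 1, the newer helper collapses those steps into one context manager; a rough equivalence sketch, assuming overrides is serialized into dagster.yaml:

# Older style (Example 3): manual temp dir, config file, and instance.
with seven.TemporaryDirectory() as temp_dir:
    with environ({"DAGSTER_HOME": temp_dir}):
        with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
            yaml.dump({"telemetry": {"enabled": True}}, fd, default_flow_style=False)
        DagsterInstance.local_temp(temp_dir)
        ...

# Newer style (Example 1): one helper does the equivalent setup and teardown.
with instance_for_test(overrides={"telemetry": {"enabled": True}}):
    ...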
Example 4: test_dagster_telemetry_unset shows that telemetry defaults to enabled when dagster.yaml does not configure it.
def test_dagster_telemetry_unset(caplog):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"), "-a",
                        pipeline_attribute
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name))
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example 5: _create_scheduler_run logs a SCHEDULED_RUN_CREATED telemetry action before creating the scheduled run.
def _create_scheduler_run(
    instance,
    schedule_time,
    repo_location,
    external_schedule,
    external_pipeline,
    run_request,
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    run_config = run_request.run_config
    schedule_tags = run_request.tags

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        external_schedule.mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    tags[SCHEDULED_EXECUTION_TIME_TAG] = to_timezone(schedule_time, "UTC").isoformat()
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    log_action(
        instance,
        SCHEDULED_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SCHEDULE_NAME_HASH": hash_name(external_schedule.name),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    return instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example 6: test_dagit_logs verifies the START_DAGIT_WEBSERVER and UPDATE_REPO_STATS actions emitted on dagit startup.
def test_dagit_logs(
    server_mock,
    caplog,
):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test(temp_dir=temp_dir, overrides={"telemetry": {"enabled": True}}):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            workspace_path = file_relative_path(__file__, "telemetry_repository.yaml")
            result = runner.invoke(
                ui,
                ["-w", workspace_path],
            )
            assert result.exit_code == 0, str(result.exception)

            expected_repo_stats = {
                hash_name("test_repository"): 1,
                hash_name("dagster_test_repository"): 4,
            }
            actions = set()
            records = []
            for record in caplog.records:
                try:
                    message = json.loads(record.getMessage())
                except seven.JSONDecodeError:
                    continue

                records.append(record)

                actions.add(message.get("action"))
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == ""
                    repo_hash = message.get("repo_hash")

                    assert repo_hash in expected_repo_stats
                    expected_num_pipelines_in_repo = expected_repo_stats.get(repo_hash)
                    assert message.get("num_pipelines_in_repo") == str(
                        expected_num_pipelines_in_repo
                    )

                assert set(message.keys()) == set(
                    [
                        "action",
                        "client_time",
                        "elapsed_time",
                        "event_id",
                        "instance_id",
                        "pipeline_name_hash",
                        "num_pipelines_in_repo",
                        "repo_hash",
                        "python_version",
                        "metadata",
                        "version",
                    ]
                )

            assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
            assert len(records) == 3
            assert server_mock.call_args_list == [mock.call()]
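For orientation, this is roughly the shape of one parsed caplog message in these tests. Every value below is invented; only the key names are taken from the assertions above:

# Hypothetical record for illustration; values are placeholders, not real output.
message = {
    "action": UPDATE_REPO_STATS,
    "client_time": "...",
    "elapsed_time": "...",
    "event_id": "...",
    "instance_id": "...",
    "pipeline_name_hash": "",          # empty for repo-level stats from dagit
    "num_pipelines_in_repo": "4",      # note: counts are logged as strings
    "repo_hash": hash_name("dagster_test_repository"),
    "python_version": "...",
    "metadata": {},
    "version": "...",
}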
Example 7: _create_sensor_run logs a SENSOR_RUN_CREATED telemetry action before creating the sensor-triggered run.
def _create_sensor_run(
    instance, repo_location, external_sensor, external_pipeline, run_request, target_data
):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        target_data.mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    log_action(
        instance,
        SENSOR_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SENSOR_NAME_HASH": hash_name(external_sensor.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
            "repo_hash": hash_name(repo_location.name),
        },
    )

    return instance.create_run(
        pipeline_name=target_data.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=target_data.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=target_data.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example 8: test_hash_name checks that hashed names are 64 characters long and mutually dissimilar.
def test_hash_name():
    pipelines = ['pipeline_1', 'pipeline_2', 'pipeline_3']
    hashes = [hash_name(p) for p in pipelines]
    for h in hashes:
        assert len(h) == 64

    assert SequenceMatcher(None, hashes[0], hashes[1]).ratio() < 0.4
    assert SequenceMatcher(None, hashes[0], hashes[2]).ratio() < 0.4
    assert SequenceMatcher(None, hashes[1], hashes[2]).ratio() < 0.4
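This test pins down two properties of hash_name: digests are 64 characters long, and unrelated names hash to strings with little in common. Both are consistent with a SHA-256 hex digest; a minimal sketch, not necessarily Dagster's exact implementation (which may, for example, salt the name):

import hashlib

def hash_name(name):
    # 64-character hex digest; assumed implementation consistent with the test above.
    return hashlib.sha256(name.encode("utf-8")).hexdigest()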
Example 9: an older test_repo_stats variant with manual instance setup.
def test_repo_stats(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_name = "multi_mode_with_resources"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        file_relative_path(
                            __file__,
                            "../../general_tests/test_repository.py"),
                        "-a",
                        "dagster_test_repository",
                        "-p",
                        pipeline_name,
                        "--preset",
                        "add",
                        "--tags",
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(4)
                        assert message.get("repo_hash") == hash_name(
                            "dagster_test_repository")
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example 10: an older test_repo_stats variant that loads the repository from a workspace file (-w) rather than a Python file.
def test_repo_stats(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_file('')):
                pipeline_name = 'multi_mode_with_resources'
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        '-w',
                        file_relative_path(__file__, '../workspace.yaml'),
                        '-p',
                        pipeline_name,
                        '--preset',
                        'add',
                        '--tags',
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == hash_name(
                            pipeline_name)
                        assert message.get('num_pipelines_in_repo') == str(4)
                        assert message.get('repo_hash') == hash_name(
                            'dagster_test_repository')
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example 11: an older test_dagit_logs variant with manual instance setup and two expected records.
def test_dagit_logs(
    server_mock,
    caplog,
):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({'DAGSTER_HOME': temp_dir}):
            with open(os.path.join(temp_dir, 'dagster.yaml'), 'w') as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={'DAGSTER_HOME': temp_dir})
            with pushd(path_to_tutorial_file('')):

                result = runner.invoke(
                    ui,
                    [
                        '-w',
                        file_relative_path(__file__,
                                           'telemetry_repository.yaml'),
                    ],
                )
                assert result.exit_code == 0, str(result.exception)

                actions = set()
                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    actions.add(message.get('action'))
                    if message.get('action') == UPDATE_REPO_STATS:
                        assert message.get('pipeline_name_hash') == ''
                        assert message.get('num_pipelines_in_repo') == str(4)
                        assert message.get('repo_hash') == hash_name(
                            'dagster_test_repository')
                    assert set(message.keys()) == set([
                        'action',
                        'client_time',
                        'elapsed_time',
                        'event_id',
                        'instance_id',
                        'pipeline_name_hash',
                        'num_pipelines_in_repo',
                        'repo_hash',
                        'python_version',
                        'metadata',
                        'version',
                    ])

                assert actions == set(
                    [START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
                assert len(caplog.records) == 2
                assert server_mock.call_args_list == [mock.call()]
Example 12: an older test_dagster_telemetry_enabled variant with manual instance setup.
def test_dagster_telemetry_enabled(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({"telemetry": {
                    "enabled": True
                }},
                          fd,
                          default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"),
                        "-a",
                        pipeline_attribute,
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name))
                    assert set(message.keys()) == EXPECTED_KEYS
                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example 13: an older test_dagit_logs variant that invokes dagit without pushd.
def test_dagit_logs(
    server_mock,
    caplog,
):
    with seven.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            with open(os.path.join(temp_dir, "dagster.yaml"), "w") as fd:
                yaml.dump({}, fd, default_flow_style=False)

            DagsterInstance.local_temp(temp_dir)
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            result = runner.invoke(
                ui,
                [
                    "-w",
                    file_relative_path(__file__, "telemetry_repository.yaml"),
                ],
            )
            assert result.exit_code == 0, str(result.exception)

            actions = set()
            for record in caplog.records:
                message = json.loads(record.getMessage())
                actions.add(message.get("action"))
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == ""
                    assert message.get("num_pipelines_in_repo") == str(4)
                    assert message.get("repo_hash") == hash_name(
                        "dagster_test_repository")
                assert set(message.keys()) == set([
                    "action",
                    "client_time",
                    "elapsed_time",
                    "event_id",
                    "instance_id",
                    "pipeline_name_hash",
                    "num_pipelines_in_repo",
                    "repo_hash",
                    "python_version",
                    "metadata",
                    "version",
                ])

            assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
            assert len(caplog.records) == 2
            assert server_mock.call_args_list == [mock.call()]
Example 14: create_backfill_run logs a BACKFILL_RUN_CREATED telemetry action and handles fresh, from-failure, and step re-execution backfills.
def create_backfill_run(instance, repo_location, external_pipeline,
                        external_partition_set, backfill_job, partition_data):
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.inst_param(external_partition_set, "external_partition_set",
                     ExternalPartitionSet)
    check.inst_param(backfill_job, "backfill_job", PartitionBackfill)
    check.inst_param(partition_data, "partition_data",
                     ExternalPartitionExecutionParamData)

    log_action(
        instance,
        BACKFILL_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "repo_hash": hash_name(repo_location.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
        },
    )

    tags = merge_dicts(
        external_pipeline.tags,
        partition_data.tags,
        PipelineRun.tags_for_backfill_id(backfill_job.backfill_id),
        backfill_job.tags,
    )

    solids_to_execute = None
    solid_selection = None
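    # Three cases follow: a fresh partition run, re-execution from the last
    # failed run for this partition, or re-execution of selected steps from
    # the last run.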
    if not backfill_job.from_failure and not backfill_job.reexecution_steps:
        step_keys_to_execute = None
        parent_run_id = None
        root_run_id = None
        known_state = None
        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(
                external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    elif backfill_job.from_failure:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        if not last_run or last_run.status != PipelineRunStatus.FAILURE:
            return None
        return instance.create_reexecuted_run_from_failure(
            last_run,
            repo_location,
            external_pipeline,
            tags=tags,
            run_config=partition_data.run_config,
            mode=external_partition_set.mode,
        )

    elif backfill_job.reexecution_steps:
        last_run = _fetch_last_run(instance, external_partition_set,
                                   partition_data.name)
        parent_run_id = last_run.run_id if last_run else None
        root_run_id = (last_run.root_run_id
                       or last_run.run_id) if last_run else None
        if parent_run_id and root_run_id:
            tags = merge_dicts(tags, {
                PARENT_RUN_ID_TAG: parent_run_id,
                ROOT_RUN_ID_TAG: root_run_id
            })
        step_keys_to_execute = backfill_job.reexecution_steps
        if last_run and last_run.status == PipelineRunStatus.SUCCESS:
            known_state = KnownExecutionState.for_reexecution(
                instance.all_logs(parent_run_id),
                step_keys_to_execute,
            )
        else:
            known_state = None

        if external_partition_set.solid_selection:
            solids_to_execute = frozenset(
                external_partition_set.solid_selection)
            solid_selection = external_partition_set.solid_selection

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        partition_data.run_config,
        external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        known_state=known_state,
        instance=instance,
    )

    return instance.create_run(
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=external_execution_plan.execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        pipeline_name=external_pipeline.name,
        run_id=make_new_run_id(),
        solids_to_execute=solids_to_execute,
        run_config=partition_data.run_config,
        mode=external_partition_set.mode,
        step_keys_to_execute=step_keys_to_execute,
        tags=tags,
        root_run_id=root_run_id,
        parent_run_id=parent_run_id,
        status=PipelineRunStatus.NOT_STARTED,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
        solid_selection=solid_selection,
    )