Example #1
    def ephemeral(tempdir=None):
        from dagster.core.launcher.sync_in_memory_run_launcher import SyncInMemoryRunLauncher
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.root import LocalArtifactStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.noop_compute_log_manager import NoOpComputeLogManager

        if tempdir is None:
            tempdir = DagsterInstance.temp_storage()

        return DagsterInstance(
            InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
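
Example #1 builds a fully in-memory instance, which makes it handy for unit tests. A minimal usage sketch, assuming the legacy pipeline/solid API used throughout these examples (hello_pipeline is a placeholder name, not from the original):

from dagster import execute_pipeline, pipeline, solid
from dagster.core.instance import DagsterInstance

@solid
def hello(context):
    context.log.info('hello')

@pipeline
def hello_pipeline():
    hello()

# All run and event state lives in memory and disappears with the process.
instance = DagsterInstance.ephemeral()
result = execute_pipeline(hello_pipeline, instance=instance)
assert result.success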
Example #2
def dagster_instance(helm_postgres_url):  # pylint: disable=redefined-outer-name

    with tempfile.TemporaryDirectory() as tempdir:
        with environ({"DAGSTER_HOME": tempdir}):

            with DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(tempdir),
                run_storage=PostgresRunStorage(helm_postgres_url),
                event_storage=PostgresEventLogStorage(helm_postgres_url),
                compute_log_manager=NoOpComputeLogManager(),
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=ExplodingRunLauncher(),  # use graphql to launch any runs
                ref=InstanceRef.from_dir(tempdir),
            ) as instance:
                yield instance

                check_export_runs(instance)
Example #3
def dagster_instance_for_k8s_run_launcher(
        helm_postgres_url_for_k8s_run_launcher, run_launcher):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(
                helm_postgres_url_for_k8s_run_launcher),
            event_storage=PostgresEventLogStorage(
                helm_postgres_url_for_k8s_run_launcher),
            schedule_storage=PostgresScheduleStorage(
                helm_postgres_url_for_k8s_run_launcher),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
    ) as instance:
        yield instance
Example #4
def dagster_instance(helm_namespace, run_launcher):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(
            namespace=helm_namespace) as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port)
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        yield instance
Example #5
def dagster_instance_with_k8s_scheduler(helm_postgres_url_for_k8s_run_launcher,
                                        run_launcher, k8s_scheduler,
                                        schedule_tempdir):  # pylint: disable=redefined-outer-name
    with DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(
                os.path.join(schedule_tempdir, "runs")),
            event_storage=PostgresEventLogStorage(
                helm_postgres_url_for_k8s_run_launcher),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, "schedules")),
            scheduler=k8s_scheduler,
    ) as instance:
        yield instance
Example #6
def _postgres_instance_with_grpc_api_hijack():
    with seven.TemporaryDirectory() as temp_dir:
        with graphql_postgres_instance() as pg_conn_string:
            instance = DagsterInstance(
                instance_type=InstanceType.EPHEMERAL,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=TestPostgresInstance.clean_run_storage(pg_conn_string),
                event_storage=TestPostgresInstance.clean_event_log_storage(pg_conn_string),
                compute_log_manager=LocalComputeLogManager(temp_dir),
                run_launcher=EphemeralGrpcRunLauncher(),
                schedule_storage=TestPostgresInstance.clean_schedule_storage(
                    pg_conn_string
                ),
            )
            try:
                yield instance
            finally:
                instance.run_launcher.join()
Example #7
def test_basic_start_scheduled_execution_with_run_launcher():
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        context = define_context_for_repository_yaml(path=file_relative_path(
            __file__, '../repository.yaml'),
                                                     instance=instance)

        scheduler_handle = context.scheduler_handle
        scheduler_handle.up(python_path=sys.executable,
                            repository_path=file_relative_path(
                                __file__, '../'))

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )

        assert not result.errors
        assert result.data

        # just test existence
        assert (result.data['startScheduledExecution']['__typename'] ==
                'LaunchPipelineExecutionSuccess')

        assert uuid.UUID(
            result.data['startScheduledExecution']['run']['runId'])
        assert (result.data['startScheduledExecution']['run']['pipeline']
                ['name'] == 'no_config_pipeline')

        assert any(
            tag['key'] == 'dagster/schedule_name'
            and tag['value'] == 'no_config_pipeline_hourly_schedule'
            for tag in result.data['startScheduledExecution']['run']['tags'])
Example #8
def test_run_launcher():
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        context = define_context_for_repository_yaml(path=file_relative_path(
            __file__, '../repository.yaml'),
                                                     instance=instance)

        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': {
                        'name': 'no_config_pipeline'
                    },
                    'mode': 'default'
                }
            },
        )

        assert result.data['launchPipelineExecution'][
            '__typename'] == 'LaunchPipelineRunSuccess'
        assert result.data['launchPipelineExecution']['run'][
            'status'] == 'NOT_STARTED'

        run_id = result.data['launchPipelineExecution']['run']['runId']

        test_queue.run_one(instance)

        result = execute_dagster_graphql(context=context,
                                         query=RUN_QUERY,
                                         variables={'runId': run_id})
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'
Example #9
def test_start_stop_schedule():

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
            run_launcher=SyncInMemoryRunLauncher(),
        )

        context = define_context_for_repository_yaml(path=file_relative_path(
            __file__, '../repository.yaml'),
                                                     instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context,
            START_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        assert start_result.data['startSchedule']['schedule'][
            'status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context,
            STOP_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        assert stop_result.data['stopRunningSchedule']['schedule'][
            'status'] == 'STOPPED'
Example #10
def test_get_all_schedules():

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            schedule_storage=SqliteScheduleStorage.from_local(temp_dir),
            scheduler=FilesystemTestScheduler(temp_dir),
        )

        context = define_context_for_repository_yaml(path=file_relative_path(
            __file__, '../repository.yaml'),
                                                     instance=instance)

        # Initialize scheduler
        repository = context.get_repository()
        scheduler_handle = context.scheduler_handle
        scheduler_handle.up(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        schedule = instance.start_schedule(
            repository, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context,
                                                   GET_SCHEDULES_QUERY)

        assert scheduler_result.data
        assert scheduler_result.data['scheduler']
        assert scheduler_result.data['scheduler']['runningSchedules']
        assert len(
            scheduler_result.data['scheduler']['runningSchedules']) == 11

        for schedule in scheduler_result.data['scheduler']['runningSchedules']:
            assert (schedule['scheduleDefinition']['environmentConfigYaml'] ==
                    'storage:\n  filesystem: {}\n')
Example #11
def dagster_instance_for_user_deployments_subchart_disabled(
    helm_postgres_url_for_user_deployments_subchart_disabled,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_user_deployments_subchart_disabled),
        event_storage=PostgresEventLogStorage(
            helm_postgres_url_for_user_deployments_subchart_disabled
        ),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
    ) as instance:
        yield instance

        check_export_runs(instance)
Example #12
def dagster_instance_for_daemon(
    helm_postgres_url_for_daemon,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_daemon),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_daemon),
        schedule_storage=PostgresScheduleStorage(helm_postgres_url_for_daemon),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=QueuedRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
        scheduler=DagsterDaemonScheduler(),
    ) as instance:
        yield instance

        check_export_runs(instance)
Example #13
def run_launch(execution_args, expected_count=None):
    runner = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        with mock.patch('dagster.core.instance.DagsterInstance.get') as _instance:
            _instance.return_value = instance

            result = runner.invoke(pipeline_launch_command, execution_args)
            assert result.exit_code == 0, result.stdout
            if expected_count:
                assert len(run_launcher.queue()) == expected_count
Example #14
def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            compute_log_manager = LocalComputeLogManager(temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=compute_log_manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
                settings={"telemetry": {
                    "enabled": False
                }},
            )

            result = execute_pipeline(simple, instance=instance)

            assert run_store.has_run(result.run_id)
            assert run_store.get_run_by_id(
                result.run_id).status == PipelineRunStatus.SUCCESS
            assert DagsterEventType.PIPELINE_SUCCESS in [
                event.dagster_event.event_type
                for event in event_store.get_logs_for_run(result.run_id)
                if event.is_dagster_event
            ]
            stats = event_store.get_stats_for_run(result.run_id)
            assert stats.steps_succeeded == 1
            assert stats.end_time is not None
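
The environ helper above temporarily overrides environment variables for the duration of the with block. A minimal sketch of such a helper using only the standard library (the actual helper ships with dagster's test utilities; this is an assumption about its behavior):

import os
from contextlib import contextmanager

@contextmanager
def environ(env):
    # Remember prior values so they can be restored on exit.
    previous = {key: os.environ.get(key) for key in env}
    os.environ.update(env)
    try:
        yield
    finally:
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value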
Example #15
    def ephemeral(tempdir=None):
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.root import LocalArtifactStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

        if tempdir is None:
            tempdir = DagsterInstance.temp_storage()

        feature_set = dagster_feature_set(tempdir)

        return DagsterInstance(
            InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(
                _compute_logs_directory(tempdir)),
            feature_set=feature_set,
        )
Example #16
def dagster_instance_for_k8s_run_launcher(
    helm_postgres_url_for_k8s_run_launcher,
):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    instance_ref = InstanceRef.from_dir(tempdir)

    with DagsterInstance(
        instance_type=InstanceType.PERSISTENT,
        local_artifact_storage=LocalArtifactStorage(tempdir),
        run_storage=PostgresRunStorage(helm_postgres_url_for_k8s_run_launcher),
        event_storage=PostgresEventLogStorage(helm_postgres_url_for_k8s_run_launcher),
        schedule_storage=PostgresScheduleStorage(helm_postgres_url_for_k8s_run_launcher),
        compute_log_manager=NoOpComputeLogManager(),
        run_coordinator=DefaultRunCoordinator(),
        run_launcher=ExplodingRunLauncher(),
        ref=instance_ref,
    ) as instance:
        yield instance

        check_export_runs(instance)
Example #17
def dagster_instance_with_k8s_scheduler(helm_namespace, run_launcher,
                                        k8s_scheduler, schedule_tempdir):
    with local_port_forward_postgres(
            namespace=helm_namespace) as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port)
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(schedule_tempdir),
            run_storage=SqliteRunStorage.from_local(
                os.path.join(schedule_tempdir, 'runs')),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
            schedule_storage=SqliteScheduleStorage.from_local(
                os.path.join(schedule_tempdir, 'schedules')),
            scheduler=k8s_scheduler,
        )
        yield instance
Example #18
def dagster_instance_for_k8s_run_launcher(helm_namespace_for_k8s_run_launcher, run_launcher):
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(
        namespace=helm_namespace_for_k8s_run_launcher
    ) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port
        )
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            schedule_storage=PostgresScheduleStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        yield instance
Example #19
File: cluster.py  Project: xsswhale/dagster
def dagster_instance_for_daemon(helm_namespace_for_daemon, run_launcher):  # pylint: disable=redefined-outer-name
    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres(
            namespace=helm_namespace_for_daemon) as local_forward_port:
        postgres_url = "postgresql://*****:*****@localhost:{local_forward_port}/test".format(
            local_forward_port=local_forward_port)
        print("Local Postgres forwarding URL: ", postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            schedule_storage=PostgresScheduleStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_coordinator=QueuedRunCoordinator(),
            run_launcher=run_launcher,
            scheduler=DagsterDaemonScheduler(),
        )
        yield instance
Example #20
    def test_run_record_timestamps(self, storage):
        assert storage

        self._skip_in_memory(storage)

        @op
        def a():
            pass

        @job
        def my_job():
            a()

        with tempfile.TemporaryDirectory() as temp_dir:
            if storage._instance:  # pylint: disable=protected-access
                instance = storage._instance  # pylint: disable=protected-access
            else:
                instance = DagsterInstance(
                    instance_type=InstanceType.EPHEMERAL,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=storage,
                    event_storage=InMemoryEventLogStorage(),
                    compute_log_manager=NoOpComputeLogManager(),
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=SyncInMemoryRunLauncher(),
                )

            freeze_datetime = to_timezone(
                create_pendulum_time(2019, 11, 2, 0, 0, 0, tz="US/Central"), "US/Pacific"
            )

            with pendulum.test(freeze_datetime):
                result = my_job.execute_in_process(instance=instance)
                records = instance.get_run_records(
                    filters=PipelineRunsFilter(run_ids=[result.run_id])
                )
                assert len(records) == 1
                record = records[0]
                assert record.start_time == freeze_datetime.timestamp()
                assert record.end_time == freeze_datetime.timestamp()
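
pendulum.test pins "now" to a fixed datetime, which is why the run record's start and end timestamps can be asserted exactly. A standalone sketch of the pattern (pendulum 2.x; the test() API was removed in pendulum 3):

import pendulum

frozen = pendulum.datetime(2019, 11, 2, tz='US/Central')
with pendulum.test(frozen):
    # Every call to now() inside the block returns the frozen datetime.
    assert pendulum.now('US/Central') == frozen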
Example #21
def test_compute_log_manager_skip_empty_upload(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            PREFIX = "my_prefix"
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix=PREFIX,
                                          skip_empty_files=True)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)

            stderr_object = mock_s3_bucket.Object(
                key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.err"
            ).get()
            assert stderr_object

            with pytest.raises(ClientError):
                # stdout is not uploaded because we do not print anything to stdout
                mock_s3_bucket.Object(
                    key=f"{PREFIX}/storage/{result.run_id}/compute_logs/easy.out"
                ).get()
Example #22
def test_fs_stores():
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            return 'easy'

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
        )

        run = RunConfig()
        execute_pipeline(simple, run_config=run, instance=instance)

        assert run_store.has_run(run.run_id)
        assert run_store.get_run_by_id(
            run.run_id).status == PipelineRunStatus.SUCCESS
        assert DagsterEventType.PIPELINE_SUCCESS in [
            event.dagster_event.event_type
            for event in event_store.get_logs_for_run(run.run_id)
            if event.is_dagster_event
        ]
        stats = event_store.get_stats_for_run(run.run_id)
        assert stats.steps_succeeded == 1
        assert stats.end_time is not None
Example #23
def test_compute_log_manager(s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info('easy')
            print(HELLO_WORLD)
            return 'easy'

        easy()

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket=s3_bucket)

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(bucket=s3_bucket,
                                      prefix='my_prefix',
                                      local_dir=temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_launcher=CliApiRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = s3.get_object(
            Bucket=s3_bucket,
            Key='{prefix}/storage/{run_id}/compute_logs/easy.compute.err'.
            format(prefix='my_prefix', run_id=result.run_id),
        )
        stderr_s3 = six.ensure_str(s3_object['Body'].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                        'compute_logs')
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key,
                                        ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
Example #24
def dagster_instance(helm_namespace, run_launcher):  # pylint: disable=redefined-outer-name
    @contextmanager
    def local_port_forward_postgres():
        print('Port-forwarding postgres')
        postgres_pod_name = (check_output([
            'kubectl',
            'get',
            'pods',
            '--namespace',
            helm_namespace,
            '-l',
            'app=postgresql,release=dagster',
            '-o',
            'jsonpath="{.items[0].metadata.name}"',
        ]).decode('utf-8').strip('"'))
        forward_port = find_free_port()

        wait_for_pod(postgres_pod_name, namespace=helm_namespace)

        try:
            p = subprocess.Popen([
                'kubectl',
                'port-forward',
                '--namespace',
                helm_namespace,
                postgres_pod_name,
                '{forward_port}:5432'.format(forward_port=forward_port),
            ])

            # Validate port forwarding works
            start = time.time()

            while True:
                if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                    raise Exception(
                        'Timed out while waiting for postgres port forwarding')

                print(
                    'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                    ' available...' % (postgres_pod_name, forward_port))
                try:
                    conn = psycopg2.connect(
                        database='test',
                        user='******',
                        password='******',
                        host='localhost',
                        port=forward_port,
                    )
                    conn.close()
                    break
                except:  # pylint: disable=bare-except, broad-except
                    time.sleep(1)
                    continue

            yield forward_port

        finally:
            print('Terminating port-forwarding')
            p.terminate()

    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres() as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port)
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(),
            run_launcher=run_launcher,
        )
        yield instance
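
The psycopg2 retry loop above simply polls until the kubectl port-forward is actually accepting connections. The same wait-until-connectable pattern can be written generically with the standard library (the 60-second timeout here is an assumption, not a value taken from the fixture):

import socket
import time

def wait_for_port(host, port, timeout=60):
    # Poll until a TCP connection succeeds or the deadline passes.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            with socket.create_connection((host, port), timeout=1):
                return
        except OSError:
            time.sleep(1)
    raise Exception('Timed out waiting for {0}:{1}'.format(host, port))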
Example #25
def test_compute_log_manager_with_envvar(gcs_bucket):
    @job
    def simple():
        @op
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with open(os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")) as f:
        with tempfile.TemporaryDirectory() as temp_dir:
            with environ({"ENV_VAR": f.read(), "DAGSTER_HOME": temp_dir}):
                run_store = SqliteRunStorage.from_local(temp_dir)
                event_store = SqliteEventLogStorage(temp_dir)
                manager = GCSComputeLogManager(
                    bucket=gcs_bucket,
                    prefix="my_prefix",
                    local_dir=temp_dir,
                    json_credentials_envvar="ENV_VAR",
                )
                instance = DagsterInstance(
                    instance_type=InstanceType.PERSISTENT,
                    local_artifact_storage=LocalArtifactStorage(temp_dir),
                    run_storage=run_store,
                    event_storage=event_store,
                    compute_log_manager=manager,
                    run_coordinator=DefaultRunCoordinator(),
                    run_launcher=DefaultRunLauncher(),
                    ref=InstanceRef.from_dir(temp_dir),
                )
                result = simple.execute_in_process(instance=instance)
                compute_steps = [
                    event.step_key
                    for event in result.all_node_events
                    if event.event_type == DagsterEventType.STEP_START
                ]
                assert len(compute_steps) == 1
                step_key = compute_steps[0]

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data

                # Check GCS directly
                stderr_gcs = (
                    storage.Client()
                    .bucket(gcs_bucket)
                    .blob(f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
                    .download_as_bytes()
                    .decode("utf-8")
                )

                for expected in EXPECTED_LOGS:
                    assert expected in stderr_gcs

                # Check download behavior by deleting locally cached logs
                compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
                for filename in os.listdir(compute_logs_dir):
                    os.unlink(os.path.join(compute_logs_dir, filename))

                stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
                assert stdout.data == HELLO_WORLD + SEPARATOR

                stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
                for expected in EXPECTED_LOGS:
                    assert expected in stderr.data
Example #26
def test_dev_loop_changing_versions():
    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_launcher=CliApiRunLauncher(),
        )

        run_config = {
            "solids": {
                "create_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "create_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_1": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_2": {"config": {"input_str": "apple", "base_dir": temp_dir}},
                "take_string_two_inputs": {"config": {"input_str": "apple", "base_dir": temp_dir}},
            },
            "intermediate_storage": {"filesystem": {"config": {"base_dir": temp_dir}}},
        }

        result = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_1"]["config"]["input_str"] = "banana"

        assert set(
            get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
        ) == set(["take_string_1.compute", "take_string_two_inputs.compute"])

        result2 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result2.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")

        run_config["solids"]["take_string_two_inputs"]["config"]["input_str"] = "banana"

        assert get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode") == [
            "take_string_two_inputs.compute"
        ]

        result3 = execute_pipeline(
            basic_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result3.success

        assert not get_step_keys_to_execute(instance, basic_pipeline, run_config, "only_mode")
Example #27
def test_execute_display_command():
    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = ConsolidatedSqliteEventLogStorage(temp_dir)
        compute_log_manager = LocalComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=compute_log_manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        run_config = {
            "solids": {
                "create_string_1_asset": {
                    "config": {
                        "input_str": "apple"
                    }
                },
                "take_string_1_asset": {
                    "config": {
                        "input_str": "apple"
                    }
                },
            },
            "resources": {
                "io_manager": {
                    "config": {
                        "base_dir": temp_dir
                    }
                }
            },
        }

        # write run config to temp file
        # file is temp because intermediate storage directory is temporary
        with open(os.path.join(temp_dir, "pipeline_config.yaml"), "w") as f:
            f.write(yaml.dump(run_config))

        kwargs = {
            "config": (os.path.join(temp_dir, "pipeline_config.yaml"),),
            "pipeline": "asset_pipeline",
            "python_file": file_relative_path(
                __file__,
                "../../execution_tests/memoized_dev_loop_pipeline.py"),
            "tags": '{"dagster/is_memoized_run": "true"}',
        }

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output

        # execute the pipeline once so that addresses have been populated.

        result = execute_pipeline(
            asset_pipeline,
            run_config=run_config,
            mode="only_mode",
            tags={"dagster/is_memoized_run": "true"},
            instance=instance,
        )
        assert result.success

        with Capturing() as output:
            execute_list_versions_command(kwargs=kwargs, instance=instance)

        assert output
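
Capturing collects everything the command prints so the test can assert on it. A common recipe for such a helper, offered only as an assumption about how this particular Capturing is implemented:

import sys
from io import StringIO

class Capturing(list):
    # Redirect stdout into a buffer and expose the captured lines as list items.
    def __enter__(self):
        self._stdout = sys.stdout
        sys.stdout = self._stringio = StringIO()
        return self

    def __exit__(self, *args):
        self.extend(self._stringio.getvalue().splitlines())
        sys.stdout = self._stdout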
Example #28
def test_compute_log_manager(mock_s3_bucket):
    @op
    def easy(context):
        context.log.info("easy")
        print(HELLO_WORLD)  # pylint: disable=print-call
        return "easy"

    @job
    def simple():
        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        with environ({"DAGSTER_HOME": temp_dir}):
            run_store = SqliteRunStorage.from_local(temp_dir)
            event_store = SqliteEventLogStorage(temp_dir)
            manager = S3ComputeLogManager(bucket=mock_s3_bucket.name,
                                          prefix="my_prefix",
                                          local_dir=temp_dir)
            instance = DagsterInstance(
                instance_type=InstanceType.PERSISTENT,
                local_artifact_storage=LocalArtifactStorage(temp_dir),
                run_storage=run_store,
                event_storage=event_store,
                compute_log_manager=manager,
                run_coordinator=DefaultRunCoordinator(),
                run_launcher=DefaultRunLauncher(),
                ref=InstanceRef.from_dir(temp_dir),
            )
            result = simple.execute_in_process(instance=instance)
            compute_steps = [
                event.step_key for event in result.all_node_events
                if event.event_type == DagsterEventType.STEP_START
            ]
            assert len(compute_steps) == 1
            step_key = compute_steps[0]

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data

            # Check S3 directly
            s3_object = mock_s3_bucket.Object(
                key=f"my_prefix/storage/{result.run_id}/compute_logs/easy.err")
            stderr_s3 = s3_object.get()["Body"].read().decode("utf-8")
            for expected in EXPECTED_LOGS:
                assert expected in stderr_s3

            # Check download behavior by deleting locally cached logs
            compute_logs_dir = os.path.join(temp_dir, result.run_id,
                                            "compute_logs")
            for filename in os.listdir(compute_logs_dir):
                os.unlink(os.path.join(compute_logs_dir, filename))

            stdout = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDOUT)
            assert stdout.data == HELLO_WORLD + SEPARATOR

            stderr = manager.read_logs_file(result.run_id, step_key,
                                            ComputeIOType.STDERR)
            for expected in EXPECTED_LOGS:
                assert expected in stderr.data
Example #29
def test_compute_log_manager(mock_s3_bucket):
    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with seven.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = S3ComputeLogManager(
            bucket=mock_s3_bucket.name, prefix="my_prefix", local_dir=temp_dir
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=DefaultRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check S3 directly
        s3_object = mock_s3_bucket.Object(
            key="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        stderr_s3 = six.ensure_str(s3_object.get()["Body"].read())
        for expected in EXPECTED_LOGS:
            assert expected in stderr_s3

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data
Example #30
def test_compute_log_manager(
    mock_create_blob_client, mock_generate_blob_sas, storage_account, container, credential
):
    mock_generate_blob_sas.return_value = "fake-url"
    fake_client = FakeBlobServiceClient(storage_account)
    mock_create_blob_client.return_value = fake_client

    @pipeline
    def simple():
        @solid
        def easy(context):
            context.log.info("easy")
            print(HELLO_WORLD)  # pylint: disable=print-call
            return "easy"

        easy()

    with tempfile.TemporaryDirectory() as temp_dir:
        run_store = SqliteRunStorage.from_local(temp_dir)
        event_store = SqliteEventLogStorage(temp_dir)
        manager = AzureBlobComputeLogManager(
            storage_account=storage_account,
            container=container,
            prefix="my_prefix",
            local_dir=temp_dir,
            secret_key=credential,
        )
        instance = DagsterInstance(
            instance_type=InstanceType.PERSISTENT,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=run_store,
            event_storage=event_store,
            compute_log_manager=manager,
            run_coordinator=DefaultRunCoordinator(),
            run_launcher=SyncInMemoryRunLauncher(),
        )
        result = execute_pipeline(simple, instance=instance)
        compute_steps = [
            event.step_key
            for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]
        assert len(compute_steps) == 1
        step_key = compute_steps[0]

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data

        # Check ADLS2 directly
        adls2_object = fake_client.get_blob_client(
            container=container,
            blob="{prefix}/storage/{run_id}/compute_logs/easy.err".format(
                prefix="my_prefix", run_id=result.run_id
            ),
        )
        adls2_stderr = adls2_object.download_blob().readall().decode("utf-8")
        for expected in EXPECTED_LOGS:
            assert expected in adls2_stderr

        # Check download behavior by deleting locally cached logs
        compute_logs_dir = os.path.join(temp_dir, result.run_id, "compute_logs")
        for filename in os.listdir(compute_logs_dir):
            os.unlink(os.path.join(compute_logs_dir, filename))

        stdout = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDOUT)
        assert stdout.data == HELLO_WORLD + SEPARATOR

        stderr = manager.read_logs_file(result.run_id, step_key, ComputeIOType.STDERR)
        for expected in EXPECTED_LOGS:
            assert expected in stderr.data