Esempio n. 1
0
def run_test_backfill(execution_args,
                      expected_count=None,
                      error_message=None,
                      use_run_launcher=True):
    """Run the backfill command against a throwaway instance and check the outcome.

    Args:
        execution_args: Arguments forwarded to the backfill command helpers.
        expected_count: When truthy, the number of runs expected in the
            launcher queue after a successful invocation.
        error_message: When truthy, a substring expected in the ``UsageError``
            raised by ``execute_backfill_command``; the CLI invocation is then
            expected to exit with code 2.
        use_run_launcher: Whether the instance gets an ``InMemoryRunLauncher``.
    """
    cli_runner = CliRunner()
    run_launcher = None
    if use_run_launcher:
        run_launcher = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            run_launcher=run_launcher,
        )

        # Make every DagsterInstance.get() call resolve to the instance above.
        patch_target = 'dagster.core.instance.DagsterInstance.get'
        with mock.patch(patch_target) as patched_get:
            patched_get.return_value = instance

            if error_message:
                # Direct call: the error surfaces as a UsageError with a message.
                with pytest.raises(UsageError) as error_info:
                    execute_backfill_command(
                        backfill_execute_args(execution_args), no_print)
                assert error_info and error_message in error_info.value.message

            # CLI call: the same failure is reported through the exit code.
            result = cli_runner.invoke(
                pipeline_backfill_command,
                backfill_cli_runner_args(execution_args),
            )
            if not error_message:
                assert result.exit_code == 0
                if expected_count:
                    assert len(run_launcher.queue()) == expected_count
            else:
                assert result.exit_code == 2
Esempio n. 2
0
def test_run_launcher():
    """Launch a pipeline through GraphQL, run it from the in-memory queue, and check it succeeds.

    The whole test runs inside the temporary-directory context so that the
    directory handed to the instance's artifact storage and compute log
    manager still exists while the queued run is executed.
    """
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(temp_dir),
            run_storage=InMemoryRunStorage(),
            event_storage=InMemoryEventLogStorage(),
            compute_log_manager=NoOpComputeLogManager(temp_dir),
            run_launcher=test_queue,
        )

        context = define_context_for_repository_yaml(
            path=script_relative_path('../repository.yaml'), instance=instance
        )

        result = execute_dagster_graphql(
            context=context,
            query=LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {'selector': {'name': 'no_config_pipeline'}, 'mode': 'default'}
            },
        )

        # Launching only enqueues the run; nothing has executed yet.
        launch_data = result.data['launchPipelineExecution']
        assert launch_data['__typename'] == 'LaunchPipelineExecutionSuccess'
        assert launch_data['run']['status'] == 'NOT_STARTED'

        run_id = launch_data['run']['runId']

        # Execute the queued run while the temporary directory is still present.
        test_queue.run_one(instance)

        result = execute_dagster_graphql(
            context=context, query=RUN_QUERY, variables={'runId': run_id}
        )
        assert result.data['pipelineRunOrError']['__typename'] == 'PipelineRun'
        assert result.data['pipelineRunOrError']['status'] == 'SUCCESS'
Esempio n. 3
0
def get_instance(temp_dir):
    """Return an ephemeral ``DagsterInstance`` with in-memory run and event storage.

    ``temp_dir`` is passed to both the local artifact storage and the no-op
    compute log manager.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        compute_log_manager=compute_logs,
    )
Esempio n. 4
0
def test_get_all_schedules():
    """Start one schedule, then check what the scheduler GraphQL query reports for all of them."""
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
        scheduler = FilesystemTestScheduler(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            schedule_storage=schedule_storage,
            scheduler=scheduler,
            run_launcher=SyncInMemoryRunLauncher(),
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository.name, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        # These schedules are defined in dagster_graphql_tests/graphql/setup_scheduler.py
        # If you add a schedule there, be sure to update the number of schedules below
        data = scheduler_result.data
        assert data
        scheduler_data = data['scheduler']
        assert scheduler_data
        running_schedules = scheduler_data['runningSchedules']
        assert running_schedules
        assert len(running_schedules) == 18

        for entry in running_schedules:
            definition = entry['scheduleDefinition']
            name = definition['name']

            # Only the schedule started above is required to be running.
            if name == 'no_config_pipeline_hourly_schedule':
                assert entry['status'] == 'RUNNING'

            run_config_yaml = definition['runConfigYaml']
            if name == 'environment_dict_error_schedule':
                assert run_config_yaml is None
            elif name == 'invalid_config_schedule':
                assert run_config_yaml == 'solids:\n  takes_an_enum:\n    config: invalid\n'
            else:
                assert run_config_yaml == 'storage:\n  filesystem: {}\n'
def define_scheduler_instance(tempdir):
    """Return an ephemeral ``DagsterInstance`` wired to a ``SystemCronScheduler``.

    Both the schedule storage and the scheduler are pointed at the
    ``schedules`` subdirectory of ``tempdir``.
    """
    artifact_storage = LocalArtifactStorage(tempdir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    compute_logs = NoOpComputeLogManager(tempdir)
    schedules_dir = os.path.join(tempdir, 'schedules')
    schedule_storage = SqliteScheduleStorage.from_local(schedules_dir)
    scheduler = SystemCronScheduler(schedules_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        compute_log_manager=compute_logs,
        schedule_storage=schedule_storage,
        scheduler=scheduler,
    )
def get_instance(temp_dir):
    """Return an ephemeral ``DagsterInstance`` with a filesystem test scheduler.

    ``temp_dir`` is used for artifact storage, the scheduler, the SQLite
    schedule storage and the no-op compute log manager.
    """
    artifact_storage = LocalArtifactStorage(temp_dir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    scheduler = FilesystemTestScheduler(temp_dir)
    schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
    compute_logs = NoOpComputeLogManager(temp_dir)
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        scheduler=scheduler,
        schedule_storage=schedule_storage,
        compute_log_manager=compute_logs,
    )
Esempio n. 7
0
def define_scheduler_instance():
    """Yield an ephemeral ``DagsterInstance`` backed by a temporary directory.

    The directory exists only while the generator is suspended at the
    ``yield``; it is removed when the generator is resumed or closed.
    """
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
        # NOTE(review): 'FilesytemTestScheduler' is spelled exactly as in the
        # original call; confirm it matches the name this module imports.
        scheduler = FilesytemTestScheduler(temp_dir)
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            schedule_storage=schedule_storage,
            scheduler=scheduler,
            compute_log_manager=compute_logs,
        )
        yield instance
Esempio n. 8
0
def get_instance_with_launcher(temp_dir):
    """Return an ephemeral ``DagsterInstance`` that launches runs via an ``InMemoryRunLauncher``."""
    launcher = InMemoryRunLauncher()
    artifact_storage = LocalArtifactStorage(temp_dir)
    run_storage = InMemoryRunStorage()
    event_storage = InMemoryEventLogStorage()
    schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
    compute_logs = NoOpComputeLogManager(temp_dir)

    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        schedule_storage=schedule_storage,
        compute_log_manager=compute_logs,
        run_launcher=launcher,
    )
def mocked_instance():
    """Yield an ephemeral instance while ``DagsterInstance.get`` is patched to return it.

    Both the patch and the temporary directory stay active only while the
    generator is suspended at the ``yield``.
    """
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            run_launcher=InMemoryRunLauncher(),
        )
        patch_target = 'dagster.core.instance.DagsterInstance.get'
        with mock.patch(patch_target) as patched_get:
            patched_get.return_value = instance
            yield instance
Esempio n. 10
0
def dagster_instance(run_launcher, network_postgres):
    """Return an ephemeral ``DagsterInstance`` storing runs and events in Postgres.

    Args:
        run_launcher: Run launcher attached to the instance.
        network_postgres: Value substituted into the port position of the
            ``localhost`` Postgres URL.
    """
    tempdir = DagsterInstance.temp_storage()

    url_template = 'postgresql://*****:*****@localhost:{network_postgres}/test'
    postgres_url = url_template.format(network_postgres=network_postgres)

    artifact_storage = LocalArtifactStorage(tempdir)
    run_storage = PostgresRunStorage(postgres_url)
    event_storage = PostgresEventLogStorage(postgres_url)
    compute_logs = NoOpComputeLogManager(compute_logs_directory(tempdir))
    return DagsterInstance(
        instance_type=InstanceType.EPHEMERAL,
        local_artifact_storage=artifact_storage,
        run_storage=run_storage,
        event_storage=event_storage,
        compute_log_manager=compute_logs,
        run_launcher=run_launcher,
    )
Esempio n. 11
0
    def ephemeral(tempdir=None):
        """Build an ephemeral ``DagsterInstance`` with in-memory run and event storage.

        Args:
            tempdir: Root directory for local artifacts and compute logs. When
                ``None``, ``DagsterInstance.temp_storage()`` supplies one.
        """
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.root import LocalArtifactStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

        storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir

        artifact_storage = LocalArtifactStorage(storage_root)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(compute_logs_directory(storage_root))
        return DagsterInstance(
            InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
        )
Esempio n. 12
0
    def ephemeral(tempdir=None):
        """Build an ephemeral ``DagsterInstance`` rooted at ``tempdir``.

        Args:
            tempdir: Root storage directory. When ``None``,
                ``DagsterInstance.temp_storage()`` supplies one.
        """
        from dagster.core.storage.event_log import InMemoryEventLogStorage
        from dagster.core.storage.runs import InMemoryRunStorage
        from dagster.core.storage.local_compute_log_manager import NoOpComputeLogManager

        storage_root = DagsterInstance.temp_storage() if tempdir is None else tempdir

        features = _dagster_feature_set(storage_root)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(_compute_logs_base_directory(storage_root))

        return DagsterInstance(
            InstanceType.EPHEMERAL,
            root_storage_dir=storage_root,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            feature_set=features,
        )
Esempio n. 13
0
def test_get_all_schedules():
    """Bring the scheduler up, start one schedule, and check the schedules GraphQL query."""
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            schedule_storage=schedule_storage,
            scheduler=FilesytemTestScheduler(temp_dir),
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.get_repository()
        context.scheduler_handle.up(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        instance.start_schedule(repository, "no_config_pipeline_hourly_schedule")

        # Query Scheduler + all Schedules
        scheduler_result = execute_dagster_graphql(context, GET_SCHEDULES_QUERY)

        data = scheduler_result.data
        assert data
        scheduler_data = data['scheduler']
        assert scheduler_data
        running_schedules = scheduler_data['runningSchedules']
        assert running_schedules
        assert len(running_schedules) == 11

        # Every reported schedule is expected to carry the same config YAML.
        expected_yaml = 'storage:\n  filesystem: {}\n'
        for entry in running_schedules:
            assert entry['scheduleDefinition']['environmentConfigYaml'] == expected_yaml
Esempio n. 14
0
def test_start_stop_schedule():
    """Start and then stop a schedule through GraphQL, checking the reported status each time."""
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        schedule_storage = SqliteScheduleStorage.from_local(temp_dir)
        scheduler = FilesystemTestScheduler(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            schedule_storage=schedule_storage,
            scheduler=scheduler,
            run_launcher=SyncInMemoryRunLauncher(),
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        # Initialize scheduler
        repository = context.legacy_get_repository_definition()
        reconcile_scheduler_state(
            python_path=sys.executable,
            repository_path="",
            repository=repository,
            instance=instance,
        )

        # Start schedule
        start_result = execute_dagster_graphql(
            context,
            START_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        started = start_result.data['startSchedule']['schedule']
        assert started['status'] == 'RUNNING'

        # Stop schedule
        stop_result = execute_dagster_graphql(
            context,
            STOP_SCHEDULES_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )
        stopped = stop_result.data['stopRunningSchedule']['schedule']
        assert stopped['status'] == 'STOPPED'
def test_basic_start_scheduled_execution_with_run_launcher():
    """Start a scheduled execution through GraphQL on an instance that has a run launcher."""
    test_queue = InMemoryRunLauncher()

    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            run_launcher=test_queue,
        )

        repository_yaml = file_relative_path(__file__, '../repository.yaml')
        context = define_context_for_repository_yaml(path=repository_yaml, instance=instance)

        context.scheduler_handle.up(
            python_path=sys.executable,
            repository_path=file_relative_path(__file__, '../'),
        )

        result = execute_dagster_graphql(
            context,
            START_SCHEDULED_EXECUTION_QUERY,
            variables={'scheduleName': 'no_config_pipeline_hourly_schedule'},
        )

        assert not result.errors
        assert result.data

        # just test existence
        execution = result.data['startScheduledExecution']
        assert execution['__typename'] == 'LaunchPipelineExecutionSuccess'

        run = execution['run']
        assert uuid.UUID(run['runId'])
        assert run['pipeline']['name'] == 'no_config_pipeline'

        # The launched run must carry a tag naming the schedule that produced it.
        has_schedule_tag = False
        for tag in run['tags']:
            if (tag['key'] == 'dagster/schedule_name'
                    and tag['value'] == 'no_config_pipeline_hourly_schedule'):
                has_schedule_tag = True
                break
        assert has_schedule_tag
Esempio n. 16
0
def run_launch(execution_args, expected_count=None):
    """Invoke the pipeline launch CLI command against a throwaway instance.

    Args:
        execution_args: Command-line arguments passed to the launch command.
        expected_count: When truthy, the number of runs expected in the
            launcher queue afterwards.
    """
    cli_runner = CliRunner()
    run_launcher = InMemoryRunLauncher()
    with seven.TemporaryDirectory() as temp_dir:
        artifact_storage = LocalArtifactStorage(temp_dir)
        run_storage = InMemoryRunStorage()
        event_storage = InMemoryEventLogStorage()
        compute_logs = NoOpComputeLogManager(temp_dir)
        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=artifact_storage,
            run_storage=run_storage,
            event_storage=event_storage,
            compute_log_manager=compute_logs,
            run_launcher=run_launcher,
        )

        # Make every DagsterInstance.get() call resolve to the instance above.
        patch_target = 'dagster.core.instance.DagsterInstance.get'
        with mock.patch(patch_target) as patched_get:
            patched_get.return_value = instance

            result = cli_runner.invoke(pipeline_launch_command, execution_args)
            assert result.exit_code == 0
            if expected_count:
                queued_runs = run_launcher.queue()
                assert len(queued_runs) == expected_count
Esempio n. 17
0
def dagster_instance(helm_namespace, run_launcher):
    """Yield an ephemeral ``DagsterInstance`` backed by Postgres reached via ``kubectl port-forward``.

    Args:
        helm_namespace: Kubernetes namespace in which the Postgres pod is
            looked up and port-forwarded.
        run_launcher: Run launcher attached to the yielded instance.

    Yields:
        A ``DagsterInstance`` whose run and event storage point at the
        forwarded local port. The port-forward process is terminated once the
        generator is resumed or closed.

    Raises:
        Exception: If Postgres is not reachable through the forwarded port
            within ``PG_PORT_FORWARDING_TIMEOUT`` seconds.
    """
    @contextmanager
    def local_port_forward_postgres():
        """Forward a free local port to the Postgres pod and yield that port."""
        print('Port-forwarding postgres')
        postgres_pod_name = (check_output([
            'kubectl',
            'get',
            'pods',
            '--namespace',
            helm_namespace,
            '-l',
            'app=postgresql,release=dagster',
            '-o',
            'jsonpath="{.items[0].metadata.name}"',
        ]).decode('utf-8').strip('"'))
        forward_port = find_free_port()

        wait_for_pod(postgres_pod_name, namespace=helm_namespace)

        # Start the forwarder before entering the try block: if Popen itself
        # fails there is no process to terminate, and the cleanup below must
        # not mask that failure with an unbound-variable error.
        p = subprocess.Popen([
            'kubectl',
            'port-forward',
            '--namespace',
            helm_namespace,
            postgres_pod_name,
            '{forward_port}:5432'.format(forward_port=forward_port),
        ])

        try:
            # Validate port forwarding works
            start = time.time()

            while True:
                if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                    raise Exception(
                        'Timed out while waiting for postgres port forwarding')

                print(
                    'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                    ' available...' % (postgres_pod_name, forward_port))
                try:
                    conn = psycopg2.connect(
                        database='test',
                        user='******',
                        password='******',
                        host='localhost',
                        port=forward_port,
                    )
                    conn.close()
                    break
                # Any connection failure just means "not ready yet"; retry
                # until the timeout. KeyboardInterrupt/SystemExit still
                # propagate so the wait can be aborted.
                except Exception:  # pylint: disable=broad-except
                    pass
                time.sleep(1)

            yield forward_port

        finally:
            print('Terminating port-forwarding')
            p.terminate()

    tempdir = DagsterInstance.temp_storage()

    with local_port_forward_postgres() as local_forward_port:
        postgres_url = 'postgresql://*****:*****@localhost:{local_forward_port}/test'.format(
            local_forward_port=local_forward_port)
        print('Local Postgres forwarding URL: ', postgres_url)

        instance = DagsterInstance(
            instance_type=InstanceType.EPHEMERAL,
            local_artifact_storage=LocalArtifactStorage(tempdir),
            run_storage=PostgresRunStorage(postgres_url),
            event_storage=PostgresEventLogStorage(postgres_url),
            compute_log_manager=NoOpComputeLogManager(
                compute_logs_directory(tempdir)),
            run_launcher=run_launcher,
        )
        yield instance