Example #1
def test_get_or_create_run():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')
        assert instance.get_or_create_run(run) == run
        assert instance.has_run(run.run_id)
        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            # This is uglier than we would like because there is no nonlocal keyword in py2
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
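            # MOCK_HAS_RUN_CALLED is assumed to be initialized to False at module scope
            # (not shown in this snippet).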
            if not self._run_storage.has_run(
                    run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(
                    PipelineRun.create_empty_run('foo_pipeline', run_id))
                return False
            else:
                return self._run_storage.has_run(run_id)

        instance.has_run = types.MethodType(_has_run, instance)
        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists, but deleted before we can get it
    global MOCK_HAS_RUN_CALLED  # pylint:disable=global-statement
    MOCK_HAS_RUN_CALLED = False
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(
                    run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(
                    PipelineRun.create_empty_run('foo_pipeline', run_id))
                MOCK_HAS_RUN_CALLED = True
                return False
            elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
                MOCK_HAS_RUN_CALLED = False
                return True
            else:
                return False

        instance.has_run = types.MethodType(_has_run, instance)
        with pytest.raises(check.CheckError, match='Inconsistent run storage'):
            instance.get_or_create_run(run)
Example #2
def test_two_runs_running():
    run_id_one = make_new_run_id()
    run_id_two = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={'solids': {'loop': {'config': {'file': file_one}}}},
            )
        )
        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_one, instance
        )

        pipeline_run_two = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_two,
                environment_dict={'solids': {'loop': {'config': {'file': file_two}}}},
            )
        )

        execution_manager.execute_pipeline(
            handle, infinite_loop_pipeline, pipeline_run_two, instance
        )

        # ensure both runs have begun execution
        while not os.path.exists(file_one) or not os.path.exists(file_two):
            time.sleep(0.1)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_one)

        assert not execution_manager.is_process_running(run_id_one)
        assert execution_manager.is_process_running(run_id_two)

        assert execution_manager.terminate(run_id_two)

        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.is_process_running(run_id_two)
Example #3
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id='previousID',
    )
    for field in run:
        # ensure we have a test value to round trip for each field
        assert field

    exec_params = execution_params_from_pipeline_run(run)
    assert run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(
        exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert run == pipeline_run_from_execution_params(exec_params_gql)

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    exec_params = execution_params_from_pipeline_run(empty_run)
    assert empty_run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(
        exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert empty_run == pipeline_run_from_execution_params(exec_params_gql)
Example #4
def test_max_concurrency_zero():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as filepath:
        instance = DagsterInstance.local_temp()
        execution_manager = QueueingSubprocessExecutionManager(
            instance, max_concurrent_runs=0)

        pipeline_run = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id,
                environment_dict={
                    'solids': {
                        'loop': {
                            'config': {
                                'file': filepath
                            }
                        }
                    }
                },
            ))
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run, instance)
        assert not execution_manager.is_active(run_id)
        assert not os.path.exists(filepath)
Example #5
def test_single():
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    run_id = make_new_run_id()
    pipeline_run = PipelineRun.create_empty_run(pipeline_name, run_id)

    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        print('outer 1')
        print('outer 2')
        print('outer 3')

        for step_key in step_keys:
            inner_step(instance, pipeline_run, step_key)

    for step_key in step_keys:
        stdout = instance.compute_log_manager.read_logs_file(
            run_id, step_key, ComputeIOType.STDOUT)
        assert normalize_file_content(
            stdout.data) == expected_inner_output(step_key)

    full_out = instance.compute_log_manager.read_logs_file(
        run_id, pipeline_name, ComputeIOType.STDOUT)

    assert normalize_file_content(full_out.data).startswith(
        expected_outer_prefix())
Example #6
def test_multi():
    instance = DagsterInstance.local_temp()
    pipeline_name = 'foo_pipeline'
    run_id = make_new_run_id()
    pipeline_run = PipelineRun.create_empty_run(pipeline_name, run_id)
    context = get_multiprocessing_context()

    step_keys = ['A', 'B', 'C']

    with instance.compute_log_manager.watch(pipeline_run):
        print('outer 1')
        print('outer 2')
        print('outer 3')

        for step_key in step_keys:
            process = context.Process(target=execute_inner,
                                      args=(step_key, pipeline_run,
                                            instance.get_ref()))
            process.start()
            process.join()

    for step_key in step_keys:
        stdout = instance.compute_log_manager.read_logs_file(
            run_id, step_key, ComputeIOType.STDOUT)
        assert normalize_file_content(
            stdout.data) == expected_inner_output(step_key)

    full_out = instance.compute_log_manager.read_logs_file(
        run_id, pipeline_name, ComputeIOType.STDOUT)

    # The way that the multiprocess compute-logging interacts with pytest (which stubs out the
    # sys.stdout fileno) makes this difficult to test. The pytest-captured stdout only includes
    # output from the outer process, not from the inner processes.
    assert normalize_file_content(full_out.data).startswith(
        expected_outer_prefix())
Example #7
def test_s3_pipeline_with_custom_prefix(s3_bucket):
    run_id = make_new_run_id()
    s3_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'storage': {'s3': {'config': {'s3_bucket': s3_bucket, 's3_prefix': s3_prefix}}}
    }

    pipeline_run = PipelineRun.create_empty_run(
        pipe.name, run_id=run_id, environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id),
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict, RunConfig(run_id=run_id))
    with scoped_pipeline_context(
        pipe, environment_dict, pipeline_run, instance, execution_plan
    ) as context:
        store = S3IntermediateStore(
            run_id=run_id,
            s3_bucket=s3_bucket,
            s3_prefix=s3_prefix,
            s3_session=context.scoped_resources_builder.build(required_resource_keys={'s3'}).s3,
        )
        assert store.root == '/'.join(['custom_prefix', 'storage', run_id])
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
Example #8
def test_execution_plan_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }
    execution_plan = create_execution_plan(composite_pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(composite_pipeline.name, make_new_run_id())
    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=DagsterInstance.ephemeral(),
    )

    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]
Example #9
    def test_single_write_read_with_snapshot(self, storage):
        if not isinstance(storage, InMemoryRunStorage):
            pytest.skip()

        run_with_snapshot_id = 'lkasjdflkjasdf'
        pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])

        pipeline_snapshot = pipeline_def.get_pipeline_snapshot()

        pipeline_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)

        run_with_snapshot = PipelineRun.create_empty_run(
            run_id=run_with_snapshot_id,
            pipeline_name=pipeline_def.name,
            pipeline_snapshot_id=pipeline_snapshot_id,
        )

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)

        assert storage.add_pipeline_snapshot(pipeline_snapshot) == pipeline_snapshot_id

        assert storage.get_pipeline_snapshot(pipeline_snapshot_id) == pipeline_snapshot

        storage.add_run(run_with_snapshot)

        assert storage.get_run_by_id(run_with_snapshot_id) == run_with_snapshot

        storage.wipe()

        assert not storage.has_pipeline_snapshot(pipeline_snapshot_id)
        assert not storage.has_run(run_with_snapshot_id)
Example #10
def test_valid_job_format(kubeconfig, docker_image, image_pull_policy):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(
        os.path.join(environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)
    run_launcher = K8sRunLauncher(
        image_pull_policy=image_pull_policy,
        image_pull_secrets=[{
            'name': 'element-dev-key'
        }],
        service_account_name='dagit-admin',
        instance_config_map='dagster-instance',
        job_image=docker_image,
        load_kubeconfig=True,
        kubeconfig_file=kubeconfig,
    )

    job = run_launcher.construct_job(run)

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run_id,
            job_image=docker_image,
            image_pull_policy=image_pull_policy,
            dagster_version=dagster_version,
        ).strip())
Example #11
def test_execution_plan_simple_two_steps():
    pipeline_def = define_two_int_pipeline()
    execution_plan = create_execution_plan(pipeline_def)

    assert isinstance(execution_plan.steps, list)
    assert len(execution_plan.steps) == 2

    assert execution_plan.get_step_by_key('return_one.compute')
    assert execution_plan.get_step_by_key('add_one.compute')

    pipeline_run = PipelineRun.create_empty_run(pipeline_def.name, make_new_run_id())
    events = execute_plan(
        execution_plan, pipeline_run=pipeline_run, instance=DagsterInstance.ephemeral()
    )
    # start, out, success, start, input, out, success
    assert [e.event_type_value for e in events] == [
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
    ]

    output_events = [e for e in events if e.event_type_value == 'STEP_OUTPUT']

    assert output_events[0].step_key == 'return_one.compute'
    assert output_events[0].is_successful_output

    assert output_events[1].step_key == 'add_one.compute'
    assert output_events[1].is_successful_output
Example #12
def test_execute_step_wrong_step_key():
    pipeline = define_inty_pipeline()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(['nope']), instance, pipeline_run=pipeline_run
        )

    assert exc_info.value.step_keys == ['nope']

    assert str(exc_info.value) == 'Execution plan does not contain step: nope'

    with pytest.raises(DagsterExecutionStepNotFoundError) as exc_info:
        execute_plan(
            execution_plan.build_subset_plan(['nope', 'nuh_uh']),
            instance,
            pipeline_run=pipeline_run,
        )

    assert exc_info.value.step_keys == ['nope', 'nuh_uh']

    assert str(exc_info.value) == 'Execution plan does not contain steps: nope, nuh_uh'
Example #13
def test_k8s_run_launcher_celery(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = merge_dicts(
        merge_yamls([
            os.path.join(environments_path(), 'env.yaml'),
            os.path.join(environments_path(), 'env_filesystem.yaml'),
        ]),
        get_celery_engine_config(),
    )

    assert 'celery' in environment_dict['execution']

    pipeline_name = 'demo_pipeline_celery'
    tags = {'key': 'value'}
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict,
                                       tags)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    # this is a bad test, but it proves that we got celery configured properly;
    # getting it fully working would involve relying on s3 / gcs for storage
    assert result['data']['startPipelineExecutionForCreatedRun'][
        '__typename'] == 'PythonError'
    assert (
        'Must use S3 or GCS storage with non-local Celery broker: pyamqp://test:test@dagster-rabbitmq:5672// and backend: amqp'
        in result['data']['startPipelineExecutionForCreatedRun']['message'])
Example #14
def create_test_pipeline_execution_context(logger_defs=None):
    run_id = make_new_run_id()
    loggers = check.opt_dict_param(
        logger_defs, 'logger_defs', key_type=str, value_type=LoggerDefinition
    )
    mode_def = ModeDefinition(logger_defs=loggers)
    pipeline_def = PipelineDefinition(
        name='test_legacy_context', solid_defs=[], mode_defs=[mode_def]
    )
    environment_dict = {'loggers': {key: {} for key in loggers}}
    pipeline_run = PipelineRun.create_empty_run('test_legacy_context', run_id, environment_dict)
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline_def, environment_dict, pipeline_run)
    creation_data = create_context_creation_data(
        pipeline_def, environment_dict, pipeline_run, instance, execution_plan
    )
    log_manager = create_log_manager(creation_data)
    scoped_resources_builder = ScopedResourcesBuilder()
    executor_config = create_executor_config(creation_data)
    return construct_pipeline_execution_context(
        context_creation_data=creation_data,
        scoped_resources_builder=scoped_resources_builder,
        system_storage_data=SystemStorageData(
            intermediates_manager=InMemoryIntermediatesManager(),
            file_manager=LocalFileManager.for_instance(instance, run_id),
        ),
        log_manager=log_manager,
        executor_config=executor_config,
        raise_on_error=True,
    )
Example #15
def test_has_run_query_and_terminate():
    run_id_one = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'infinite_loop_pipeline')

    instance = DagsterInstance.local_temp()

    with safe_tempfile_path() as path:
        pipeline_run = instance.create_run(
            PipelineRun.create_empty_run(
                pipeline_name=infinite_loop_pipeline.name,
                run_id=run_id_one,
                environment_dict={'solids': {'loop': {'config': {'file': path}}}},
            )
        )
        execution_manager = SubprocessExecutionManager(instance)
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline, pipeline_run, instance)

        while not os.path.exists(path):
            time.sleep(0.1)

        assert os.path.exists(path)

        assert execution_manager.is_process_running(run_id_one)
        assert execution_manager.terminate(run_id_one)
        assert instance.get_run_by_id(run_id_one).is_finished
        assert not execution_manager.is_process_running(run_id_one)
        assert not execution_manager.terminate(run_id_one)

    assert not os.path.exists(path)
Example #16
def test_gcs_pipeline_with_custom_prefix(gcs_bucket):
    run_id = str(uuid.uuid4())
    gcs_prefix = 'custom_prefix'

    pipe = define_inty_pipeline(should_throw=False)
    environment_dict = {
        'storage': {'gcs': {'config': {'gcs_bucket': gcs_bucket, 'gcs_prefix': gcs_prefix}}}
    }

    pipeline_run = PipelineRun.create_empty_run(
        pipe.name, run_id=run_id, environment_dict=environment_dict
    )
    instance = DagsterInstance.ephemeral()

    result = execute_pipeline(
        pipe, environment_dict=environment_dict, run_config=RunConfig(run_id=run_id),
    )
    assert result.success

    execution_plan = create_execution_plan(pipe, environment_dict, run_config=pipeline_run)
    with scoped_pipeline_context(
        pipe, environment_dict, pipeline_run, instance, execution_plan
    ) as context:
        store = GCSIntermediateStore(
            run_id=run_id,
            gcs_bucket=gcs_bucket,
            gcs_prefix=gcs_prefix,
            client=context.scoped_resources_builder.build(
                mapper_fn=SolidInvocation.default_resource_mapper_fn,
                required_resource_keys={'gcs'},
            ).gcs.client,
        )
        assert store.root == '/'.join(['custom_prefix', 'storage', run_id])
        assert store.get_intermediate(context, 'return_one.compute', Int).obj == 1
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
Example #17
def test_execution_plan_for_composite_solid_with_config_mapping():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid_and_config_mapping': {
                'config': {
                    'foo': 'baz',
                    'bar': 3
                }
            }
        }
    }
    execution_plan = create_execution_plan(
        composite_pipeline_with_config_mapping,
        environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(
        composite_pipeline_with_config_mapping.name, str(uuid.uuid4()))

    events = execute_plan(
        execution_plan,
        environment_dict=environment_dict,
        pipeline_run=pipeline_run,
        instance=DagsterInstance.ephemeral(),
    )

    assert [e.event_type_value for e in events] == [
        'ENGINE_EVENT',
        'STEP_START',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'STEP_START',
        'STEP_INPUT',
        'STEP_OUTPUT',
        'STEP_SUCCESS',
        'ENGINE_EVENT',
    ]
Example #18
def yield_empty_pipeline_context(run_id=None, instance=None):
    with scoped_pipeline_context(
        PipelineDefinition([]),
        {},
        PipelineRun.create_empty_run('empty', run_id=run_id),
        instance or DagsterInstance.ephemeral(),
    ) as context:
        yield context
Example #19
def test_get_or_create_run():
    instance = DagsterInstance.ephemeral()

    assert instance.get_runs() == []
    pipeline_run = PipelineRun.create_empty_run('foo_pipeline', 'new_run')
    assert instance.get_or_create_run(pipeline_run) == pipeline_run

    assert instance.get_runs() == [pipeline_run]

    assert instance.get_or_create_run(pipeline_run) == pipeline_run

    assert instance.get_runs() == [pipeline_run]

    conflicting_pipeline_run = PipelineRun.create_empty_run('bar_pipeline', 'new_run')

    with pytest.raises(DagsterRunConflict, match='Found conflicting existing run with same id.'):
        instance.get_or_create_run(conflicting_pipeline_run)
Example #20
def test_using_s3_for_subplan(s3_bucket):
    pipeline_def = define_inty_pipeline()

    environment_dict = {
        'storage': {
            's3': {
                'config': {
                    's3_bucket': s3_bucket
                }
            }
        }
    }

    run_id = str(uuid.uuid4())

    execution_plan = create_execution_plan(pipeline_def,
                                           environment_dict=environment_dict,
                                           run_config=RunConfig(run_id=run_id))

    assert execution_plan.get_step_by_key('return_one.compute')

    step_keys = ['return_one.compute']
    instance = DagsterInstance.ephemeral()
    pipeline_run = PipelineRun.create_empty_run(
        pipeline_def.name, run_id=run_id, environment_dict=environment_dict)

    return_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(step_keys),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(return_one_step_events, 'return_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run,
                                 instance) as context:
        store = S3IntermediateStore(
            s3_bucket,
            run_id,
            s3_session=context.scoped_resources_builder.build().s3.session)
        assert store.has_intermediate(context, 'return_one.compute')
        assert store.get_intermediate(context, 'return_one.compute',
                                      Int).obj == 1

    add_one_step_events = list(
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
            instance=instance,
        ))

    assert get_step_output(add_one_step_events, 'add_one.compute')
    with scoped_pipeline_context(pipeline_def, environment_dict, pipeline_run,
                                 instance) as context:
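        # `store` is reused from the earlier `with` block above; Python `with` statements do
        # not introduce a new scope, so the S3IntermediateStore created there is still visible.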
        assert store.has_intermediate(context, 'add_one.compute')
        assert store.get_intermediate(context, 'add_one.compute', Int).obj == 2
Example #21
    def test_write_conflicting_run_id(self, storage):
        double_run_id = 'double_run_id'
        pipeline_def = PipelineDefinition(name='some_pipeline', solid_defs=[])

        run = PipelineRun.create_empty_run(run_id=double_run_id, pipeline_name=pipeline_def.name)

        assert storage.add_run(run)
        with pytest.raises(DagsterRunAlreadyExists):
            storage.add_run(run)
Example #22
def test_handle_run_event_pipeline_success_test():

    run_storage = PostgresRunStorage.create_nuked_storage(
        get_test_conn_string())

    run_id = str(uuid.uuid4())
    run_to_add = PipelineRun.create_empty_run(pipeline_name='pipeline_name',
                                              run_id=run_id)
    run_storage.add_run(run_to_add)

    dagster_pipeline_start_event = DagsterEvent(
        message='a message',
        event_type_value=DagsterEventType.PIPELINE_START.value,
        pipeline_name='pipeline_name',
        step_key=None,
        solid_handle=None,
        step_kind_value=None,
        logging_tags=None,
    )

    run_storage.handle_run_event(run_id, dagster_pipeline_start_event)

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        str(uuid.uuid4()),  # diff run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.STARTED

    run_storage.handle_run_event(
        run_id,  # correct run
        DagsterEvent(
            message='a message',
            event_type_value=DagsterEventType.PIPELINE_SUCCESS.value,
            pipeline_name='pipeline_name',
            step_key=None,
            solid_handle=None,
            step_kind_value=None,
            logging_tags=None,
        ),
    )

    assert run_storage.get_run_by_id(
        run_id).status == PipelineRunStatus.SUCCESS
Example #23
def yield_empty_pipeline_context(run_id=None, instance=None):
    pipeline = PipelineDefinition([])
    with scoped_pipeline_context(
        pipeline,
        {},
        PipelineRun.create_empty_run('empty', run_id=run_id if run_id is not None else 'TESTING',),
        instance or DagsterInstance.ephemeral(),
        create_execution_plan(pipeline),
    ) as context:
        yield context
Example #24
# Nested helper from test_get_or_create_run (Example #1), where it is bound onto a
# DagsterInstance with types.MethodType.
def _has_run(self, run_id):
    # This is uglier than we would like because there is no nonlocal keyword in py2
    global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
    # pylint: disable=protected-access
    if not self._run_storage.has_run(
            run_id) and not MOCK_HAS_RUN_CALLED:
        self._run_storage.add_run(
            PipelineRun.create_empty_run('foo_pipeline', run_id))
        return False
    else:
        return self._run_storage.has_run(run_id)
Example #25
def test_valid_job_format(run_launcher, docker_image, environments_path):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(
        os.path.join(environments_path, 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)
    job = run_launcher.construct_job(run)

    assert (yaml.dump(
        remove_none_recursively(job.to_dict()),
        default_flow_style=False).strip() == EXPECTED_JOB_SPEC.format(
            run_id=run_id, job_image=docker_image).strip())
Example #26
def test_k8s_run_launcher(dagster_instance):  # pylint: disable=redefined-outer-name
    run_id = uuid.uuid4().hex
    environment_dict = load_yaml_from_path(os.path.join(environments_path(), 'env.yaml'))
    pipeline_name = 'demo_pipeline'
    run = PipelineRun.create_empty_run(pipeline_name, run_id, environment_dict)

    dagster_instance.launch_run(run)
    success, raw_logs = wait_for_job_success('dagster-job-%s' % run_id)
    result = parse_raw_res(raw_logs.split('\n'))

    assert success
    assert not result.get('errors')
    assert result['data']
    assert result['data']['startPipelineExecution']['__typename'] == 'StartPipelineExecutionSuccess'
Example #27
    def test_wipe_tags(self, storage):
        run_id = 'some_run_id'
        run = PipelineRun.create_empty_run(
            run_id=run_id, pipeline_name='a_pipeline', tags={'foo': 'bar'}
        )

        storage.add_run(run)

        assert storage.get_run_by_id(run_id) == run
        assert dict(storage.get_run_tags()) == {'foo': {'bar'}}

        storage.wipe()
        assert list(storage.get_runs()) == []
        assert dict(storage.get_run_tags()) == {}
Example #28
def test_using_file_system_for_subplan_missing_input():
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    pipeline_run = PipelineRun.create_empty_run(pipeline.name, make_new_run_id())

    with pytest.raises(DagsterStepOutputNotFoundError):
        execute_plan(
            execution_plan.build_subset_plan(['add_one.compute']),
            DagsterInstance.ephemeral(),
            environment_dict=environment_dict,
            pipeline_run=pipeline_run,
        )
Example #29
# Variant of the _has_run mock from Example #1: it simulates a run that is created after
# the existence check but deleted before it can be fetched.
def _has_run(self, run_id):
    global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
    # pylint: disable=protected-access
    if not self._run_storage.has_run(
            run_id) and not MOCK_HAS_RUN_CALLED:
        self._run_storage.add_run(
            PipelineRun.create_empty_run('foo_pipeline', run_id))
        MOCK_HAS_RUN_CALLED = True
        return False
    elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
        MOCK_HAS_RUN_CALLED = False
        return True
    else:
        return False
Example #30
def test_nuke():
    storage = SqliteRunStorage.mem()

    assert storage
    run_id = str(uuid.uuid4())

    storage.add_run(
        PipelineRun.create_empty_run(run_id=run_id,
                                     pipeline_name='some_pipeline'))

    assert len(storage.all_runs) == 1

    storage.wipe()

    assert list(storage.all_runs) == []