Example #1
def test_custom_dagster_dataframe_hydration_ok():
    input_dataframe = DataFrame({'foo': [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        input_dataframe.to_csv(input_csv_fp)
        TestDataFrame = create_dagster_pandas_dataframe_type(
            name='TestDataFrame', columns=[PandasColumn.exists('foo'),]
        )

        @solid(
            input_defs=[InputDefinition('test_df', TestDataFrame)],
            output_defs=[OutputDefinition(TestDataFrame)],
        )
        def use_test_dataframe(_, test_df):
            test_df['bar'] = [2, 4, 6]
            return test_df

        solid_result = execute_solid(
            use_test_dataframe,
            run_config={
                'solids': {
                    'use_test_dataframe': {
                        'inputs': {'test_df': {'csv': {'path': input_csv_fp}}},
                        'outputs': [{'result': {'csv': {'path': output_csv_fp}}},],
                    }
                }
            },
        )

        assert solid_result.success
        solid_output_df = read_csv(output_csv_fp)
        assert all(solid_output_df['bar'] == [2, 4, 6])
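
All examples on this page use safe_tempfile_path() as a context manager that yields a temporary file path and cleans the file up when the block exits. A minimal sketch of such a helper, written purely for illustration (not Dagster's actual implementation), could look like this:

import os
import tempfile
from contextlib import contextmanager


@contextmanager
def safe_tempfile_path_sketch():
    # Reserve a temp file, close our handle so other code (or a subprocess)
    # can reopen the path, and delete the file once the with-block exits.
    fd, path = tempfile.mkstemp()
    os.close(fd)
    try:
        yield path
    finally:
        if os.path.exists(path):
            os.unlink(path)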
Example #2
def test_logging():
    handle = handle_for_pipeline_cli_args({
        'python_file': script_relative_path('./test_logging.py'),
        'fn_name': 'define_hello_logging_pipeline',
    })

    pipeline_def = handle.build_pipeline_definition()

    with safe_tempfile_path() as test_file_path:
        with safe_tempfile_path() as critical_file_path:
            execute_pipeline(
                pipeline_def,
                {
                    'loggers': {
                        'test': {
                            'config': {
                                'name': 'test',
                                'file_path': test_file_path,
                                'log_level': 'DEBUG',
                            }
                        },
                        'critical': {
                            'config': {
                                'name': 'critical',
                                'file_path': critical_file_path,
                                'log_level': 'CRITICAL',
                            }
                        },
                    }
                },
                instance=DagsterInstance.local_temp(),
            )

            with open(test_file_path, 'r') as test_file:
                records = [
                    json.loads(line)
                    for line in test_file.read().strip('\n').split('\n')
                    if line
                ]

            with open(critical_file_path, 'r') as critical_file:
                critical_records = [
                    json.loads(line)
                    for line in critical_file.read().strip('\n').split('\n')
                    if line
                ]

    messages = [x['dagster_meta']['orig_message'] for x in records]

    assert 'Hello, there!' in messages

    critical_messages = [
        x['dagster_meta']['orig_message'] for x in critical_records
    ]

    assert 'Hello, there!' not in critical_messages
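
The assertions above parse the log files back as one JSON object per line and read x['dagster_meta']['orig_message']. A hedged sketch of a handler that would produce a file in that shape, assuming the framework attaches a dagster_meta dict to each record (a hypothetical helper, not the logger configured in the test):

import json
import logging


class JsonLineFileHandler(logging.Handler):
    # Append each log record to a file as a single JSON object per line so a
    # test can read it back with one json.loads() call per line.
    def __init__(self, file_path):
        super().__init__()
        self._file_path = file_path

    def emit(self, record):
        payload = {
            'level': record.levelname,
            # The test reads record['dagster_meta']['orig_message']; we assume
            # such a dict is attached to the record by the caller.
            'dagster_meta': getattr(record, 'dagster_meta', {'orig_message': record.getMessage()}),
        }
        with open(self._file_path, 'a') as f:
            f.write(json.dumps(payload) + '\n')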
Example #3
def test_two_runs_running():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()

        execution_manager = SubprocessExecutionManager(instance)

        pipeline_run_one = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_one
                        }
                    }
                }
            },
        )
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run_one, instance)

        pipeline_run_two = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_two
                        }
                    }
                }
            },
        )

        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run_two, instance)

        # ensure both runs have begun execution
        while not os.path.exists(file_one) and not os.path.exists(file_two):
            time.sleep(0.1)

        assert execution_manager.is_process_running(pipeline_run_one.run_id)
        assert execution_manager.is_process_running(pipeline_run_two.run_id)

        assert execution_manager.terminate(pipeline_run_one.run_id)

        assert not execution_manager.is_process_running(
            pipeline_run_one.run_id)
        assert execution_manager.is_process_running(pipeline_run_two.run_id)

        assert execution_manager.terminate(pipeline_run_two.run_id)

        assert not execution_manager.is_process_running(
            pipeline_run_one.run_id)
        assert not execution_manager.is_process_running(
            pipeline_run_two.run_id)
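
Several examples run an infinite_loop_pipeline whose 'loop' solid writes the configured file and then spins forever, so the test can detect that execution has started and then terminate the run. A hedged sketch of what such a pipeline might look like (illustrative only; the real definition lives in the test module, and the exact decorator arguments depend on the Dagster version):

import time

from dagster import Field, pipeline, solid


@solid(config_schema={'file': Field(str)})
def loop(context):
    # Touch the sentinel file so the test knows execution has begun, then
    # spin until the process is terminated from the outside.
    with open(context.solid_config['file'], 'w') as f:
        f.write('started')
    while True:
        time.sleep(0.1)


@pipeline
def infinite_loop_pipeline():
    loop()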
Example #4
def test_max_concurrency_one():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    pipeline_def = handle.build_pipeline_definition()

    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two:
        instance = DagsterInstance.local_temp()
        execution_manager = QueueingSubprocessExecutionManager(
            instance, max_concurrent_runs=1)

        run_one = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_one
                        }
                    }
                }
            },
        )
        run_two = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_two
                        }
                    }
                }
            },
        )

        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           run_one, instance)
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           run_two, instance)

        while not os.path.exists(file_one):
            execution_manager.check()
            time.sleep(0.1)

        assert execution_manager.is_active(run_one.run_id)
        assert not execution_manager.is_active(run_two.run_id)
        assert not os.path.exists(file_two)

        assert execution_manager.terminate(run_one.run_id)

        while not os.path.exists(file_two):
            execution_manager.check()
            time.sleep(0.1)

        assert not execution_manager.is_active(run_one.run_id)
        assert execution_manager.is_active(run_two.run_id)
        assert execution_manager.terminate(run_two.run_id)
Example #5
def cli_api_execute_run(instance, pipeline_origin, pipeline_run):
    with safe_tempfile_path() as output_file:
        with safe_tempfile_path() as input_file:
            _process = _cli_api_execute_run_process(
                input_file, output_file, instance, pipeline_origin, pipeline_run
            )
            event_list = list(ipc_read_event_stream(output_file))
            check.inst(event_list[0], ExecuteRunArgsLoadComplete)
            return event_list[1:]
def test_two_runs_running():
    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two, temp_instance() as instance:
        pipeline_run_one = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_one
                        }
                    }
                }
            },
        )
        instance.launch_run(
            pipeline_run_one.run_id,
            _external_pipeline_from_def(infinite_loop_pipeline))

        pipeline_run_two = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': file_two
                        }
                    }
                }
            },
        )

        instance.launch_run(
            pipeline_run_two.run_id,
            _external_pipeline_from_def(infinite_loop_pipeline))

        # ensure both runs have begun execution
        while not os.path.exists(file_one) and not os.path.exists(file_two):
            time.sleep(0.1)

        run_launcher = instance.run_launcher

        assert run_launcher.can_terminate(pipeline_run_one.run_id)
        assert run_launcher.can_terminate(pipeline_run_two.run_id)

        assert run_launcher.terminate(pipeline_run_one.run_id)

        assert not run_launcher.can_terminate(pipeline_run_one.run_id)
        assert run_launcher.can_terminate(pipeline_run_two.run_id)

        assert run_launcher.terminate(pipeline_run_two.run_id)

        assert not run_launcher.can_terminate(pipeline_run_one.run_id)
        assert not run_launcher.can_terminate(pipeline_run_two.run_id)
Example #7
def test_custom_dagster_dataframe_loading_ok():
    input_dataframe = DataFrame({"foo": [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        input_dataframe.to_csv(input_csv_fp)
        TestDataFrame = create_dagster_pandas_dataframe_type(
            name="TestDataFrame",
            columns=[
                PandasColumn.exists("foo"),
            ],
        )

        @op(
            ins={"test_df": In(TestDataFrame)},
            out=Out(TestDataFrame),
        )
        def use_test_dataframe(_, test_df):
            test_df["bar"] = [2, 4, 6]
            return test_df

        @graph
        def basic_graph():
            use_test_dataframe()

        result = basic_graph.execute_in_process(
            run_config={
                "ops": {
                    "use_test_dataframe": {
                        "inputs": {
                            "test_df": {
                                "csv": {
                                    "path": input_csv_fp
                                }
                            }
                        },
                        "outputs": [
                            {
                                "result": {
                                    "csv": {
                                        "path": output_csv_fp
                                    }
                                }
                            },
                        ],
                    }
                }
            })
        assert result.success
        output_df = read_csv(output_csv_fp)
        assert all(output_df["bar"] == [2, 4, 6])
def test_two_runs_running():
    with safe_tempfile_path() as file_one, safe_tempfile_path() as file_two, instance_for_test() as instance:
        pipeline_run_one = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            run_config={"solids": {
                "loop": {
                    "config": {
                        "file": file_one
                    }
                }
            }},
        )
        instance.launch_run(
            pipeline_run_one.run_id,
            _external_pipeline_from_def(infinite_loop_pipeline))

        pipeline_run_two = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            run_config={"solids": {
                "loop": {
                    "config": {
                        "file": file_two
                    }
                }
            }},
        )

        instance.launch_run(
            pipeline_run_two.run_id,
            _external_pipeline_from_def(infinite_loop_pipeline))

        # ensure both runs have begun execution
        while not os.path.exists(file_one) and not os.path.exists(file_two):
            time.sleep(0.1)

        run_launcher = instance.run_launcher

        assert run_launcher.can_terminate(pipeline_run_one.run_id)
        assert run_launcher.can_terminate(pipeline_run_two.run_id)

        assert run_launcher.terminate(pipeline_run_one.run_id)

        assert not run_launcher.can_terminate(pipeline_run_one.run_id)
        assert run_launcher.can_terminate(pipeline_run_two.run_id)

        assert run_launcher.terminate(pipeline_run_two.run_id)

        assert not run_launcher.can_terminate(pipeline_run_one.run_id)
        assert not run_launcher.can_terminate(pipeline_run_two.run_id)
Example #9
def test_logging():
    with safe_tempfile_path() as test_file_path:
        with safe_tempfile_path() as critical_file_path:
            with instance_for_test() as instance:
                execute_pipeline(
                    reconstructable(define_hello_logging_pipeline),
                    {
                        "loggers": {
                            "test": {
                                "config": {
                                    "name": "test",
                                    "file_path": test_file_path,
                                    "log_level": "DEBUG",
                                }
                            },
                            "critical": {
                                "config": {
                                    "name": "critical",
                                    "file_path": critical_file_path,
                                    "log_level": "CRITICAL",
                                }
                            },
                        }
                    },
                    instance=instance,
                )

                with open(test_file_path, "r") as test_file:
                    records = [
                        json.loads(line)
                        for line in test_file.read().strip("\n").split("\n")
                        if line
                    ]

                with open(critical_file_path, "r") as critical_file:
                    critical_records = [
                        json.loads(line) for line in
                        critical_file.read().strip("\n").split("\n") if line
                    ]

    messages = [x["dagster_meta"]["orig_message"] for x in records]

    assert "Hello, there!" in messages

    critical_messages = [
        x["dagster_meta"]["orig_message"] for x in critical_records
    ]

    assert "Hello, there!" not in critical_messages
Example #10
def test_logging():
    with safe_tempfile_path() as test_file_path:
        with safe_tempfile_path() as critical_file_path:
            execute_pipeline(
                reconstructable(define_hello_logging_pipeline),
                {
                    'loggers': {
                        'test': {
                            'config': {
                                'name': 'test',
                                'file_path': test_file_path,
                                'log_level': 'DEBUG',
                            }
                        },
                        'critical': {
                            'config': {
                                'name': 'critical',
                                'file_path': critical_file_path,
                                'log_level': 'CRITICAL',
                            }
                        },
                    }
                },
                instance=DagsterInstance.local_temp(),
            )

            with open(test_file_path, 'r') as test_file:
                records = [
                    json.loads(line)
                    for line in test_file.read().strip('\n').split('\n')
                    if line
                ]

            with open(critical_file_path, 'r') as critical_file:
                critical_records = [
                    json.loads(line)
                    for line in critical_file.read().strip('\n').split('\n')
                    if line
                ]

    messages = [x['dagster_meta']['orig_message'] for x in records]

    assert 'Hello, there!' in messages

    critical_messages = [
        x['dagster_meta']['orig_message'] for x in critical_records
    ]

    assert 'Hello, there!' not in critical_messages
Example #11
def test_custom_dagster_dataframe_loading_ok():
    input_dataframe = DataFrame({"foo": [1, 2, 3]})
    with safe_tempfile_path() as input_csv_fp, safe_tempfile_path() as output_csv_fp:
        input_dataframe.to_csv(input_csv_fp)
        TestDataFrame = create_dagster_pandas_dataframe_type(
            name="TestDataFrame",
            columns=[
                PandasColumn.exists("foo"),
            ],
        )

        @solid(
            input_defs=[InputDefinition("test_df", TestDataFrame)],
            output_defs=[OutputDefinition(TestDataFrame)],
        )
        def use_test_dataframe(_, test_df):
            test_df["bar"] = [2, 4, 6]
            return test_df

        solid_result = execute_solid(
            use_test_dataframe,
            run_config={
                "solids": {
                    "use_test_dataframe": {
                        "inputs": {
                            "test_df": {
                                "csv": {
                                    "path": input_csv_fp
                                }
                            }
                        },
                        "outputs": [
                            {
                                "result": {
                                    "csv": {
                                        "path": output_csv_fp
                                    }
                                }
                            },
                        ],
                    }
                }
            },
        )

        assert solid_result.success
        solid_output_df = read_csv(output_csv_fp)
        assert all(solid_output_df["bar"] == [2, 4, 6])
Example #12
def test_interrupt_ipc_subprocess_by_pid():
    with safe_tempfile_path() as started_sentinel:
        with safe_tempfile_path() as interrupt_sentinel:
            sleepy_process = open_ipc_subprocess([
                sys.executable,
                file_relative_path(__file__,
                                   "subprocess_with_interrupt_support.py"),
                started_sentinel,
                interrupt_sentinel,
            ])
            wait_for_file(started_sentinel)
            interrupt_ipc_subprocess_pid(sleepy_process.pid)
            wait_for_file(interrupt_sentinel)
            with open(interrupt_sentinel, "r") as fd:
                assert fd.read().startswith("received_keyboard_interrupt")
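
wait_for_file above blocks until the subprocess writes its sentinel file. A hedged sketch of such a polling helper (illustrative; the real helper lives in Dagster's test utilities and may differ):

import os
import time


def wait_for_file_sketch(path, timeout=10.0, interval=0.1):
    # Poll for the sentinel file so a hung subprocess fails the test with a
    # clear timeout instead of hanging the suite forever.
    deadline = time.time() + timeout
    while not os.path.exists(path):
        if time.time() > deadline:
            raise RuntimeError("Timed out waiting for sentinel file: %s" % path)
        time.sleep(interval)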
Example #13
def test_resources_notebook_with_exception():
    result = None
    with safe_tempfile_path() as path:
        with exec_for_test(
            "resource_with_exception_pipeline",
            {"resources": {"list": {"config": path}}},
            raise_on_error=False,
        ) as result:
            assert not result.success
            assert result.step_event_list[8].event_type.value == "STEP_FAILURE"
            assert ("raise Exception()" in result.step_event_list[8].
                    event_specific_data.error.cause.message)

            # Expect something like:
            # ['e8d636: Opened', 'e8d636: Hello, solid!', '9d438e: Opened',
            #  '9d438e: Hello, notebook!', '9d438e: Closed', 'e8d636: Closed']
            with open(path, "rb") as fd:
                messages = pickle.load(fd)

            messages = [message.split(": ") for message in messages]

            resource_ids = [x[0] for x in messages]
            assert len(set(resource_ids)) == 2
            assert resource_ids[0] == resource_ids[1] == resource_ids[5]
            assert resource_ids[2] == resource_ids[3] == resource_ids[4]

            msgs = [x[1] for x in messages]
            assert msgs[0] == msgs[2] == "Opened"
            assert msgs[4] == msgs[5] == "Closed"
            assert msgs[1] == "Hello, solid!"
            assert msgs[3] == "Hello, notebook!"
Example #14
def test_resources_notebook():
    with safe_tempfile_path() as path:
        with exec_for_test(
            "resource_pipeline",
            {"resources": {"list": {"config": path}}},
            mode="prod",
        ) as result:
            assert result.success

            # Expect something like:
            # ['e8d636: Opened', 'e8d636: Hello, solid!', '9d438e: Opened',
            #  '9d438e: Hello, notebook!', '9d438e: Closed', 'e8d636: Closed']
            with open(path, "rb") as fd:
                messages = pickle.load(fd)

            messages = [message.split(": ") for message in messages]

            resource_ids = [x[0] for x in messages]
            assert len(set(resource_ids)) == 2
            assert resource_ids[0] == resource_ids[1] == resource_ids[5]
            assert resource_ids[2] == resource_ids[3] == resource_ids[4]

            msgs = [x[1] for x in messages]
            assert msgs[0] == msgs[2] == "Opened"
            assert msgs[4] == msgs[5] == "Closed"
            assert msgs[1] == "Hello, solid!"
            assert msgs[3] == "Hello, notebook!"
def test_has_run_query_and_terminate():
    with temp_instance() as instance:
        with safe_tempfile_path() as path:
            run_config = {'solids': {'loop': {'config': {'file': path}}}}

            created_pipeline_run = instance.create_run_for_pipeline(
                pipeline_def=infinite_loop_pipeline,
                run_config=run_config,
            )

            pipeline_run = instance.launch_run(
                created_pipeline_run.run_id,
                _external_pipeline_from_def(infinite_loop_pipeline))

            while not os.path.exists(path):
                time.sleep(0.1)

            assert os.path.exists(path)

            run_launcher = instance.run_launcher

            assert run_launcher.can_terminate(pipeline_run.run_id)
            assert run_launcher.terminate(pipeline_run.run_id)
            assert instance.get_run_by_id(pipeline_run.run_id).is_finished
            assert not run_launcher.can_terminate(pipeline_run.run_id)
            assert not run_launcher.terminate(pipeline_run.run_id)

        assert not os.path.exists(path)
Example #16
def ephemeral_grpc_api_client(force_port=False):
    if seven.IS_WINDOWS or force_port:
        port = find_free_port()
        server_process = open_server_process(port=port, socket=None)

        if server_process is None:
            raise CouldNotStartServerProcess(port=port, socket=None)

        client = DagsterGrpcClient(port=port, server_process=server_process)

        try:
            yield client
        finally:
            client.terminate_server_process()

    else:
        with safe_tempfile_path() as socket:
            server_process = open_server_process(port=None, socket=socket)

            if server_process is None:
                raise CouldNotStartServerProcess(port=None, socket=socket)

            client = DagsterGrpcClient(socket=socket,
                                       server_process=server_process)

            try:
                yield client
            finally:
                client.terminate_server_process()
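
Because ephemeral_grpc_api_client is a generator that yields the client and tears the server down afterwards, it is presumably wrapped with (or used via) contextlib.contextmanager. A usage sketch under that assumption:

from contextlib import contextmanager

with contextmanager(ephemeral_grpc_api_client)() as client:
    # The client talks to a throwaway gRPC server that is terminated on exit.
    assert client.ping("hello") == "hello"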
Example #17
def test_single_proc_interrupt():
    @pipeline
    def write_a_file_pipeline():
        write_a_file()

    with safe_tempfile_path() as success_tempfile:

        # launch a thread that waits until the file is written, then sends an interrupt
        Thread(target=_send_kbd_int, args=([success_tempfile], )).start()

        results = []
        try:
            # launch a pipeline that writes a file and loops infinitely
            # next time the launched thread wakes up it will send a keyboard
            # interrupt
            for result in execute_pipeline_iterator(
                    write_a_file_pipeline,
                    run_config={
                        "solids": {
                            "write_a_file": {
                                "config": {
                                    "tempfile": success_tempfile
                                }
                            }
                        }
                    },
            ):
                results.append(result.event_type)
            assert False  # should never reach
        except KeyboardInterrupt:
            pass

        assert DagsterEventType.STEP_FAILURE in results
        assert DagsterEventType.PIPELINE_FAILURE in results
def test_basic_cancellation():
    context = define_test_subprocess_context(DagsterInstance.local_temp())
    with safe_tempfile_path() as path:
        result = execute_dagster_graphql(
            context,
            START_PIPELINE_EXECUTION_QUERY,
            variables={
                'executionParams': {
                    'selector': {'name': 'infinite_loop_pipeline'},
                    'mode': 'default',
                    'environmentConfigData': {'solids': {'loop': {'config': {'file': path}}}},
                }
            },
        )

        assert not result.errors
        assert result.data

        # just test existence
        assert result.data['startPipelineExecution']['__typename'] == 'StartPipelineRunSuccess'
        run_id = result.data['startPipelineExecution']['run']['runId']

        assert run_id

        # ensure the execution has happened
        while not os.path.exists(path):
            time.sleep(0.1)

        result = execute_dagster_graphql(
            context, RUN_CANCELLATION_QUERY, variables={'runId': run_id}
        )

        assert (
            result.data['cancelPipelineExecution']['__typename'] == 'CancelPipelineExecutionSuccess'
        )
Example #19
    def test_terminate_failed(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, "infinite_loop_pipeline")
        with safe_tempfile_path() as path:
            old_terminate = graphql_context.instance.run_launcher.terminate
            graphql_context.instance.run_launcher.terminate = lambda _run_id: False
            result = execute_dagster_graphql(
                graphql_context,
                LAUNCH_PIPELINE_EXECUTION_MUTATION,
                variables={
                    "executionParams": {
                        "selector": selector,
                        "mode": "default",
                        "runConfigData": {"solids": {"loop": {"config": {"file": path}}}},
                    }
                },
            )

            assert not result.errors
            assert result.data

            # just test existence
            assert (
                result.data["launchPipelineExecution"]["__typename"] == "LaunchPipelineRunSuccess"
            )
            run_id = result.data["launchPipelineExecution"]["run"]["runId"]
            # ensure the execution has happened
            while not os.path.exists(path):
                time.sleep(0.1)

            result = execute_dagster_graphql(
                graphql_context, RUN_CANCELLATION_QUERY, variables={"runId": run_id}
            )
            assert (
                result.data["terminatePipelineExecution"]["__typename"]
                == "TerminatePipelineExecutionFailure"
            )
            assert result.data["terminatePipelineExecution"]["message"].startswith(
                "Unable to terminate run"
            )

            result = execute_dagster_graphql(
                graphql_context,
                RUN_CANCELLATION_QUERY,
                variables={"runId": run_id, "terminatePolicy": "MARK_AS_CANCELED_IMMEDIATELY"},
            )

            assert (
                result.data["terminatePipelineExecution"]["__typename"]
                == "TerminatePipelineExecutionSuccess"
            )

            assert result.data["terminatePipelineExecution"]["run"]["runId"] == run_id

            graphql_context.instance.run_launcher.terminate = old_terminate

            # Clean up the run process on the gRPC server
            repository_location_handle = graphql_context.repository_locations[0].location_handle
            repository_location_handle.client.cancel_execution(
                CancelExecutionRequest(run_id=run_id)
            )
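
The test above only shows how the result of RUN_CANCELLATION_QUERY is read back. Based purely on the fields it accesses (terminatePipelineExecution, __typename, message, run.runId) and the variables it passes (runId, terminatePolicy), here is a hedged reconstruction of what such a mutation could look like; the actual query string and GraphQL type names in dagster-graphql may differ:

RUN_CANCELLATION_QUERY_SKETCH = """
mutation TerminateRun($runId: String!, $terminatePolicy: TerminatePipelinePolicy) {
  terminatePipelineExecution(runId: $runId, terminatePolicy: $terminatePolicy) {
    __typename
    ... on TerminatePipelineExecutionSuccess {
      run { runId }
    }
    ... on TerminatePipelineExecutionFailure {
      message
    }
  }
}
"""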
Example #20
def kind_kubeconfig(cluster_name, use_internal_address=True):
    '''For kind clusters, we need to write our own kubeconfig file to leave the user's existing
    kubeconfig alone
    '''
    check.str_param(cluster_name, 'cluster_name')
    check.bool_param(use_internal_address, 'use_internal_address')

    old_kubeconfig = os.getenv('KUBECONFIG')
    try:
        kubeconfig_call = ['kind', 'get', 'kubeconfig', '--name', cluster_name]
        if use_internal_address:
            kubeconfig_call += ['--internal']

        with safe_tempfile_path() as kubeconfig_file:
            print('Writing kubeconfig to file %s' % kubeconfig_file)

            with open(kubeconfig_file, 'wb') as f:
                subprocess.check_call(kubeconfig_call, stdout=f)

            os.environ['KUBECONFIG'] = kubeconfig_file

            yield kubeconfig_file

    finally:
        print('Cleaning up kubeconfig')
        if 'KUBECONFIG' in os.environ:
            del os.environ['KUBECONFIG']

        if old_kubeconfig is not None:
            os.environ['KUBECONFIG'] = old_kubeconfig
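
Since kind_kubeconfig yields, it is presumably exposed as a context manager as well. A usage sketch, assuming a hypothetical kind cluster named test-cluster already exists and kubectl is on PATH:

import subprocess
from contextlib import contextmanager

with contextmanager(kind_kubeconfig)('test-cluster') as kubeconfig_path:
    # For the duration of the block, KUBECONFIG points at the kind cluster, so
    # kubectl (and any client reading the env var) targets it.
    subprocess.check_call(['kubectl', '--kubeconfig', kubeconfig_path, 'get', 'nodes'])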
Example #21
def in_pipeline_manager(pipeline_name='hello_world_pipeline',
                        solid_handle=SolidHandle('hello_world', 'hello_world',
                                                 None),
                        handle_kwargs=None,
                        **kwargs):
    manager = Manager()

    run_id = str(uuid.uuid4())

    marshal_dir = tempfile.mkdtemp()

    if not handle_kwargs:
        handle_kwargs = {
            'pipeline_name': pipeline_name,
            'module_name': 'dagstermill.examples.repository',
            'fn_name': 'define_hello_world_pipeline',
        }

    try:
        with safe_tempfile_path() as output_log_file_path:
            context_dict = {
                'run_config_kwargs': dict(run_id=run_id, mode='default'),
                'solid_handle_kwargs': solid_handle._asdict(),
                'handle_kwargs': handle_kwargs,
                'marshal_dir': marshal_dir,
                'environment_dict': {},
                'output_log_path': output_log_file_path,
            }

            manager.reconstitute_pipeline_context(
                **dict(context_dict, **kwargs))
            yield manager
    finally:
        shutil.rmtree(marshal_dir)
def test_execute_pipeline_command_missing_args():
    runner = CliRunner()

    with safe_tempfile_path() as filename:
        result = runner.invoke(
            execute_pipeline_command,
            [
                '-y',
                file_relative_path(__file__, 'repository_file.yaml'), 'foo',
                filename
            ],
        )

        assert result.exit_code == 0

        with open(filename, 'r') as f:
            # Read lines from output file, and strip newline characters
            lines = [line.rstrip() for line in f.readlines()]
            assert len(lines) == 3

            # Check all lines are serialized dagster named tuples
            for line in lines:
                deserialize_json_to_dagster_namedtuple(line)

            assert deserialize_json_to_dagster_namedtuple(
                lines[0]) == IPCStartMessage()
            assert deserialize_json_to_dagster_namedtuple(
                lines[-1]) == IPCEndMessage()
Example #23
    def test_force_cancel_queued_run(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, "infinite_loop_pipeline")
        with safe_tempfile_path() as path:
            result = execute_dagster_graphql(
                graphql_context,
                LAUNCH_PIPELINE_EXECUTION_MUTATION,
                variables={
                    "executionParams": {
                        "selector": selector,
                        "mode": "default",
                        "runConfigData": {"solids": {"loop": {"config": {"file": path}}}},
                    }
                },
            )

            assert not result.errors
            assert result.data

            # just test existence
            assert (
                result.data["launchPipelineExecution"]["__typename"] == "LaunchPipelineRunSuccess"
            )
            run_id = result.data["launchPipelineExecution"]["run"]["runId"]

            assert graphql_context.instance.get_run_by_id(run_id).status == PipelineRunStatus.QUEUED

            result = execute_dagster_graphql(
                graphql_context,
                RUN_CANCELLATION_QUERY,
                variables={"runId": run_id, "terminatePolicy": "MARK_AS_CANCELED_IMMEDIATELY"},
            )
            assert (
                result.data["terminatePipelineExecution"]["__typename"]
                == "TerminatePipelineExecutionSuccess"
            )
Example #24
def test_even_type_materialization_config():
    class EvenType:
        def __init__(self, num):
            assert num % 2 == 0
            self.num = num

    @dagster_type_materializer({"path": str})
    def save_to_file_materialization(_, cfg, value):
        with open(cfg["path"], "w") as ff:
            ff.write(str(value))
            return AssetMaterialization(
                "path",
                "Wrote out value to {path}".format(path=cfg["path"]),
                metadata={"path": cfg["path"]},
            )

    EvenDagsterType = PythonObjectDagsterType(EvenType, materializer=save_to_file_materialization)

    @solid
    def double_even(_, even_num: EvenDagsterType) -> EvenDagsterType:
        return EvenType(even_num.num * 2)

    with safe_tempfile_path() as path:
        yaml_doc = """
solids:
    double_even:
        outputs:
            - result:
                path: {path}
 """
        solid_result = execute_solid(
            double_even,
            input_values={"even_num": EvenType(2)},
            run_config=yaml.safe_load(yaml_doc.format(path=path)),
        )
        assert solid_result.success
Example #25
def test_has_run_query_and_terminate():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    instance = DagsterInstance.local_temp()

    with safe_tempfile_path() as path:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={'solids': {
                'loop': {
                    'config': {
                        'file': path
                    }
                }
            }},
        )
        execution_manager = SubprocessExecutionManager(instance)
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run, instance)

        while not os.path.exists(path):
            time.sleep(0.1)

        assert os.path.exists(path)

        assert execution_manager.is_process_running(pipeline_run.run_id)
        assert execution_manager.terminate(pipeline_run.run_id)
        assert instance.get_run_by_id(pipeline_run.run_id).is_finished
        assert not execution_manager.is_process_running(pipeline_run.run_id)
        assert not execution_manager.terminate(pipeline_run.run_id)

    assert not os.path.exists(path)
Example #26
def test_interrupt_ipc_subprocess_grandchild():
    with ExitStack() as context_stack:
        (
            child_opened_sentinel,
            parent_interrupt_sentinel,
            child_started_sentinel,
            child_interrupt_sentinel,
        ) = [
            context_stack.enter_context(safe_tempfile_path()) for _ in range(4)
        ]
        child_process = open_ipc_subprocess([
            sys.executable,
            file_relative_path(__file__,
                               "parent_subprocess_with_interrupt_support.py"),
            child_opened_sentinel,
            parent_interrupt_sentinel,
            child_started_sentinel,
            child_interrupt_sentinel,
        ])
        wait_for_file(child_opened_sentinel)
        wait_for_file(child_started_sentinel)
        interrupt_ipc_subprocess(child_process)
        wait_for_file(child_interrupt_sentinel)
        with open(child_interrupt_sentinel, "r") as fd:
            assert fd.read().startswith("received_keyboard_interrupt")
        wait_for_file(parent_interrupt_sentinel)
        with open(parent_interrupt_sentinel, "r") as fd:
            assert fd.read().startswith("parent_received_keyboard_interrupt")
Example #27
def test_max_concurrency_zero():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'infinite_loop_pipeline')

    with safe_tempfile_path() as filepath:
        instance = DagsterInstance.local_temp()
        execution_manager = QueueingSubprocessExecutionManager(
            instance, max_concurrent_runs=0)

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=infinite_loop_pipeline,
            environment_dict={
                'solids': {
                    'loop': {
                        'config': {
                            'file': filepath
                        }
                    }
                }
            },
        )
        execution_manager.execute_pipeline(handle, infinite_loop_pipeline,
                                           pipeline_run, instance)
        assert not execution_manager.is_active(pipeline_run.run_id)
        assert not os.path.exists(filepath)
def test_resources_notebook():
    with safe_tempfile_path() as path:
        with exec_for_test(
            'define_resource_pipeline',
            {'resources': {'list': {'config': path}}},
            run_config=RunConfig(mode='prod'),
        ) as result:
            assert result.success

            # Expect something like:
            # ['e8d636: Opened', 'e8d636: Hello, solid!', '9d438e: Opened',
            #  '9d438e: Hello, notebook!', '9d438e: Closed', 'e8d636: Closed']
            with open(path, 'rb') as fd:
                messages = pickle.load(fd)

            messages = [message.split(': ') for message in messages]

            resource_ids = [x[0] for x in messages]
            assert len(set(resource_ids)) == 2
            assert resource_ids[0] == resource_ids[1] == resource_ids[5]
            assert resource_ids[2] == resource_ids[3] == resource_ids[4]

            msgs = [x[1] for x in messages]
            assert msgs[0] == msgs[2] == 'Opened'
            assert msgs[4] == msgs[5] == 'Closed'
            assert msgs[1] == 'Hello, solid!'
            assert msgs[3] == 'Hello, notebook!'
Example #29
def test_server_socket():
    with safe_tempfile_path() as skt:
        server_process = open_server_process(port=None, socket=skt)
        try:
            assert DagsterGrpcClient(socket=skt).ping("foobar") == "foobar"
        finally:
            interrupt_ipc_subprocess_pid(server_process.pid)
Example #30
def kind_kubeconfig(cluster_name, use_internal_address=True):
    """For kind clusters, we need to write our own kubeconfig file to leave the user's existing
    kubeconfig alone
    """
    check.str_param(cluster_name, "cluster_name")
    check.bool_param(use_internal_address, "use_internal_address")

    old_kubeconfig = os.getenv("KUBECONFIG")
    try:
        kubeconfig_call = ["kind", "get", "kubeconfig", "--name", cluster_name]
        if use_internal_address:
            kubeconfig_call += ["--internal"]

        with safe_tempfile_path() as kubeconfig_file:
            print("Writing kubeconfig to file %s" % kubeconfig_file)

            with open(kubeconfig_file, "wb") as f:
                subprocess.check_call(kubeconfig_call, stdout=f)

            os.environ["KUBECONFIG"] = kubeconfig_file

            yield kubeconfig_file

    finally:
        print("Cleaning up kubeconfig")
        if "KUBECONFIG" in os.environ:
            del os.environ["KUBECONFIG"]

        if old_kubeconfig is not None:
            os.environ["KUBECONFIG"] = old_kubeconfig