def test_load_with_empty_working_directory(capfd):
    """Check that ``--empty-working-directory`` breaks a file with a local import.

    From the test directory the server is first started without the flag and
    must answer a ping. It is then started again on a fresh port with
    ``--empty-working-directory``: waiting for it must raise, and the captured
    stderr must contain "No module named".
    """
    first_port = find_free_port()
    # File that will fail if working directory isn't set to default
    target_file = file_relative_path(__file__, "grpc_repo_with_local_import.py")

    default_command = ["dagster", "api", "grpc"]
    default_command += ["--port", str(first_port)]
    default_command += ["--python-file", target_file]

    with new_cwd(os.path.dirname(__file__)):
        default_server = subprocess.Popen(default_command, stdout=subprocess.PIPE)
        try:
            wait_for_grpc_server(
                default_server,
                DagsterGrpcClient(port=first_port, host="localhost"),
                default_command,
            )
            reply = DagsterGrpcClient(port=first_port).ping("foobar")
            assert reply == "foobar"
        finally:
            default_server.terminate()

        # indicating the working directory is empty fails
        second_port = find_free_port()
        empty_cwd_command = ["dagster", "api", "grpc"]
        empty_cwd_command += ["--port", str(second_port)]
        empty_cwd_command += ["--python-file", target_file]
        empty_cwd_command += ["--empty-working-directory"]

        empty_cwd_server = subprocess.Popen(empty_cwd_command, stdout=subprocess.PIPE)
        try:
            with pytest.raises(Exception):
                wait_for_grpc_server(
                    empty_cwd_server,
                    DagsterGrpcClient(port=second_port, host="localhost"),
                    empty_cwd_command,
                )

            empty_cwd_server.wait()

            _, stderr_text = capfd.readouterr()
            assert "No module named" in stderr_text
        finally:
            # Only signal the process if it has not already exited.
            still_running = empty_cwd_server.poll() is None
            if still_running:
                empty_cwd_server.terminate()
def test_load_with_empty_working_directory(capfd):
    """Check that ``--empty-working-directory`` breaks a file with a local import.

    Both servers are launched from the test directory and report through an
    IPC output file. Without the flag the server must answer a ping; with the
    flag, waiting for startup must raise ``DagsterUserCodeProcessError`` and
    the captured stderr must contain "No module named".
    """
    port = find_free_port()
    # File that will fail if working directory isn't set to default
    target_file = file_relative_path(__file__, "grpc_repo_with_local_import.py")

    with new_cwd(os.path.dirname(__file__)):
        first_ipc_file = _get_ipc_output_file()
        first_command = ["dagster", "api", "grpc"]
        first_command += ["--port", str(port)]
        first_command += ["--python-file", target_file]
        first_command += ["--ipc-output-file", first_ipc_file]

        first_server = subprocess.Popen(first_command, stdout=subprocess.PIPE)
        try:
            wait_for_grpc_server(first_server, first_ipc_file)
            reply = DagsterGrpcClient(port=port).ping("foobar")
            assert reply == "foobar"
        finally:
            first_server.terminate()

        # indicating the working directory is empty fails
        second_ipc_file = _get_ipc_output_file()
        second_command = ["dagster", "api", "grpc"]
        second_command += ["--port", str(port)]
        second_command += ["--python-file", target_file]
        second_command += ["--empty-working-directory"]
        second_command += ["--ipc-output-file", second_ipc_file]

        second_server = subprocess.Popen(second_command, stdout=subprocess.PIPE)
        try:
            with pytest.raises(DagsterUserCodeProcessError):
                wait_for_grpc_server(second_server, second_ipc_file)

            second_server.wait()

            _, stderr_text = capfd.readouterr()
            assert "No module named" in stderr_text
        finally:
            # Only signal the process if it has not already exited.
            still_running = second_server.poll() is None
            if still_running:
                second_server.terminate()
def test_grpc_watch_thread_server_complex_cycle_2():
    """Drive the watch thread through three same-ID restarts and a final outage.

    The server is shut down and reopened three times with the same fixed
    server id; each cycle must produce one more "on_disconnect" and one more
    "on_reconnected" event. The server is then shut down for good, and the
    test waits for the ``on_error`` callback, which must be the last event
    recorded.
    """
    port = find_free_port()
    server_id = "fixed_id"
    callback_log = []
    flags = {}

    def record_disconnect():
        callback_log.append("on_disconnect")

    def record_reconnected():
        callback_log.append("on_reconnected")

    def record_updated(_):
        callback_log.append("on_updated")

    def record_error():
        flags["on_error"] = True
        callback_log.append("on_error")

    # Create initial server
    open_server_process(port=port, socket=None, fixed_server_id=server_id)

    # Start watch thread
    client = DagsterGrpcClient(port=port)
    watch_interval = 1  # This is a faster watch interval than we would use in practice
    shutdown_event, watch_thread = create_grpc_watch_thread(
        client,
        on_disconnect=record_disconnect,
        on_reconnected=record_reconnected,
        on_updated=record_updated,
        on_error=record_error,
        watch_interval=watch_interval,
        max_reconnect_attempts=3,
    )
    watch_thread.start()
    time.sleep(watch_interval * 3)

    restart_cycles = 3
    for restart_number in range(1, restart_cycles + 1):
        # Simulate server restart three times with same server ID
        client.shutdown_server()
        wait_for_condition(
            lambda: callback_log.count("on_disconnect") == restart_number,
            watch_interval,
        )
        open_server_process(port=port, socket=None, fixed_server_id=server_id)
        wait_for_condition(
            lambda: callback_log.count("on_reconnected") == restart_number,
            watch_interval,
        )

    # Simulate server failure
    client.shutdown_server()

    # Wait for reconnect attempts to exhaust and on_error callback to be called
    wait_for_condition(lambda: flags.get("on_error"), watch_interval)

    shutdown_event.set()
    watch_thread.join()

    last_event = callback_log[-1]
    assert last_event == "on_error"
def test_lazy_load_via_env_var():
    """Start a server for a broken repo file with lazy loading enabled via env var.

    With ``DAGSTER_CLI_API_GRPC_LAZY_LOAD_USER_CODE`` set to "1", waiting for
    the server must succeed and ``list_repositories`` must return a
    ``SerializableErrorInfo`` whose message contains "No module named".
    """
    lazy_load_env = {"DAGSTER_CLI_API_GRPC_LAZY_LOAD_USER_CODE": "1"}
    with environ(lazy_load_env):
        port = find_free_port()
        broken_repo_file = file_relative_path(__file__, "grpc_repo_with_error.py")
        ipc_file = _get_ipc_output_file()

        command = ["dagster", "api", "grpc"]
        command += ["--port", str(port)]
        command += ["--python-file", broken_repo_file]
        command += ["--ipc-output-file", ipc_file]

        server = subprocess.Popen(command, stdout=subprocess.PIPE)
        try:
            wait_for_grpc_server(server, ipc_file)
            response = DagsterGrpcClient(port=port).list_repositories()
            assert isinstance(response, SerializableErrorInfo)
            assert "No module named" in response.message
        finally:
            server.terminate()
def test_lazy_load_via_env_var():
    """Start a server for a broken repo file with lazy loading enabled via env var.

    With ``DAGSTER_CLI_API_GRPC_LAZY_LOAD_USER_CODE`` set to "1", waiting for
    the server must succeed, and the deserialized ``list_repositories`` reply
    must be a ``SerializableErrorInfo`` whose message contains
    "No module named".
    """
    lazy_load_env = {"DAGSTER_CLI_API_GRPC_LAZY_LOAD_USER_CODE": "1"}
    with environ(lazy_load_env):
        port = find_free_port()
        broken_repo_file = file_relative_path(__file__, "grpc_repo_with_error.py")

        command = ["dagster", "api", "grpc"]
        command += ["--port", str(port)]
        command += ["--python-file", broken_repo_file]

        server = subprocess.Popen(command, stdout=subprocess.PIPE)
        try:
            wait_for_grpc_server(
                server,
                DagsterGrpcClient(port=port, host="localhost"),
                command,
            )
            raw_reply = DagsterGrpcClient(port=port).list_repositories()
            response = deserialize_json_to_dagster_namedtuple(raw_reply)
            assert isinstance(response, SerializableErrorInfo)
            assert "No module named" in response.message
        finally:
            server.terminate()
def test_grpc_server_down():
    """Launch a scheduled execution against a gRPC origin on an unused port.

    The launch result must be a ``ScheduledExecutionFailed`` whose first error
    mentions the connection failure, and the first job tick recorded for the
    schedule origin must be a FAILURE carrying the same message.
    """
    connection_error = "failed to connect to all addresses"

    with _default_instance() as instance:
        location_origin = GrpcServerRepositoryLocationOrigin(
            host="localhost",
            port=find_free_port(),
            socket=None,
        )
        unreachable_repo_origin = ExternalRepositoryOrigin(
            location_origin,
            repository_name="down_repo",
        )
        unreachable_schedule_origin = unreachable_repo_origin.get_job_origin("down_schedule")

        # NOTE(review): this rebinds the name yielded by the context manager;
        # kept as-is because the relationship between the two is not visible here.
        instance = DagsterInstance.get()

        launch_result = sync_launch_scheduled_execution(
            unreachable_schedule_origin, "US/Eastern"
        )
        assert isinstance(launch_result, ScheduledExecutionFailed)
        first_error_text = launch_result.errors[0].to_string()
        assert connection_error in first_error_text

        recorded_ticks = instance.get_job_ticks(unreachable_schedule_origin.get_id())
        first_tick = recorded_ticks[0]
        assert first_tick.status == JobTickStatus.FAILURE
        assert connection_error in first_tick.error.message
def test_crash_during_load():
    """Waiting on a server whose repo file crashes must report return code 123."""
    port = find_free_port()
    crashing_repo_file = file_relative_path(__file__, "crashy_grpc_repo.py")
    ipc_file = _get_ipc_output_file()

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", crashing_repo_file]
    command += ["--ipc-output-file", ipc_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    expected_message = re.escape(
        "Process exited with return code 123 while waiting for events"
    )
    try:
        with pytest.raises(Exception, match=expected_message):
            wait_for_grpc_server(server, ipc_file)
    finally:
        # Only signal the process if it has not already exited.
        still_running = server.poll() is None
        if still_running:
            server.terminate()
def open_server_process_on_dynamic_port(
    max_retries=10,
    loadable_target_origin=None,
    max_workers=1,
    heartbeat=False,
    heartbeat_timeout=30,
    lazy_load_user_code=False,
    fixed_server_id=None,
):
    """Open a gRPC server process on a freshly chosen free port, retrying on failure.

    Each attempt picks a new port with ``find_free_port`` and calls
    ``open_server_process``. An attempt is retried when that call raises
    ``CouldNotBindGrpcServerToAddress`` or returns ``None``, up to
    ``max_retries`` attempts in total.

    Args:
        max_retries: Maximum number of attempts to make.
        loadable_target_origin, max_workers, heartbeat, heartbeat_timeout,
        lazy_load_user_code, fixed_server_id: Forwarded unchanged to
            ``open_server_process``.

    Returns:
        A ``(server_process, port)`` tuple. ``server_process`` is ``None`` if
        no attempt succeeded. ``port`` is the last port tried, or ``None`` if
        no attempt was made (``max_retries <= 0``).
    """
    server_process = None
    # Bind ``port`` up front so the return statement is valid even when the
    # loop body never runs (previously an UnboundLocalError for max_retries <= 0).
    port = None
    retries = 0
    while server_process is None and retries < max_retries:
        port = find_free_port()
        try:
            server_process = open_server_process(
                port=port,
                socket=None,
                loadable_target_origin=loadable_target_origin,
                max_workers=max_workers,
                heartbeat=heartbeat,
                heartbeat_timeout=heartbeat_timeout,
                lazy_load_user_code=lazy_load_user_code,
                fixed_server_id=fixed_server_id,
            )
        except CouldNotBindGrpcServerToAddress:
            # The port was taken between discovery and bind; try another one.
            pass

        retries += 1

    return server_process, port
def ephemeral_grpc_api_client(force_port=False):
    """Yield a ``DagsterGrpcClient`` bound to a freshly opened server process.

    A TCP port is used when ``seven.IS_WINDOWS`` is true or ``force_port`` is
    set; otherwise a temporary socket path is used. In both cases
    ``CouldNotStartServerProcess`` is raised if the server process could not
    be opened, and the server process is terminated once the caller is done
    with the client.
    """
    use_port = seven.IS_WINDOWS or force_port

    if not use_port:
        with safe_tempfile_path() as socket_path:
            socket_server = open_server_process(port=None, socket=socket_path)
            if socket_server is None:
                raise CouldNotStartServerProcess(port=None, socket=socket_path)

            socket_client = DagsterGrpcClient(
                socket=socket_path, server_process=socket_server
            )
            try:
                yield socket_client
            finally:
                socket_client.terminate_server_process()
        return

    port = find_free_port()
    port_server = open_server_process(port=port, socket=None)
    if port_server is None:
        raise CouldNotStartServerProcess(port=port, socket=None)

    port_client = DagsterGrpcClient(port=port, server_process=port_server)
    try:
        yield port_client
    finally:
        port_client.terminate_server_process()
def test_lazy_load_with_error():
    """With ``--lazy-load-user-code`` a broken repo file must not block startup.

    Waiting for the server must succeed, and the deserialized
    ``list_repositories`` reply must be a ``SerializableErrorInfo`` whose
    message contains "No module named".
    """
    port = find_free_port()
    broken_repo_file = file_relative_path(__file__, "grpc_repo_with_error.py")

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", broken_repo_file]
    command += ["--lazy-load-user-code"]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(
            server,
            DagsterGrpcClient(port=port, host="localhost"),
            command,
        )
        raw_reply = DagsterGrpcClient(port=port).list_repositories()
        response = deserialize_json_to_dagster_namedtuple(raw_reply)
        assert isinstance(response, SerializableErrorInfo)
        assert "No module named" in response.message
    finally:
        server.terminate()
def test_sensor_timeout():
    """A sensor call with ``timeout=2`` must fail; the same call without it must not.

    The timed call against "slow_sensor" must raise
    ``DagsterUserCodeUnreachableError`` whose representation mentions
    "Deadline Exceeded". The same request issued without a timeout is then
    expected to complete without raising.
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", repo_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(
            server,
            DagsterGrpcClient(port=port, host="localhost"),
            command,
        )
        client = DagsterGrpcClient(port=port)

        with instance_for_test() as instance:
            location_origin = GrpcServerRepositoryLocationOrigin(
                port=port, host="localhost"
            )
            repo_origin = ExternalRepositoryOrigin(
                repository_location_origin=location_origin,
                repository_name="bar_repo",
            )

            def build_slow_sensor_args():
                # Built fresh for each call, as the two requests are independent.
                return SensorExecutionArgs(
                    repository_origin=repo_origin,
                    instance_ref=instance.get_ref(),
                    sensor_name="slow_sensor",
                    last_completion_time=None,
                    last_run_key=None,
                    cursor=None,
                )

            with pytest.raises(DagsterUserCodeUnreachableError) as exc_info:
                client.external_sensor_execution(
                    sensor_execution_args=build_slow_sensor_args(),
                    timeout=2,
                )

            failure_text = str(exc_info.getrepr())
            assert "Deadline Exceeded" in failure_text

            # Call succeeds without the timeout
            client.external_sensor_execution(
                sensor_execution_args=build_slow_sensor_args(),
            )
    finally:
        server.terminate()
def test_load_with_error(capfd):
    """Starting a server for a broken repo file must fail with an import error.

    Waiting for the server must raise, and once the process has exited the
    captured stderr must contain "No module named".
    """
    port = find_free_port()
    broken_repo_file = file_relative_path(__file__, "grpc_repo_with_error.py")

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", broken_repo_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        with pytest.raises(Exception):
            wait_for_grpc_server(
                server,
                DagsterGrpcClient(port=port, host="localhost"),
                command,
            )

        server.wait()

        _, stderr_text = capfd.readouterr()
        assert "No module named" in stderr_text
    finally:
        # Only signal the process if it has not already exited.
        still_running = server.poll() is None
        if still_running:
            server.terminate()
def test_crash_during_load():
    """Waiting on a server whose repo file crashes must report return code 123."""
    port = find_free_port()
    crashing_repo_file = file_relative_path(__file__, "crashy_grpc_repo.py")

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", crashing_repo_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    expected_message = re.escape(
        'gRPC server exited with return code 123 while starting up with the command: "dagster api grpc --port'
    )
    try:
        with pytest.raises(Exception, match=expected_message):
            wait_for_grpc_server(
                server,
                DagsterGrpcClient(port=port, host="localhost"),
                command,
            )
    finally:
        # Only signal the process if it has not already exited.
        still_running = server.poll() is None
        if still_running:
            server.terminate()
def test_load_via_env_var():
    """Start a server whose host and port come only from environment variables.

    No ``--port`` flag is passed; ``DAGSTER_CLI_API_GRPC_HOST`` and
    ``DAGSTER_CLI_API_GRPC_PORT`` are set instead, and the server must answer
    a ping on that port.
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")

    command = ["dagster", "api", "grpc"]
    command += ["--python-file", repo_file]

    grpc_env = {
        "DAGSTER_CLI_API_GRPC_HOST": "localhost",
        "DAGSTER_CLI_API_GRPC_PORT": str(port),
    }
    with environ(grpc_env):
        server = subprocess.Popen(command, stdout=subprocess.PIPE)
        try:
            wait_for_grpc_server(
                server,
                DagsterGrpcClient(port=port, host="localhost"),
                command,
            )
            reply = DagsterGrpcClient(port=port).ping("foobar")
            assert reply == "foobar"
        finally:
            server.terminate()
def test_load_grpc_server_python_env():
    """Check the entry point reported under ``--use-python-environment-entry-point``.

    The ``list_repositories`` response must report
    ``[sys.executable, "-m", "dagster"]`` as its entry point and
    ``sys.executable`` as its executable path.
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", repo_file]
    command += ["--use-python-environment-entry-point"]

    server = subprocess.Popen(command)
    try:
        client = DagsterGrpcClient(port=port, host="localhost")
        wait_for_grpc_server(server, client, command)

        response = sync_list_repositories_grpc(client)
        expected_entry_point = [sys.executable, "-m", "dagster"]
        assert response.entry_point == expected_entry_point
        assert response.executable_path == sys.executable
    finally:
        server.terminate()
def test_server_down():
    """Check run termination when the run's gRPC info tag points at a dead port.

    After a run is launched and a step has started, the launcher must report
    that it can terminate the run. Once the run's ``GRPC_INFO_TAG`` is
    replaced with a port from ``find_free_port`` it must report that it
    cannot. Restoring the original tag must allow termination to succeed.
    """
    with instance_for_test() as instance:
        target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            attribute="nope",
            python_file=file_relative_path(__file__, "test_default_run_launcher.py"),
        )
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            location_origin = GrpcServerRepositoryLocationOrigin(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            location_handle = RepositoryLocationHandle.create_from_repository_location_origin(
                location_origin
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repo = repository_location.get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("sleepy_pipeline")

            run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = run.run_id

            instance.launch_run(run_id, external_pipeline)

            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)

            saved_grpc_tag = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = seven.json.dumps(
                merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            )
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: bogus_grpc_info})

            assert not launcher.can_terminate(run_id)

            # Put the real connection info back so termination can reach the server.
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: saved_grpc_tag})

            assert launcher.terminate(run_id)

        server_process.wait()
def test_load_with_invalid_param(capfd):
    """Passing an unknown CLI flag must fail startup and report it on stderr.

    The server is launched with an extra ``--foo-param bar_value`` argument.
    Waiting for it must raise ``DagsterIPCProtocolError``, and the captured
    stderr must contain "no such option".
    """
    port = find_free_port()
    python_file = file_relative_path(__file__, "grpc_repo.py")
    ipc_output_file = _get_ipc_output_file()

    process = subprocess.Popen(
        [
            "dagster",
            "api",
            "grpc",
            "--port",
            str(port),
            "--python-file",
            python_file,
            "--ipc-output-file",
            ipc_output_file,
            "--foo-param",
            "bar_value",
        ],
        stdout=subprocess.PIPE,
    )

    try:
        with pytest.raises(DagsterIPCProtocolError):
            wait_for_grpc_server(process, ipc_output_file)
    finally:
        process.terminate()

    _, err = capfd.readouterr()

    # The expected text was previously the truncated literal "no such optio";
    # assert on the full phrase instead.
    assert "no such option" in err
def test_server_down():
    """Check run termination when the run's gRPC info tag points at a dead port.

    After a run is launched and a step has started, the launcher must report
    that it can terminate the run. Once the run's ``GRPC_INFO_TAG`` is
    replaced with a port from ``find_free_port`` it must report that it
    cannot. Restoring the original tag must allow termination to succeed.
    """
    with grpc_instance() as instance:
        repo_yaml_path = file_relative_path(__file__, "repo.yaml")
        reconstructable = ReconstructableRepository.from_legacy_repository_yaml(
            repo_yaml_path
        )
        target_origin = reconstructable.get_origin().loadable_target_origin
        server_process = GrpcServerProcess(
            loadable_target_origin=target_origin, max_workers=4, force_port=True
        )

        with server_process.create_ephemeral_client() as api_client:
            location_handle = RepositoryLocationHandle.create_grpc_server_location(
                location_name="test",
                port=api_client.port,
                socket=api_client.socket,
                host=api_client.host,
            )
            repository_location = GrpcServerRepositoryLocation(location_handle)

            external_repo = repository_location.get_repository("nope")
            external_pipeline = external_repo.get_full_external_pipeline("sleepy_pipeline")

            run = instance.create_run_for_pipeline(
                pipeline_def=sleepy_pipeline, run_config=None
            )
            run_id = run.run_id

            launcher = instance.run_launcher
            launcher.launch_run(instance, run, external_pipeline)

            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)

            saved_grpc_tag = instance.get_run_by_id(run_id).tags[GRPC_INFO_TAG]

            # Replace run tags with an invalid port
            bogus_grpc_info = seven.json.dumps(
                merge_dicts({"host": "localhost"}, {"port": find_free_port()})
            )
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: bogus_grpc_info})

            assert not launcher.can_terminate(run_id)

            # Put the real connection info back so termination can reach the server.
            instance.add_run_tags(run_id, {GRPC_INFO_TAG: saved_grpc_tag})

            assert launcher.terminate(run_id)

        server_process.wait()
def test_load_with_invalid_param(capfd):
    """Passing an unknown CLI flag must fail startup and report it on stderr.

    The server is launched with an extra ``--foo-param bar_value`` argument.
    Waiting for it must raise with a message reporting return code 2, and the
    captured stderr must contain "no such option".
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", repo_file]
    command += ["--foo-param", "bar_value"]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    expected_message = 'gRPC server exited with return code 2 while starting up with the command: "dagster api grpc --port'
    try:
        with pytest.raises(Exception, match=expected_message):
            wait_for_grpc_server(
                server,
                DagsterGrpcClient(port=port, host="localhost"),
                command,
            )
    finally:
        server.terminate()

    _, stderr_text = capfd.readouterr()
    assert "no such option" in stderr_text
def test_grpc_watch_thread_server_update():
    """The watch thread must fire ``on_updated`` only after the server is replaced.

    While the initial server runs, the callback must not have been invoked.
    After that server is interrupted and a second one is opened on the same
    port, the callback must have been invoked by the time the watch thread is
    shut down.
    """
    port = find_free_port()
    update_seen = {}

    def mark_updated():
        update_seen["yup"] = True

    # Create initial server
    initial_server = open_server_process(port=port, socket=None)
    try:
        # Start watch thread
        client = DagsterGrpcClient(port=port)
        watch_interval = 4
        shutdown_event, watch_thread = create_grpc_watch_thread(
            client,
            on_updated=mark_updated,
            watch_interval=watch_interval,
        )
        watch_thread.start()
        time.sleep(watch_interval * 2)
    finally:
        interrupt_ipc_subprocess_pid(initial_server.pid)

    assert not update_seen

    # Create updated server
    replacement_server = open_server_process(port=port, socket=None)
    try:
        time.sleep(watch_interval * 2)
    finally:
        interrupt_ipc_subprocess_pid(replacement_server.pid)

    shutdown_event.set()
    watch_thread.join()

    assert update_seen
def test_load_with_error(capfd):
    """Starting a server for a broken repo file must fail with an import error.

    Waiting on the IPC output file must raise ``DagsterIPCProtocolError`` and
    the captured stderr must contain "No module named".
    """
    port = find_free_port()
    broken_repo_file = file_relative_path(__file__, "grpc_repo_with_error.py")
    ipc_file = _get_ipc_output_file()

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", broken_repo_file]
    command += ["--ipc-output-file", ipc_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        with pytest.raises(DagsterIPCProtocolError):
            wait_for_grpc_server(ipc_file)

        _, stderr_text = capfd.readouterr()
        assert "No module named" in stderr_text
    finally:
        server.terminate()
def test_ping():
    """A server started from the CLI must echo a ping back unchanged."""
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")
    ipc_file = _get_ipc_output_file()

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", repo_file]
    command += ["--ipc-output-file", ipc_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(ipc_file)
        reply = DagsterGrpcClient(port=port).ping("foobar")
        assert reply == "foobar"
    finally:
        server.terminate()
def test_streaming():
    """``streaming_ping`` must yield ten ordered results that echo the input.

    Each result's "sequence_number" must equal its position in the stream and
    its "echo" must be "foo".
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")
    ipc_file = _get_ipc_output_file()

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", repo_file]
    command += ["--ipc-output-file", ipc_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(ipc_file)

        api_client = DagsterGrpcClient(port=port)
        # Drain the whole stream before checking it.
        streamed = list(api_client.streaming_ping(sequence_length=10, echo="foo"))
        assert len(streamed) == 10

        for position, item in enumerate(streamed):
            assert item["sequence_number"] == position
            assert item["echo"] == "foo"
    finally:
        server.terminate()
def test_lazy_load_with_error():
    """With ``--lazy-load-user-code`` a broken repo file must not block startup.

    Waiting on the IPC output file must succeed, and ``list_repositories``
    must return a ``SerializableErrorInfo`` whose message contains
    "No module named".
    """
    port = find_free_port()
    broken_repo_file = file_relative_path(__file__, "grpc_repo_with_error.py")
    ipc_file = _get_ipc_output_file()

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", broken_repo_file]
    command += ["--lazy-load-user-code"]
    command += ["--ipc-output-file", ipc_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(ipc_file)
        response = DagsterGrpcClient(port=port).list_repositories()
        assert isinstance(response, SerializableErrorInfo)
        assert "No module named" in response.message
    finally:
        server.terminate()
def test_server_port_and_socket():
    """Constructing ``DagsterGrpcServer`` with both a socket and a port must fail a check."""
    with safe_tempfile_path() as skt:
        expected_message = re.escape(
            "You must pass one and only one of `port` or `socket`."
        )
        with pytest.raises(check.CheckError, match=expected_message):
            DagsterGrpcServer(socket=skt, port=find_free_port())
def local_port_forward_postgres():
    """Port-forward the postgres pod to a free local port and yield that port.

    The pod name is looked up with ``kubectl get pods`` in ``helm_namespace``
    and ``kubectl port-forward`` is started in a subprocess. The function then
    retries ``psycopg2.connect`` once per second until a connection succeeds.
    The forwarding subprocess is terminated when the generator is closed or
    an error occurs.

    Yields:
        The local port forwarded to port 5432 of the postgres pod.

    Raises:
        Exception: If no connection succeeds within
            ``PG_PORT_FORWARDING_TIMEOUT`` seconds.
    """
    print('Port-forwarding postgres')
    postgres_pod_name = (
        check_output(
            [
                'kubectl',
                'get',
                'pods',
                '--namespace',
                helm_namespace,
                '-l',
                'app=postgresql,release=dagster',
                '-o',
                'jsonpath="{.items[0].metadata.name}"',
            ]
        )
        .decode('utf-8')
        .strip('"')
    )
    forward_port = find_free_port()

    wait_for_pod(postgres_pod_name, namespace=helm_namespace)

    # Start the forwarder before entering ``try``: if Popen itself fails there
    # is nothing to terminate, and the ``finally`` clause must not hit an
    # unbound ``p`` and mask the original error.
    p = subprocess.Popen(
        [
            'kubectl',
            'port-forward',
            '--namespace',
            helm_namespace,
            postgres_pod_name,
            '{forward_port}:5432'.format(forward_port=forward_port),
        ]
    )
    try:
        # Validate port forwarding works
        start = time.time()

        while True:
            if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                raise Exception('Timed out while waiting for postgres port forwarding')

            print(
                'Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be'
                ' available...' % (postgres_pod_name, forward_port)
            )
            try:
                conn = psycopg2.connect(
                    database='test',
                    user='******',
                    password='******',
                    host='localhost',
                    port=forward_port,
                )
                conn.close()
                break
            # Retry on any ordinary failure, but let KeyboardInterrupt and
            # SystemExit propagate so the wait loop can be interrupted.
            except Exception:  # pylint: disable=broad-except
                time.sleep(1)
                continue

        yield forward_port

    finally:
        print('Terminating port-forwarding')
        p.terminate()
def test_client_port_and_socket():
    """Constructing ``DagsterGrpcClient`` with both a port and a socket must fail a check."""
    free_port = find_free_port()
    with safe_tempfile_path() as skt:
        expected_message = re.escape(
            "You must pass one and only one of `port` or `socket`."
        )
        with pytest.raises(check.CheckError, match=expected_message):
            DagsterGrpcClient(port=free_port, socket=skt)
def local_port_forward_postgres(namespace):
    """Port-forward the postgres pod to a free local port and yield that port.

    The pod name is looked up with ``kubectl get pods`` in ``namespace`` and
    ``kubectl port-forward`` is started in a subprocess. The function then
    retries ``psycopg2.connect`` once per second until a connection succeeds.
    The forwarding subprocess is terminated when the generator is closed or
    an error occurs.

    Args:
        namespace: Kubernetes namespace passed to every ``kubectl`` call and
            to ``wait_for_pod``.

    Yields:
        The local port forwarded to port 5432 of the postgres pod.

    Raises:
        Exception: If no connection succeeds within
            ``PG_PORT_FORWARDING_TIMEOUT`` seconds.
    """
    print("Port-forwarding postgres")
    postgres_pod_name = (
        check_output(
            [
                "kubectl",
                "get",
                "pods",
                "--namespace",
                namespace,
                "-l",
                "app=postgresql,release=dagster",
                "-o",
                'jsonpath="{.items[0].metadata.name}"',
            ]
        )
        .decode("utf-8")
        .strip('"')
    )
    forward_port = find_free_port()

    wait_for_pod(postgres_pod_name, namespace=namespace)

    # Start the forwarder before entering ``try``: if Popen itself fails there
    # is nothing to terminate, and the ``finally`` clause must not hit an
    # unbound ``p`` and mask the original error.
    p = subprocess.Popen(
        [
            "kubectl",
            "port-forward",
            "--namespace",
            namespace,
            postgres_pod_name,
            "{forward_port}:5432".format(forward_port=forward_port),
        ]
    )
    try:
        # Validate port forwarding works
        start = time.time()

        while True:
            if time.time() - start > PG_PORT_FORWARDING_TIMEOUT:
                raise Exception("Timed out while waiting for postgres port forwarding")

            print(
                "Waiting for port forwarding from k8s pod %s:5432 to localhost:%d to be"
                " available..." % (postgres_pod_name, forward_port)
            )
            try:
                conn = psycopg2.connect(
                    database="test",
                    user="******",
                    password="******",
                    host="localhost",
                    port=forward_port,
                )
                conn.close()
                break
            # Retry on any ordinary failure, but let KeyboardInterrupt and
            # SystemExit propagate so the wait loop can be interrupted.
            except Exception:  # pylint: disable=broad-except
                time.sleep(1)
                continue

        yield forward_port

    finally:
        print("Terminating port-forwarding")
        p.terminate()
def test_server_port():
    """A server process opened on a free port must answer a ping on that port."""
    port = find_free_port()
    grpc_process = open_server_process(port=port, socket=None)
    assert grpc_process is not None

    try:
        reply = DagsterGrpcClient(port=port).ping("foobar")
        assert reply == "foobar"
    finally:
        # Guard kept from the original: interrupt only when a process exists.
        if grpc_process is not None:
            interrupt_ipc_subprocess_pid(grpc_process.pid)
def test_sensor_timeout():
    """A sensor call with ``timeout=2`` must fail; the same call without it must not.

    The timed call against "slow_sensor" must raise an exception matching
    "Deadline Exceeded". The same request issued without a timeout is then
    expected to complete without raising.
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")
    ipc_file = _get_ipc_output_file()

    command = ["dagster", "api", "grpc"]
    command += ["--port", str(port)]
    command += ["--python-file", repo_file]
    command += ["--ipc-output-file", ipc_file]

    server = subprocess.Popen(command, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(server, ipc_file)
        client = DagsterGrpcClient(port=port)

        with instance_for_test() as instance:
            location_origin = GrpcServerRepositoryLocationOrigin(
                port=port, host="localhost"
            )
            repo_origin = ExternalRepositoryOrigin(
                repository_location_origin=location_origin,
                repository_name="bar_repo",
            )

            def build_slow_sensor_args():
                # Built fresh for each call, as the two requests are independent.
                return SensorExecutionArgs(
                    repository_origin=repo_origin,
                    instance_ref=instance.get_ref(),
                    sensor_name="slow_sensor",
                    last_completion_time=None,
                    last_run_key=None,
                )

            with pytest.raises(Exception, match="Deadline Exceeded"):
                client.external_sensor_execution(
                    sensor_execution_args=build_slow_sensor_args(),
                    timeout=2,
                )

            # Call succeeds without the timeout
            client.external_sensor_execution(
                sensor_execution_args=build_slow_sensor_args(),
            )
    finally:
        server.terminate()