def test_queued_pipeline_origin_check():
    """A QUEUED run must carry both an external pipeline origin and a code origin.

    Constructing a QUEUED ``PipelineRun`` (directly or via ``with_status``)
    without the origins should fail the internal check.
    """
    pointer = ModuleCodePointer("fake", "fake")
    external_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(ReconstructableRepository(pointer)),
            "foo_repo",
        ),
        "foo",
    )
    code_origin = PipelinePythonOrigin(
        pipeline_name="foo",
        repository_origin=RepositoryPythonOrigin(
            sys.executable,
            pointer,
        ),
    )

    # Supplying both origins is valid.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=external_origin,
        pipeline_code_origin=code_origin,
    )

    # Omitting the origins must raise, whether set at construction...
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    # ...or via a status transition.
    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
def get_external_origin(self):
    """
    Hack! Inject the origin that the k8s images will use.

    The BK image uses a different directory structure
    (/workdir/python_modules/dagster-test/dagster_test/test_project) than the
    images inside the kind cluster (/dagster_test/test_project), so the normal
    origin won't resolve there — inject one pointing at the in-cluster path.
    """
    pointer = FileCodePointer(
        python_file="/dagster_test/test_project/test_pipelines/repo.py",
        fn_name="define_demo_execution_repo",
    )
    recon_repo = ReconstructableRepository(
        pointer=pointer,
        container_image=self._container_image,
        executable_path="python",
        entry_point=DEFAULT_DAGSTER_ENTRY_POINT,
    )
    repo_origin = ExternalRepositoryOrigin(
        repository_location_origin=InProcessRepositoryLocationOrigin(
            recon_repo=recon_repo
        ),
        repository_name="demo_execution_repo",
    )
    return ExternalJobOrigin(
        external_repository_origin=repo_origin,
        job_name=self.name,
    )
def test_sensor_timeout():
    """A short client timeout against a slow sensor should raise
    DagsterUserCodeUnreachableError ("Deadline Exceeded"); the same call
    succeeds when no timeout is set.
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")
    launch_args = [
        "dagster",
        "api",
        "grpc",
        "--port",
        str(port),
        "--python-file",
        repo_file,
    ]
    server_process = subprocess.Popen(launch_args, stdout=subprocess.PIPE)
    try:
        wait_for_grpc_server(
            server_process, DagsterGrpcClient(port=port, host="localhost"), launch_args
        )
        client = DagsterGrpcClient(port=port)
        with instance_for_test() as instance:
            origin = ExternalRepositoryOrigin(
                repository_location_origin=GrpcServerRepositoryLocationOrigin(
                    port=port, host="localhost"
                ),
                repository_name="bar_repo",
            )
            # Immutable value object — safe to reuse for both calls.
            execution_args = SensorExecutionArgs(
                repository_origin=origin,
                instance_ref=instance.get_ref(),
                sensor_name="slow_sensor",
                last_completion_time=None,
                last_run_key=None,
                cursor=None,
            )

            with pytest.raises(DagsterUserCodeUnreachableError) as exc_info:
                client.external_sensor_execution(
                    sensor_execution_args=execution_args,
                    timeout=2,
                )
            assert "Deadline Exceeded" in str(exc_info.getrepr())

            # Call succeeds without the timeout
            client.external_sensor_execution(sensor_execution_args=execution_args)
    finally:
        server_process.terminate()
def test_sensor_timeout():
    """A short client timeout against a slow sensor should raise with
    "Deadline Exceeded"; the same call succeeds when no timeout is set.
    """
    port = find_free_port()
    repo_file = file_relative_path(__file__, "grpc_repo.py")
    ipc_output_file = _get_ipc_output_file()
    server_process = subprocess.Popen(
        [
            "dagster",
            "api",
            "grpc",
            "--port",
            str(port),
            "--python-file",
            repo_file,
            "--ipc-output-file",
            ipc_output_file,
        ],
        stdout=subprocess.PIPE,
    )
    try:
        wait_for_grpc_server(server_process, ipc_output_file)
        client = DagsterGrpcClient(port=port)
        with instance_for_test() as instance:
            origin = ExternalRepositoryOrigin(
                repository_location_origin=GrpcServerRepositoryLocationOrigin(
                    port=port, host="localhost"
                ),
                repository_name="bar_repo",
            )
            # Immutable value object — safe to reuse for both calls.
            execution_args = SensorExecutionArgs(
                repository_origin=origin,
                instance_ref=instance.get_ref(),
                sensor_name="slow_sensor",
                last_completion_time=None,
                last_run_key=None,
            )

            with pytest.raises(Exception, match="Deadline Exceeded"):
                client.external_sensor_execution(
                    sensor_execution_args=execution_args,
                    timeout=2,
                )

            # Call succeeds without the timeout
            client.external_sensor_execution(sensor_execution_args=execution_args)
    finally:
        server_process.terminate()
def get_repository_origin_from_kwargs(kwargs):
    """Build an ``ExternalRepositoryOrigin`` from parsed CLI kwargs.

    Raises click.UsageError if ``--repository`` was not supplied.
    """
    repo_name = kwargs.get("repository")
    if not repo_name:
        raise click.UsageError("Must provide --repository to load a repository")

    location_origin = get_repository_location_origin_from_kwargs(kwargs)
    return ExternalRepositoryOrigin(location_origin, repo_name)
def create_invalid_run(instance, **kwargs):
    """Create a test run whose origin points at a nonexistent module/pipeline,
    for exercising error paths. Extra kwargs pass through to
    ``create_run_for_test``.
    """
    pointer = ModuleCodePointer("fake", "fake")
    origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(ReconstructableRepository(pointer)),
            "foo",
        ),
        "wrong-pipeline",
    )
    create_run_for_test(
        instance,
        external_pipeline_origin=origin,
        pipeline_name="wrong-pipeline",
        **kwargs,
    )
def test_queued_pipeline_origin_check():
    """A QUEUED run must carry an external pipeline origin.

    Constructing a QUEUED ``PipelineRun`` (directly or via ``with_status``)
    without one should fail the internal check.
    """
    external_origin = ExternalPipelineOrigin(
        ExternalRepositoryOrigin(
            InProcessRepositoryLocationOrigin(
                ReconstructableRepository(ModuleCodePointer("fake", "fake"))
            ),
            "foo_repo",
        ),
        "foo",
    )

    # With an origin supplied, construction succeeds.
    PipelineRun(
        status=PipelineRunStatus.QUEUED,
        external_pipeline_origin=external_origin,
    )

    # Without one it must raise, whether set at construction...
    with pytest.raises(check.CheckError):
        PipelineRun(status=PipelineRunStatus.QUEUED)

    # ...or via a status transition.
    with pytest.raises(check.CheckError):
        PipelineRun().with_status(PipelineRunStatus.QUEUED)
def get_external_origin(self):
    """
    Hack! Inject the origin that the k8s images will use.

    The k8s helm chart workspace uses a gRPC server repo location origin, so
    the normal origin won't work — inject one targeting the user code
    deployment service instead.
    """
    location_origin = GrpcServerRepositoryLocationOrigin(
        host="user-code-deployment-1",
        port=3030,
        location_name="user-code-deployment-1",
    )
    return ExternalInstigatorOrigin(
        external_repository_origin=ExternalRepositoryOrigin(
            repository_location_origin=location_origin,
            repository_name="demo_execution_repo",
        ),
        instigator_name=self.name,
    )
def get_external_origin(self):
    """Return the ``ExternalRepositoryOrigin`` for this repository, built from
    its location handle's origin and its name."""
    handle = self.repository_location_handle
    return ExternalRepositoryOrigin(handle.origin, self.repository_name)
def test_external_job_origin_instigator_origin():
    """Backcompat round-trip for the ExternalJobOrigin -> ExternalInstigatorOrigin
    rename: current-code serialization must deserialize in a legacy serdes
    environment (and vice versa), and ``get_id`` must be stable across the
    rename in both directions.

    Fix: removed the unused ``from dagster.serdes.serdes import _WHITELIST_MAP``
    import that the original carried.
    """

    def build_legacy_whitelist_map():
        # Recreate the pre-rename serdes environment with the legacy class
        # names registered, so we can (de)serialize against old wire formats.
        legacy_env = WhitelistMap.create()

        @_whitelist_for_serdes(legacy_env)
        class ExternalJobOrigin(
            namedtuple("_ExternalJobOrigin", "external_repository_origin job_name")
        ):
            def get_id(self):
                return create_snapshot_id(self)

        @_whitelist_for_serdes(legacy_env)
        class ExternalRepositoryOrigin(
            namedtuple(
                "_ExternalRepositoryOrigin", "repository_location_origin repository_name"
            )
        ):
            def get_id(self):
                return create_snapshot_id(self)

        class GrpcServerOriginSerializer(DefaultNamedTupleSerializer):
            @classmethod
            def skip_when_empty(cls):
                return {"use_ssl"}

        @_whitelist_for_serdes(whitelist_map=legacy_env, serializer=GrpcServerOriginSerializer)
        class GrpcServerRepositoryLocationOrigin(
            namedtuple(
                "_GrpcServerRepositoryLocationOrigin",
                "host port socket location_name use_ssl",
            ),
        ):
            def __new__(cls, host, port=None, socket=None, location_name=None, use_ssl=None):
                return super(GrpcServerRepositoryLocationOrigin, cls).__new__(
                    cls, host, port, socket, location_name, use_ssl
                )

        return (
            legacy_env,
            ExternalJobOrigin,
            ExternalRepositoryOrigin,
            GrpcServerRepositoryLocationOrigin,
        )

    legacy_env, job_klass, repo_klass, location_klass = build_legacy_whitelist_map()

    from dagster.core.host_representation.origin import (
        ExternalInstigatorOrigin,
        ExternalRepositoryOrigin,
        GrpcServerRepositoryLocationOrigin,
    )

    # serialize from current code, compare against old code
    instigator_origin = ExternalInstigatorOrigin(
        external_repository_origin=ExternalRepositoryOrigin(
            repository_location_origin=GrpcServerRepositoryLocationOrigin(
                host="localhost", port=1234, location_name="test_location"
            ),
            repository_name="the_repo",
        ),
        instigator_name="simple_schedule",
    )
    instigator_origin_str = serialize_dagster_namedtuple(instigator_origin)
    instigator_to_job = _deserialize_json(instigator_origin_str, legacy_env)
    assert isinstance(instigator_to_job, job_klass)
    # ensure that the origin id is stable across the rename
    assert instigator_to_job.get_id() == instigator_origin.get_id()

    # serialize from old code, compare against current code
    job_origin = job_klass(
        external_repository_origin=repo_klass(
            repository_location_origin=location_klass(
                host="localhost", port=1234, location_name="test_location"
            ),
            repository_name="the_repo",
        ),
        job_name="simple_schedule",
    )
    job_origin_str = serialize_value(job_origin, legacy_env)
    job_to_instigator = deserialize_json_to_dagster_namedtuple(job_origin_str)
    assert isinstance(job_to_instigator, ExternalInstigatorOrigin)
    # ensure that the origin id is stable across the rename
    assert job_to_instigator.get_id() == job_origin.get_id()