def test_bad_load():
    with _default_instance() as instance:
        instance = DagsterInstance.get()
        working_directory = os.path.dirname(__file__)

        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            attribute="doesnt_exist",
            working_directory=working_directory,
        )

        repo_origin = ExternalRepositoryOrigin(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                loadable_target_origin=loadable_target_origin
            ),
            "doesnt_exist",
        )

        schedule_origin = repo_origin.get_job_origin("also_doesnt_exist")

        result = sync_launch_scheduled_execution(schedule_origin)
        assert isinstance(result, ScheduledExecutionFailed)
        assert "doesnt_exist not found at module scope in file" in result.errors[0].to_string()

        ticks = instance.get_job_ticks(schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert "doesnt_exist not found at module scope in file" in ticks[0].error.message
def test_origin_ids_stable():
    # This test asserts fixed schedule origin IDs to prevent any changes from
    # accidentally shifting these ids that are persisted to ScheduleStorage
    python_origin = ExternalJobOrigin(
        ExternalRepositoryOrigin(
            ManagedGrpcPythonEnvRepositoryLocationOrigin(
                LoadableTargetOrigin(
                    executable_path="/fake/executable",
                    python_file="/fake/file/path",
                    attribute="fake_attribute",
                )
            ),
            "fake_repo",
        ),
        "fake_schedule",
    )
    assert python_origin.get_id() == "eb01cc697463ba614a67567fdeaafcccc60f0fc4"

    grpc_origin = ExternalJobOrigin(
        ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(host="fakehost", port=52618),
            "repo_name",
        ),
        "fake_schedule",
    )
    assert grpc_origin.get_id() == "0961ecddbddfc71104adf036ebe8cd97a94dc77b"
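# A minimal sketch of why the IDs above stay stable: to the best of my
# understanding, in this version of Dagster an origin's get_id() is a content
# hash of the serialized origin tuple, so any change to the origin's fields or
# serialization format shifts the persisted ID. The helper below is
# hypothetical and only illustrates that idea; it is not the library's API.
import hashlib

def _hypothetical_origin_id(serialized_origin: str) -> str:
    # Hash the serialized representation; a changed field yields a changed ID.
    return hashlib.sha1(serialized_origin.encode("utf-8")).hexdigest()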
def test_grpc_server_down():
    with _default_instance() as instance:
        down_grpc_repo_origin = ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(
                host="localhost",
                port=find_free_port(),
                socket=None,
            ),
            repository_name="down_repo",
        )
        down_grpc_schedule_origin = down_grpc_repo_origin.get_job_origin("down_schedule")

        instance = DagsterInstance.get()
        result = sync_launch_scheduled_execution(down_grpc_schedule_origin, "US/Eastern")
        assert isinstance(result, ScheduledExecutionFailed)
        assert "failed to connect to all addresses" in result.errors[0].to_string()

        ticks = instance.get_job_ticks(down_grpc_schedule_origin.get_id())
        assert ticks[0].status == JobTickStatus.FAILURE
        assert "failed to connect to all addresses" in ticks[0].error.message
@contextmanager  # yields a single value, so it is meant to be entered via `with`
def python_schedule_origin(schedule_name):
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )

    repo_origin = ExternalRepositoryOrigin(
        ManagedGrpcPythonEnvRepositoryLocationOrigin(
            loadable_target_origin=loadable_target_origin
        ),
        "the_repo",
    )

    yield repo_origin.get_job_origin(schedule_name)
@contextmanager  # cleanup after the yield only runs when used via `with`
def grpc_schedule_origin(schedule_name):
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable, python_file=__file__, attribute="the_repo"
    )
    server_process = GrpcServerProcess(loadable_target_origin=loadable_target_origin)
    with server_process.create_ephemeral_client() as api_client:
        repo_origin = ExternalRepositoryOrigin(
            GrpcServerRepositoryLocationOrigin(
                host=api_client.host,
                port=api_client.port,
                socket=api_client.socket,
            ),
            repository_name="the_repo",
        )
        yield repo_origin.get_job_origin(schedule_name)
    server_process.wait()
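# Usage sketch for the two helpers above. Both are generator-based context
# managers, so callers enter them with `with` and receive a schedule origin;
# "simple_schedule" is a hypothetical schedule name used only for illustration.
def _example_launch_via_grpc_origin():
    with grpc_schedule_origin("simple_schedule") as schedule_origin:
        # The ephemeral gRPC server stays up for the body of the `with` block
        # and is torn down on exit, before server_process.wait() returns.
        return sync_launch_scheduled_execution(schedule_origin)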
def _get_unloadable_sensor_origin():
    working_directory = os.path.dirname(__file__)
    recon_repo = ReconstructableRepository.for_file(__file__, "doesnt_exist", working_directory)
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(recon_repo), "fake_repository"
    ).get_job_origin("doesnt_exist")
def sync_get_streaming_external_repositories_data_grpc(
    api_client: "DagsterGrpcClient", repository_location: "RepositoryLocation"
) -> Mapping[str, ExternalRepositoryData]:
    from dagster.core.host_representation import ExternalRepositoryOrigin, RepositoryLocation

    check.inst_param(repository_location, "repository_location", RepositoryLocation)

    repo_datas = {}
    for repository_name in repository_location.repository_names:  # type: ignore
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location.origin,
                    repository_name,
                )
            )
        )

        external_repository_data = deserialize_as(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            ),
            ExternalRepositoryData,
        )
        repo_datas[repository_name] = external_repository_data
    return repo_datas
def sync_get_streaming_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin,
                    repository_name,
                )
            )
        )

        external_repository_data = deserialize_json_to_dagster_namedtuple(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            )
        )

        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )

    return repos
def sync_get_external_repositories_grpc(api_client, repository_location_handle):
    check.inst_param(
        repository_location_handle, "repository_location_handle", RepositoryLocationHandle
    )

    repos = []
    for repository_name in repository_location_handle.repository_names:
        external_repository_data = check.inst(
            api_client.external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location_handle.origin,
                    repository_name,
                )
            ),
            ExternalRepositoryData,
        )

        repos.append(
            ExternalRepository(
                external_repository_data,
                RepositoryHandle(
                    repository_name=external_repository_data.name,
                    repository_location_handle=repository_location_handle,
                ),
            )
        )

    return repos
def sync_get_streaming_external_repositories_data_grpc(api_client, repository_location):
    from dagster.core.host_representation import (
        RepositoryLocation,
        ExternalRepositoryOrigin,
    )

    check.inst_param(repository_location, "repository_location", RepositoryLocation)

    repo_datas = {}
    for repository_name in repository_location.repository_names:
        external_repository_chunks = list(
            api_client.streaming_external_repository(
                external_repository_origin=ExternalRepositoryOrigin(
                    repository_location.origin,
                    repository_name,
                )
            )
        )

        external_repository_data = deserialize_json_to_dagster_namedtuple(
            "".join(
                [
                    chunk["serialized_external_repository_chunk"]
                    for chunk in external_repository_chunks
                ]
            )
        )
        repo_datas[repository_name] = external_repository_data
    return repo_datas
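# Why the chunked protocol in the functions above exists (hedged reasoning):
# a serialized repository can exceed a gRPC message size limit, so the server
# streams it as a sequence of string chunks that the client re-joins before
# deserializing. A minimal sketch of the client-side re-assembly, with the
# chunk shape taken from the dict key used above:
def _reassemble_chunks(chunks):
    # chunks: iterable of {"serialized_external_repository_chunk": str}
    return "".join(chunk["serialized_external_repository_chunk"] for chunk in chunks)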
def fake_repo_target():
    return ExternalRepositoryOrigin(
        ManagedGrpcPythonEnvRepositoryLocationOrigin(
            LoadableTargetOrigin(
                executable_path=sys.executable, module_name="fake", attribute="fake"
            ),
        ),
        "fake_repo_name",
    )
def get_external_repository_origin_from_kwargs(kwargs):
    provided_repo_name = kwargs.get("repository")

    if not provided_repo_name:
        raise click.UsageError("Must provide --repository to load a repository")

    repository_location_origin = get_repository_location_origin_from_kwargs(kwargs)

    return ExternalRepositoryOrigin(repository_location_origin, provided_repo_name)
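# Usage sketch for the CLI helper above. The "repository" key is the one the
# helper reads directly; the other keys are assumptions about what
# get_repository_location_origin_from_kwargs accepts (they mirror the CLI's
# --python-file / --attribute options, but are hypothetical here):
def _example_origin_from_cli_kwargs():
    kwargs = {
        "repository": "the_repo",
        "python_file": "repo.py",  # assumed location option
        "attribute": "the_repo",   # assumed location option
    }
    return get_external_repository_origin_from_kwargs(kwargs)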
def _get_unloadable_sensor_origin(name):
    working_directory = os.path.dirname(__file__)
    loadable_target_origin = LoadableTargetOrigin(
        executable_path=sys.executable,
        python_file=__file__,
        working_directory=working_directory,
    )
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(loadable_target_origin), "fake_repository"
    ).get_instigator_origin(name)
def _unloadable_partition_set_origin():
    working_directory = os.path.dirname(__file__)
    return ExternalRepositoryOrigin(
        InProcessRepositoryLocationOrigin(
            LoadableTargetOrigin(
                executable_path=sys.executable,
                python_file=__file__,
                working_directory=working_directory,
            )
        ),
        "fake_repository",
    ).get_partition_set_origin("doesnt_exist")
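# The unloadable-origin helpers above deliberately point at targets that do
# not exist ("doesnt_exist"), so tests can exercise failure paths. A hedged
# usage sketch; the assertion mirrors how the tests in this section treat
# origins as addresses rather than loaded objects:
def _example_unloadable_origin_still_has_id():
    origin = _unloadable_partition_set_origin()
    # An origin is just an address: its ID is computable even when the target
    # it points to cannot actually be loaded.
    assert origin.get_id()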
def test_bad_load_sensor_repository(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(year=2019, month=2, day=27, hour=23, minute=59, second=59, tz="UTC"),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
        instance,
        grpc_server_registry,
        external_repo,
    ):
        with pendulum.test(freeze_datetime):
            external_sensor = external_repo.get_external_sensor("simple_sensor")

            valid_origin = external_sensor.get_external_origin()

            # Swap out a new repository name
            invalid_repo_origin = ExternalJobOrigin(
                ExternalRepositoryOrigin(
                    valid_origin.external_repository_origin.repository_location_origin,
                    "invalid_repo_name",
                ),
                valid_origin.job_name,
            )

            instance.add_job_state(
                JobState(invalid_repo_origin, JobType.SENSOR, JobStatus.RUNNING)
            )

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(invalid_repo_origin.get_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)

            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(invalid_repo_origin.get_id())
            assert len(ticks) == 0

            captured = capfd.readouterr()
            assert "Sensor daemon caught an error for sensor simple_sensor" in captured.out
            assert (
                "Could not find repository invalid_repo_name in location test_location to run sensor simple_sensor"
                in captured.out
            )
def test_cancel_run():
    with instance_for_test() as instance:
        loadable_target_origin = LoadableTargetOrigin(
            executable_path=sys.executable,
            python_file=__file__,
            working_directory=None,
        )

        server_process = GrpcServerProcess(loadable_target_origin, max_workers=10)

        with server_process.create_ephemeral_client() as api_client:
            streaming_results = []

            pipeline_run = instance.create_run_for_pipeline(
                streaming_pipeline,
                run_config={"solids": {"streamer": {"config": {"length": 20}}}},
            )
            execute_run_args = ExecuteExternalPipelineArgs(
                pipeline_origin=ExternalPipelineOrigin(
                    ExternalRepositoryOrigin(
                        repository_location_origin=GrpcServerRepositoryLocationOrigin(
                            host="localhost",
                            socket=api_client.socket,
                            port=api_client.port,
                        ),
                        repository_name="test_repository",
                    ),
                    pipeline_name="streaming_pipeline",
                ),
                pipeline_run_id=pipeline_run.run_id,
                instance_ref=instance.get_ref(),
            )
            stream_events_result_thread = threading.Thread(
                target=_stream_events_target,
                args=[streaming_results, api_client, execute_run_args],
            )
            stream_events_result_thread.daemon = True
            stream_events_result_thread.start()
            poll_for_step_start(instance, pipeline_run.run_id)

            res = api_client.cancel_execution(
                cancel_execution_request=CancelExecutionRequest(run_id=pipeline_run.run_id)
            )
            assert res.success is True

            poll_for_finished_run(instance, pipeline_run.run_id)

            logs = instance.all_logs(pipeline_run.run_id)
            assert (
                len(
                    [
                        ev
                        for ev in logs
                        if ev.dagster_event.event_type_value == "STEP_MATERIALIZATION"
                    ]
                )
                < 20
            )

            # soft termination
            assert [ev for ev in logs if ev.dagster_event.event_type_value == "STEP_FAILURE"]

        server_process.wait()
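# test_cancel_run references _stream_events_target, which is not shown in this
# section. A plausible minimal sketch (hypothetical implementation, assuming
# the client's execute_run call streams events for the given args): it drains
# the event stream into a shared list so the main thread can cancel mid-run.
def _stream_events_target(results, api_client, execute_run_args):
    for event in api_client.execute_run(execute_run_args=execute_run_args):
        results.append(event)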