def Prepare(self, request, context=None):
  # Derive the preparation id from the job name plus a UUID.
  logging.debug('Got Prepare request.')
  preparation_id = '%s-%s' % (request.job_name, uuid.uuid4())
  provision_info = fn_api_runner.ExtendedProvisionInfo(
      beam_provision_api_pb2.ProvisionInfo(
          job_id=preparation_id,
          job_name=request.job_name,
          pipeline_options=request.pipeline_options,
          retrieval_token=self._artifact_service.retrieval_token(
              preparation_id)),
      self._staging_dir)
  self._jobs[preparation_id] = BeamJob(
      preparation_id, request.pipeline_options, request.pipeline,
      provision_info)
  logging.debug("Prepared job '%s' as '%s'", request.job_name, preparation_id)
  # TODO(angoenka): Pass an appropriate staging_session_token. The token can
  # be obtained in PutArtifactResponse from JobService.
  if not self._artifact_staging_endpoint:
    # The front-end didn't try to stage anything, but the worker may
    # request what's here, so we should at least store an empty manifest.
    self._artifact_service.CommitManifest(
        beam_artifact_api_pb2.CommitManifestRequest(
            staging_session_token=preparation_id,
            manifest=beam_artifact_api_pb2.Manifest()))
  return beam_job_api_pb2.PrepareJobResponse(
      preparation_id=preparation_id,
      artifact_staging_endpoint=self._artifact_staging_endpoint,
      staging_session_token=preparation_id)
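# Sketch: driving Prepare above from a client. This mirrors the end-to-end
# test later in this section; the empty Pipeline proto is a placeholder.
job_service = local_job_service.LocalJobServicer()
job_service.start_grpc_server()
prepare_response = job_service.Prepare(
    beam_job_api_pb2.PrepareJobRequest(
        job_name='job', pipeline=beam_runner_api_pb2.Pipeline()))
# Note: staging_session_token and preparation_id carry the same value here,
# per the return statement above.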
def create_beam_job(
    self,
    preparation_id,  # type: str
    job_name,  # type: str
    pipeline,  # type: beam_runner_api_pb2.Pipeline
    options  # type: struct_pb2.Struct
):
  # type: (...) -> BeamJob
  # TODO(angoenka): Pass an appropriate staging_session_token. The token can
  # be obtained in PutArtifactResponse from JobService.
  if not self._artifact_staging_endpoint:
    # The front-end didn't try to stage anything, but the worker may
    # request what's here, so we should at least store an empty manifest.
    self._legacy_artifact_service.CommitManifest(
        beam_artifact_api_pb2.CommitManifestRequest(
            staging_session_token=preparation_id,
            manifest=beam_artifact_api_pb2.Manifest()))
  self._artifact_service.register_job(
      staging_token=preparation_id,
      dependency_sets={
          id: env.dependencies
          for (id, env) in pipeline.components.environments.items()
      })
  provision_info = fn_runner.ExtendedProvisionInfo(
      beam_provision_api_pb2.ProvisionInfo(
          pipeline_options=options,
          retrieval_token=self._legacy_artifact_service.retrieval_token(
              preparation_id)),
      self._staging_dir,
      job_name=job_name)
  return BeamJob(
      preparation_id,
      pipeline,
      options,
      provision_info,
      self._artifact_staging_endpoint,
      self._artifact_service)
def commit_manifest(self):
  # Commit all buffered artifacts in a single manifest and reset the buffer.
  manifest = beam_artifact_api_pb2.Manifest(artifact=self._artifacts)
  self._artifacts = []
  return self._artifact_staging_stub.CommitManifest(
      beam_artifact_api_pb2.CommitManifestRequest(
          manifest=manifest,
          staging_session_token=self._staging_session_token)).retrieval_token
def stage(self, artifact_staging_endpoint, staging_session_token):
  channel = grpc.insecure_channel(artifact_staging_endpoint)
  staging_stub = beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
      channel)
  # Nothing is actually staged; committing an empty manifest still yields
  # the retrieval token the runner needs.
  manifest_response = staging_stub.CommitManifest(
      beam_artifact_api_pb2.CommitManifestRequest(
          staging_session_token=staging_session_token,
          manifest=beam_artifact_api_pb2.Manifest()))
  channel.close()
  return manifest_response.retrieval_token
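# Sketch: using the stage() helper above. `stager` is a hypothetical
# instance of the class defining stage(); both arguments normally come
# from a PrepareJobResponse, as in the run_pipeline() variants below.
retrieval_token = stager.stage(
    artifact_staging_endpoint=prepare_response.artifact_staging_endpoint.url,
    staging_session_token=prepare_response.staging_session_token)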
def test_end_to_end(self):
  job_service = local_job_service.LocalJobServicer()
  job_service.start_grpc_server()

  # This logic is taken roughly from PortableRunner.run_pipeline().

  # Prepare the job.
  prepare_response = job_service.Prepare(
      beam_job_api_pb2.PrepareJobRequest(
          job_name='job', pipeline=beam_runner_api_pb2.Pipeline()))
  channel = grpc.insecure_channel(
      prepare_response.artifact_staging_endpoint.url)
  retrieval_token = beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
      channel).CommitManifest(
          beam_artifact_api_pb2.CommitManifestRequest(
              staging_session_token=prepare_response.staging_session_token,
              manifest=beam_artifact_api_pb2.Manifest())).retrieval_token
  channel.close()

  state_stream = job_service.GetStateStream(
      beam_job_api_pb2.GetJobStateRequest(
          job_id=prepare_response.preparation_id))
  # If there's an error, we don't always get it until we try to read.
  # Fortunately, there's always an immediate current state published.
  state_stream = increment_iter(state_stream)

  message_stream = job_service.GetMessageStream(
      beam_job_api_pb2.JobMessagesRequest(
          job_id=prepare_response.preparation_id))

  job_service.Run(
      beam_job_api_pb2.RunJobRequest(
          preparation_id=prepare_response.preparation_id,
          retrieval_token=retrieval_token))

  state_results = list(state_stream)
  message_results = list(message_stream)

  expected_states = [
      beam_job_api_pb2.JobState.STOPPED,
      beam_job_api_pb2.JobState.STARTING,
      beam_job_api_pb2.JobState.RUNNING,
      beam_job_api_pb2.JobState.DONE,
  ]
  self.assertEqual([s.state for s in state_results], expected_states)
  self.assertEqual([s.state_response.state for s in message_results],
                   expected_states)
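# increment_iter is used above but not defined in this excerpt. A minimal
# sketch consistent with that use (pull the first state eagerly so an
# immediate error surfaces, then re-chain it onto the rest of the stream):
import itertools

def increment_iter(it):
  first = next(it)  # raises here if the service reported an immediate error
  return itertools.chain([first], it)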
def run_pipeline(self, pipeline):
  # Java has different expectations about coders
  # (windowed in Fn API, but *un*windowed in runner API), whereas the
  # FnApiRunner treats them consistently, so we must guard this.
  # See also BEAM-2717.
  proto_context = pipeline_context.PipelineContext(
      default_environment_url=self._docker_image)
  proto_pipeline = pipeline.to_runner_api(context=proto_context)
  if self._runner_api_address:
    for pcoll in proto_pipeline.components.pcollections.values():
      if pcoll.coder_id not in proto_context.coders:
        coder = coders.registry.get_coder(pickler.loads(pcoll.coder_id))
        pcoll.coder_id = proto_context.coders.get_id(coder)
    proto_context.coders.populate_map(proto_pipeline.components.coders)

  # Some runners won't detect the GroupByKey transform unless it has no
  # subtransforms. Remove all sub-transforms until BEAM-4605 is resolved.
  for _, transform_proto in list(
      proto_pipeline.components.transforms.items()):
    if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
      for sub_transform in transform_proto.subtransforms:
        del proto_pipeline.components.transforms[sub_transform]
      del transform_proto.subtransforms[:]

  job_service = self._create_job_service()
  prepare_response = job_service.Prepare(
      beam_job_api_pb2.PrepareJobRequest(
          job_name='job', pipeline=proto_pipeline))
  if prepare_response.artifact_staging_endpoint.url:
    # Must commit something to get a retrieval token,
    # committing empty manifest for now.
    # TODO(BEAM-3883): Actually stage required files.
    artifact_service = beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
        grpc.insecure_channel(prepare_response.artifact_staging_endpoint.url))
    commit_manifest = artifact_service.CommitManifest(
        beam_artifact_api_pb2.CommitManifestRequest(
            manifest=beam_artifact_api_pb2.Manifest(),
            staging_session_token=prepare_response.staging_session_token))
    retrieval_token = commit_manifest.retrieval_token
  else:
    retrieval_token = None
  run_response = job_service.Run(
      beam_job_api_pb2.RunJobRequest(
          preparation_id=prepare_response.preparation_id,
          retrieval_token=retrieval_token))
  return PipelineResult(job_service, run_response.job_id)
def run_pipeline(self, pipeline):
  docker_image = (
      pipeline.options.view_as(PortableOptions).harness_docker_image
      or self.default_docker_image())
  job_endpoint = pipeline.options.view_as(PortableOptions).job_endpoint
  if not job_endpoint:
    raise ValueError(
        'job_endpoint should be provided while creating runner.')

  proto_context = pipeline_context.PipelineContext(
      default_environment_url=docker_image)
  proto_pipeline = pipeline.to_runner_api(context=proto_context)

  # Some runners won't detect the GroupByKey transform unless it has no
  # subtransforms. Remove all sub-transforms until BEAM-4605 is resolved.
  for _, transform_proto in list(
      proto_pipeline.components.transforms.items()):
    if transform_proto.spec.urn == common_urns.primitives.GROUP_BY_KEY.urn:
      for sub_transform in transform_proto.subtransforms:
        del proto_pipeline.components.transforms[sub_transform]
      del transform_proto.subtransforms[:]

  job_service = beam_job_api_pb2_grpc.JobServiceStub(
      grpc.insecure_channel(job_endpoint))
  prepare_response = job_service.Prepare(
      beam_job_api_pb2.PrepareJobRequest(
          job_name='job', pipeline=proto_pipeline))
  if prepare_response.artifact_staging_endpoint.url:
    # Must commit something to get a retrieval token,
    # committing empty manifest for now.
    # TODO(BEAM-3883): Actually stage required files.
    artifact_service = beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
        grpc.insecure_channel(prepare_response.artifact_staging_endpoint.url))
    commit_manifest = artifact_service.CommitManifest(
        beam_artifact_api_pb2.CommitManifestRequest(
            manifest=beam_artifact_api_pb2.Manifest(),
            staging_session_token=prepare_response.staging_session_token))
    retrieval_token = commit_manifest.retrieval_token
  else:
    retrieval_token = None
  run_response = job_service.Run(
      beam_job_api_pb2.RunJobRequest(
          preparation_id=prepare_response.preparation_id,
          retrieval_token=retrieval_token))
  return PipelineResult(job_service, run_response.job_id)
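# Sketch: how this runner is typically driven. The entry point is the
# standard Pipeline API; the endpoint value and the trivial pipeline are
# hypothetical, and PortableRunner is assumed to be the class defining
# run_pipeline above.
import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(['--job_endpoint=localhost:8099'])
p = beam.Pipeline(runner=PortableRunner(), options=options)
_ = p | beam.Create([1, 2, 3])  # hypothetical trivial pipeline
result = p.run()  # eventually dispatches to run_pipeline() above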
def create_beam_job(self, preparation_id, job_name, pipeline, options):
  # TODO(angoenka): Pass an appropriate staging_session_token. The token can
  # be obtained in PutArtifactResponse from JobService.
  if not self._artifact_staging_endpoint:
    # The front-end didn't try to stage anything, but the worker may
    # request what's here, so we should at least store an empty manifest.
    self._artifact_service.CommitManifest(
        beam_artifact_api_pb2.CommitManifestRequest(
            staging_session_token=preparation_id,
            manifest=beam_artifact_api_pb2.Manifest()))
  provision_info = fn_api_runner.ExtendedProvisionInfo(
      beam_provision_api_pb2.ProvisionInfo(
          job_id=preparation_id,
          job_name=job_name,
          pipeline_options=options,
          retrieval_token=self._artifact_service.retrieval_token(
              preparation_id)),
      self._staging_dir)
  return BeamJob(
      preparation_id, pipeline, options, provision_info,
      self._artifact_staging_endpoint)
def GetManifest(self, request, context=None):
  # Always serves an empty manifest, regardless of the retrieval token.
  return beam_artifact_api_pb2.GetManifestResponse(
      manifest=beam_artifact_api_pb2.Manifest())
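# Sketch: a retrieval client calling GetManifest above. `retrieval_stub`
# is a hypothetical stub instance; the request shape matches the one used
# in _run_staging below.
manifest = retrieval_stub.GetManifest(
    beam_artifact_api_pb2.GetManifestRequest(
        retrieval_token=retrieval_token)).manifest
# With this servicer, the returned manifest is always empty.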
def test_end_to_end(self, http_mock):
  with temp_name(suffix='fake.jar') as fake_jar:
    # Create the jar file with some trivial contents.
    with zipfile.ZipFile(fake_jar, 'w') as zip:
      with zip.open('FakeClass.class', 'w') as fout:
        fout.write(b'[original_contents]')

    job_server = flink_uber_jar_job_server.FlinkUberJarJobServer(
        'http://flink', fake_jar)

    # Prepare the job.
    prepare_response = job_server.Prepare(
        beam_job_api_pb2.PrepareJobRequest(
            job_name='job', pipeline=beam_runner_api_pb2.Pipeline()))
    channel = grpc.insecure_channel(
        prepare_response.artifact_staging_endpoint.url)
    retrieval_token = (
        beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(channel)
        .CommitManifest(
            beam_artifact_api_pb2.CommitManifestRequest(
                staging_session_token=prepare_response.staging_session_token,
                manifest=beam_artifact_api_pb2.Manifest())).retrieval_token)
    channel.close()

    # Now actually run the job.
    http_mock.post(
        'http://flink/v1/jars/upload',
        json={'filename': '/path/to/jar/nonce'})
    http_mock.post(
        'http://flink/v1/jars/nonce/run', json={'jobid': 'some_job_id'})
    job_server.Run(
        beam_job_api_pb2.RunJobRequest(
            preparation_id=prepare_response.preparation_id,
            retrieval_token=retrieval_token))

    # Check the status until the job is "done" and get all error messages.
    http_mock.get(
        'http://flink/v1/jobs/some_job_id/execution-result',
        [{'json': {'status': {'id': 'IN_PROGRESS'}}},
         {'json': {'status': {'id': 'IN_PROGRESS'}}},
         {'json': {'status': {'id': 'COMPLETED'}}}])
    http_mock.get(
        'http://flink/v1/jobs/some_job_id', json={'state': 'FINISHED'})
    http_mock.delete('http://flink/v1/jars/nonce')

    state_stream = job_server.GetStateStream(
        beam_job_api_pb2.GetJobStateRequest(
            job_id=prepare_response.preparation_id))
    self.assertEqual(
        [s.state for s in state_stream],
        [beam_job_api_pb2.JobState.RUNNING, beam_job_api_pb2.JobState.DONE])

    http_mock.get(
        'http://flink/v1/jobs/some_job_id/exceptions',
        json={'all-exceptions': [{'exception': 'exc_text', 'timestamp': 0}]})
    message_stream = job_server.GetMessageStream(
        beam_job_api_pb2.JobMessagesRequest(
            job_id=prepare_response.preparation_id))
    self.assertEqual(
        [m for m in message_stream],
        [
            beam_job_api_pb2.JobMessagesResponse(
                message_response=beam_job_api_pb2.JobMessage(
                    message_id='message0',
                    time='0',
                    importance=beam_job_api_pb2.JobMessage.MessageImportance.
                    JOB_MESSAGE_ERROR,
                    message_text='exc_text')),
            beam_job_api_pb2.JobMessagesResponse(
                state_response=beam_job_api_pb2.GetJobStateResponse(
                    state=beam_job_api_pb2.JobState.DONE)),
        ])
def commit(session):
  return session, self._service.CommitManifest(
      beam_artifact_api_pb2.CommitManifestRequest(
          staging_session_token=session,
          manifest=beam_artifact_api_pb2.Manifest(
              artifact=artifacts[session]))).retrieval_token
def _run_staging(self, staging_service, retrieval_service):
  staging_session_token = '/session_staging_token \n\0*'

  # First stage some files.
  staging_service.PutArtifact(
      iter([
          self.put_metadata(staging_session_token, 'name'),
          self.put_data(b'data')
      ]))
  staging_service.PutArtifact(
      iter([
          self.put_metadata(staging_session_token, 'many_chunks'),
          self.put_data(b'a'),
          self.put_data(b'b'),
          self.put_data(b'c')
      ]))
  staging_service.PutArtifact(
      iter([
          self.put_metadata(staging_session_token, 'long'),
          self.put_data(b'a' * 1000)
      ]))
  staging_service.PutArtifact(
      iter([
          self.put_metadata(
              staging_session_token, 'with_hash',
              hashlib.sha256(b'data...').hexdigest()),
          self.put_data(b'data'),
          self.put_data(b'...')
      ]))

  with self.assertRaises(Exception):
    staging_service.PutArtifact(
        iter([
            self.put_metadata(staging_session_token, 'bad_hash', 'bad_hash'),
            self.put_data(b'data')
        ]))

  manifest = beam_artifact_api_pb2.Manifest(artifact=[
      beam_artifact_api_pb2.ArtifactMetadata(name='name'),
      beam_artifact_api_pb2.ArtifactMetadata(name='many_chunks'),
      beam_artifact_api_pb2.ArtifactMetadata(name='long'),
      beam_artifact_api_pb2.ArtifactMetadata(name='with_hash'),
  ])
  retrieval_token = staging_service.CommitManifest(
      beam_artifact_api_pb2.CommitManifestRequest(
          staging_session_token=staging_session_token,
          manifest=manifest)).retrieval_token

  # Now attempt to retrieve them.
  retrieved_manifest = retrieval_service.GetManifest(
      beam_artifact_api_pb2.GetManifestRequest(
          retrieval_token=retrieval_token)).manifest
  self.assertEqual(manifest, retrieved_manifest)
  self.assertEqual(
      b'data',
      self.retrieve_artifact(retrieval_service, retrieval_token, 'name'))
  self.assertEqual(
      b'abc',
      self.retrieve_artifact(retrieval_service, retrieval_token,
                             'many_chunks'))
  self.assertEqual(
      b'a' * 1000,
      self.retrieve_artifact(retrieval_service, retrieval_token, 'long'))
  self.assertEqual(
      b'data...',
      self.retrieve_artifact(retrieval_service, retrieval_token, 'with_hash'))
  with self.assertRaises(Exception):
    self.retrieve_artifact(retrieval_service, retrieval_token, 'bad_hash')
  with self.assertRaises(Exception):
    self.retrieve_artifact(retrieval_service, retrieval_token, 'missing')
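# put_metadata and put_data are test helpers not shown in this excerpt.
# A plausible sketch, assuming the legacy artifact API message shapes
# (PutArtifactRequest, PutArtifactMetadata, ArtifactChunk):
def put_metadata(self, staging_session_token, name, sha256=None):
  return beam_artifact_api_pb2.PutArtifactRequest(
      metadata=beam_artifact_api_pb2.PutArtifactMetadata(
          staging_session_token=staging_session_token,
          metadata=beam_artifact_api_pb2.ArtifactMetadata(
              name=name, sha256=sha256)))

def put_data(self, chunk):
  return beam_artifact_api_pb2.PutArtifactRequest(
      data=beam_artifact_api_pb2.ArtifactChunk(data=chunk))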
def test_end_to_end(self, http_mock):
  submission_id = "submission-id"
  worker_host_port = "workerhost:12345"
  worker_id = "worker-id"
  server_spark_version = "1.2.3"

  def spark_submission_status_response(state):
    return {
        'json': {
            "action": "SubmissionStatusResponse",
            "driverState": state,
            "serverSparkVersion": server_spark_version,
            "submissionId": submission_id,
            "success": "true",
            "workerHostPort": worker_host_port,
            "workerId": worker_id
        }
    }

  with temp_name(suffix='fake.jar') as fake_jar:
    with zipfile.ZipFile(fake_jar, 'w') as zip:
      with zip.open('spark-version-info.properties', 'w') as fout:
        fout.write(b'version=4.5.6')

    options = pipeline_options.SparkRunnerOptions()
    options.spark_job_server_jar = fake_jar
    job_server = spark_uber_jar_job_server.SparkUberJarJobServer(
        'http://host:6066', options)

    # Prepare the job.
    prepare_response = job_server.Prepare(
        beam_job_api_pb2.PrepareJobRequest(
            job_name='job', pipeline=beam_runner_api_pb2.Pipeline()))
    channel = grpc.insecure_channel(
        prepare_response.artifact_staging_endpoint.url)
    retrieval_token = (
        beam_artifact_api_pb2_grpc.LegacyArtifactStagingServiceStub(channel)
        .CommitManifest(
            beam_artifact_api_pb2.CommitManifestRequest(
                staging_session_token=prepare_response.staging_session_token,
                manifest=beam_artifact_api_pb2.Manifest())).retrieval_token)
    channel.close()

    # Now actually run the job.
    http_mock.post(
        'http://host:6066/v1/submissions/create',
        json={
            "action": "CreateSubmissionResponse",
            "message": "Driver successfully submitted as submission-id",
            "serverSparkVersion": "1.2.3",
            "submissionId": "submission-id",
            "success": "true"
        })
    job_server.Run(
        beam_job_api_pb2.RunJobRequest(
            preparation_id=prepare_response.preparation_id,
            retrieval_token=retrieval_token))

    # Check the status until the job is "done" and get all error messages.
    http_mock.get(
        'http://host:6066/v1/submissions/status/submission-id',
        [
            spark_submission_status_response('RUNNING'),
            spark_submission_status_response('RUNNING'),
            {
                'json': {
                    "action": "SubmissionStatusResponse",
                    "driverState": "ERROR",
                    "message": "oops",
                    "serverSparkVersion": "1.2.3",
                    "submissionId": submission_id,
                    "success": "true",
                    "workerHostPort": worker_host_port,
                    "workerId": worker_id
                }
            }
        ])
    state_stream = job_server.GetStateStream(
        beam_job_api_pb2.GetJobStateRequest(
            job_id=prepare_response.preparation_id))
    self.assertEqual([s.state for s in state_stream], [
        beam_job_api_pb2.JobState.STOPPED,
        beam_job_api_pb2.JobState.RUNNING,
        beam_job_api_pb2.JobState.RUNNING,
        beam_job_api_pb2.JobState.FAILED
    ])
    message_stream = job_server.GetMessageStream(
        beam_job_api_pb2.JobMessagesRequest(
            job_id=prepare_response.preparation_id))

    def get_item(x):
      if x.HasField('message_response'):
        return x.message_response
      else:
        return x.state_response.state

    self.assertEqual([get_item(m) for m in message_stream], [
        beam_job_api_pb2.JobState.STOPPED,
        beam_job_api_pb2.JobState.RUNNING,
        beam_job_api_pb2.JobMessage(
            message_id='message0',
            time='0',
            importance=beam_job_api_pb2.JobMessage.MessageImportance.
            JOB_MESSAGE_ERROR,
            message_text="oops"),
        beam_job_api_pb2.JobState.FAILED,
    ])