    def test_file_retrieval(self):
        file_manager = InMemoryFileManager({
            'path/to/a': b'a',
            'path/to/b': b'b' * 37
        })
        retrieval_service = artifact_service.ArtifactRetrievalService(
            file_manager.file_reader, chunk_size=10)
        dep_a = self.file_artifact('path/to/a')
        self.assertEqual(
            retrieval_service.ResolveArtifacts(
                beam_artifact_api_pb2.ResolveArtifactsRequest(
                    artifacts=[dep_a])),
            beam_artifact_api_pb2.ResolveArtifactsResponse(
                replacements=[dep_a]))

        self.assertEqual(
            list(
                retrieval_service.GetArtifact(
                    beam_artifact_api_pb2.GetArtifactRequest(artifact=dep_a))),
            [beam_artifact_api_pb2.GetArtifactResponse(data=b'a')])
        self.assertEqual(
            list(
                retrieval_service.GetArtifact(
                    beam_artifact_api_pb2.GetArtifactRequest(
                        artifact=self.file_artifact('path/to/b')))),
            [
                beam_artifact_api_pb2.GetArtifactResponse(data=b'b' * 10),
                beam_artifact_api_pb2.GetArtifactResponse(data=b'b' * 10),
                beam_artifact_api_pb2.GetArtifactResponse(data=b'b' * 10),
                beam_artifact_api_pb2.GetArtifactResponse(data=b'b' * 7)
            ])
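This test relies on two helpers that are not shown in the snippet: InMemoryFileManager, which hands ArtifactRetrievalService a file_reader backed by an in-memory dict, and self.file_artifact(...), which wraps a path in an ArtifactInformation proto. A minimal sketch of what they could look like, following the same proto pattern as the URL artifact in test_url_retrieval below (the exact helper names and signatures here are assumptions, not the test's real definitions):

import io

from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_runner_api_pb2


class InMemoryFileManager(object):
    """Sketch: serves artifact bytes from an in-memory dict of path -> bytes."""
    def __init__(self, contents):
        self._contents = contents

    def file_reader(self, path):
        # ArtifactRetrievalService only needs a readable file-like object.
        return io.BytesIO(self._contents[path])


def file_artifact(path):
    # Sketch: wrap a path in a FILE-typed ArtifactInformation proto.
    return beam_runner_api_pb2.ArtifactInformation(
        type_urn=common_urns.artifact_types.FILE.urn,
        type_payload=beam_runner_api_pb2.ArtifactFilePayload(
            path=path).SerializeToString())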
Example #2
def main(unused_argv):
    PyPIArtifactRegistry.register_artifact('beautifulsoup4', '>=4.9,<5.0')
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        help='port on which to serve the job api')
    parser.add_argument('--fully_qualified_name_glob', default=None)
    options = parser.parse_args()

    global server
    with fully_qualified_named_transform.FullyQualifiedNamedTransform.with_filter(
            options.fully_qualified_name_glob):
        server = grpc.server(thread_pool_executor.shared_unbounded_instance())
        beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
            expansion_service.ExpansionServiceServicer(
                PipelineOptions([
                    "--experiments", "beam_fn_api", "--sdk_location",
                    "container"
                ])), server)
        beam_artifact_api_pb2_grpc.add_ArtifactRetrievalServiceServicer_to_server(
            artifact_service.ArtifactRetrievalService(
                artifact_service.BeamFilesystemHandler(None).file_reader),
            server)
        server.add_insecure_port('localhost:{}'.format(options.port))
        server.start()
        _LOGGER.info('Listening for expansion requests at %d', options.port)

        signal.signal(signal.SIGTERM, cleanup)
        signal.signal(signal.SIGINT, cleanup)
        # Block the main thread until a shutdown signal arrives.
        signal.pause()
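The cleanup handler registered above is not defined in this snippet, which is why server is declared global. A handler along the lines of the one in the next example fits (sketch, assuming the module-level server and _LOGGER):

def cleanup(unused_signum, unused_frame):
    # Stop the gRPC server on SIGTERM/SIGINT; `server` is the module-level
    # variable assigned in main().
    _LOGGER.info('Shutting down expansion service.')
    server.stop(None)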
Example #3
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument('-p',
                        '--port',
                        type=int,
                        help='port on which to serve the job api')
    parser.add_argument('--fully_qualified_name_glob', default=None)
    known_args, pipeline_args = parser.parse_known_args(argv)
    pipeline_options = PipelineOptions(
        pipeline_args +
        ["--experiments=beam_fn_api", "--sdk_location=container"])

    with fully_qualified_named_transform.FullyQualifiedNamedTransform.with_filter(
            known_args.fully_qualified_name_glob):

        server = grpc.server(thread_pool_executor.shared_unbounded_instance())
        beam_expansion_api_pb2_grpc.add_ExpansionServiceServicer_to_server(
            expansion_service.ExpansionServiceServicer(pipeline_options),
            server)
        beam_artifact_api_pb2_grpc.add_ArtifactRetrievalServiceServicer_to_server(
            artifact_service.ArtifactRetrievalService(
                artifact_service.BeamFilesystemHandler(None).file_reader),
            server)
        server.add_insecure_port('localhost:{}'.format(known_args.port))
        server.start()
        _LOGGER.info('Listening for expansion requests at %d', known_args.port)

        def cleanup(unused_signum, unused_frame):
            _LOGGER.info('Shutting down expansion service.')
            server.stop(None)

        signal.signal(signal.SIGTERM, cleanup)
        signal.signal(signal.SIGINT, cleanup)
        # Block the main thread until a shutdown signal arrives.
        signal.pause()
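Once the service is running, another process can talk to it with the generated gRPC stubs. A rough client-side sketch, assuming the service listens on port 8097 and that the stub modules are importable under the same names used in the snippets (both assumptions):

import grpc

from apache_beam.portability.api import beam_artifact_api_pb2_grpc
from apache_beam.portability.api import beam_expansion_api_pb2_grpc

channel = grpc.insecure_channel('localhost:8097')

# Expansion requests (built from a PTransform proto) go to the expansion stub.
expansion_stub = beam_expansion_api_pb2_grpc.ExpansionServiceStub(channel)

# Artifact bytes are streamed back in chunks, as in the GetArtifact tests above.
retrieval_stub = beam_artifact_api_pb2_grpc.ArtifactRetrievalServiceStub(channel)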
Example #4
 def _stage_via_portable_service(self, artifact_staging_channel,
                                 staging_session_token):
     artifact_service.offer_artifacts(
         beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
             channel=artifact_staging_channel),
         artifact_service.ArtifactRetrievalService(
             artifact_service.BeamFilesystemHandler(None).file_reader),
         staging_session_token)
Example #5
 def test_embedded_retrieval(self):
     retrieval_service = artifact_service.ArtifactRetrievalService(None)
     embedded_dep = self.embedded_artifact(b'some_data')
     self.assertEqual(
         list(
             retrieval_service.GetArtifact(
                 beam_artifact_api_pb2.GetArtifactRequest(
                     artifact=embedded_dep))),
         [beam_artifact_api_pb2.GetArtifactResponse(data=b'some_data')])
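self.embedded_artifact(...) is another test helper that is not shown. Since the artifact bytes live inside the proto itself, ArtifactRetrievalService needs no file_reader, which is why it is constructed with None here. A sketch of the helper, again treating the exact name and signature as assumptions:

from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_runner_api_pb2


def embedded_artifact(data):
    # Sketch: the payload carries the bytes directly, so retrieval never
    # touches the filesystem.
    return beam_runner_api_pb2.ArtifactInformation(
        type_urn=common_urns.artifact_types.EMBEDDED.urn,
        type_payload=beam_runner_api_pb2.EmbeddedFilePayload(
            data=data).SerializeToString())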
Example #6
 def stage(self, pipeline, artifact_staging_endpoint,
           staging_session_token):
     # type: (...) -> Optional[Any]
     """Stage artifacts"""
     if artifact_staging_endpoint:
         artifact_service.offer_artifacts(
             beam_artifact_api_pb2_grpc.ArtifactStagingServiceStub(
                 channel=grpc.insecure_channel(artifact_staging_endpoint)),
             artifact_service.ArtifactRetrievalService(
                 artifact_service.BeamFilesystemHandler(None).file_reader),
             staging_session_token)
Example #7
 def test_url_retrieval(self):
     retrieval_service = artifact_service.ArtifactRetrievalService(None)
     url_dep = beam_runner_api_pb2.ArtifactInformation(
         type_urn=common_urns.artifact_types.URL.urn,
         type_payload=beam_runner_api_pb2.ArtifactUrlPayload(
             url='file:' + quote(__file__)).SerializeToString())
     content = b''.join([
         r.data for r in retrieval_service.GetArtifact(
             beam_artifact_api_pb2.GetArtifactRequest(artifact=url_dep))
     ])
     with open(__file__, 'rb') as fin:
         self.assertEqual(content, fin.read())
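The snippets above use a number of names without showing their imports. For reference, they correspond roughly to the following modules in the Beam Python SDK (treat the exact import paths as assumptions; they have shifted between releases):

import grpc
from urllib.parse import quote

from apache_beam.options.pipeline_options import PipelineOptions
from apache_beam.portability import common_urns
from apache_beam.portability.api import beam_artifact_api_pb2
from apache_beam.portability.api import beam_artifact_api_pb2_grpc
from apache_beam.portability.api import beam_expansion_api_pb2_grpc
from apache_beam.portability.api import beam_runner_api_pb2
from apache_beam.runners.portability import artifact_service
from apache_beam.runners.portability import expansion_service
from apache_beam.utils import thread_pool_executor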
Example #8
    def __init__(
            self,
            state,  # type: StateServicer
            provision_info,  # type: Optional[ExtendedProvisionInfo]
            worker_manager,  # type: WorkerHandlerManager
    ):
        # type: (...) -> None
        self.state = state
        self.provision_info = provision_info
        self.control_server = grpc.server(
            thread_pool_executor.shared_unbounded_instance())
        self.control_port = self.control_server.add_insecure_port('[::]:0')
        self.control_address = 'localhost:%s' % self.control_port

        # Options to have no limits (-1) on the size of the messages
        # received or sent over the data plane. The actual buffer size
        # is controlled in a layer above.
        no_max_message_sizes = [("grpc.max_receive_message_length", -1),
                                ("grpc.max_send_message_length", -1)]
        self.data_server = grpc.server(
            thread_pool_executor.shared_unbounded_instance(),
            options=no_max_message_sizes)
        self.data_port = self.data_server.add_insecure_port('[::]:0')

        self.state_server = grpc.server(
            thread_pool_executor.shared_unbounded_instance(),
            options=no_max_message_sizes)
        self.state_port = self.state_server.add_insecure_port('[::]:0')

        self.control_handler = BeamFnControlServicer(worker_manager)
        beam_fn_api_pb2_grpc.add_BeamFnControlServicer_to_server(
            self.control_handler, self.control_server)

        # If we have provision info, serve these off the control port as well.
        if self.provision_info:
            if self.provision_info.provision_info:
                beam_provision_api_pb2_grpc.add_ProvisionServiceServicer_to_server(
                    BasicProvisionService(self.provision_info.provision_info,
                                          worker_manager), self.control_server)

            def open_uncompressed(f):
                # type: (str) -> BinaryIO
                return filesystems.FileSystems.open(
                    f, compression_type=CompressionTypes.UNCOMPRESSED)

            beam_artifact_api_pb2_grpc.add_ArtifactRetrievalServiceServicer_to_server(
                artifact_service.ArtifactRetrievalService(
                    file_reader=open_uncompressed), self.control_server)

        self.data_plane_handler = data_plane.BeamFnDataServicer(
            DATA_BUFFER_TIME_LIMIT_MS)
        beam_fn_api_pb2_grpc.add_BeamFnDataServicer_to_server(
            self.data_plane_handler, self.data_server)

        beam_fn_api_pb2_grpc.add_BeamFnStateServicer_to_server(
            GrpcStateServicer(state), self.state_server)

        self.logging_server = grpc.server(
            thread_pool_executor.shared_unbounded_instance(),
            options=no_max_message_sizes)
        self.logging_port = self.logging_server.add_insecure_port('[::]:0')
        beam_fn_api_pb2_grpc.add_BeamFnLoggingServicer_to_server(
            BasicLoggingService(), self.logging_server)

        _LOGGER.info('starting control server on port %s', self.control_port)
        _LOGGER.info('starting data server on port %s', self.data_port)
        _LOGGER.info('starting state server on port %s', self.state_port)
        _LOGGER.info('starting logging server on port %s', self.logging_port)
        self.logging_server.start()
        self.state_server.start()
        self.data_server.start()
        self.control_server.start()
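Nothing in this constructor tears the servers down again; a matching shutdown typically stops them once the runner is finished. A sketch of what that could look like, mirroring the start() calls above (the method name and grace handling are assumptions, not the class's actual API):

    def close(self):
        # Hypothetical teardown: stop each server started in __init__,
        # letting in-flight RPCs finish.
        self.control_server.stop(grace=None)
        self.data_server.stop(grace=None)
        self.state_server.stop(grace=None)
        self.logging_server.stop(grace=None)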