Exemple #1
0
    def dump_stream(self, iterator, stream):
        """Write every record batch from ``iterator`` to ``stream`` as one Arrow stream.

        The stream writer is created lazily from the schema of the first
        batch. When ``iterator`` yields nothing, a single empty batch with one
        string column named ``value`` is written instead, so a writer is still
        opened on ``stream``. Any writer that was created is closed on exit,
        including when an exception propagates.

        In dev mode (``utils.is_dev()``) the pyarrow version and each batch,
        converted to pandas, are printed.
        """
        import pyarrow as pa
        import pyjava.utils as utils

        verbose = utils.is_dev()
        if verbose:
            print("----pyarrow version---")
            print(pa.__version__)

        sink = None
        try:
            for record_batch in iterator:
                if verbose:
                    print(record_batch.to_pandas())
                if sink is None:
                    # The first batch fixes the schema used for the whole stream.
                    sink = pa.RecordBatchStreamWriter(stream, record_batch.schema)
                sink.write_batch(record_batch)

            if sink is not None:
                # At least one batch was written; ``finally`` closes the writer.
                return

            # Nothing was yielded: fall back to a default single-column schema.
            if verbose:
                print("----dump empty arrow---")
            fallback_schema = pa.schema([('value', pa.string())])
            empty_batch = pa.RecordBatch.from_arrays([[]], schema=fallback_schema)
            sink = pa.RecordBatchStreamWriter(stream, empty_batch.schema)
            sink.write_batch(empty_batch)
        finally:
            if sink is not None:
                sink.close()
Exemple #2
0
 def __init__(self, host, port, timezone):
     """Store the endpoint and prepare a TCP socket and an Arrow serializer.

     Args:
         host: Stored as ``self.host``.
         port: Stored as ``self.port``.
         timezone: Passed as the first argument to
             ``ArrowStreamPandasSerializer``.
     """
     self.host, self.port = host, port

     # socket.settimeout takes seconds, so this is a five-minute timeout.
     timeout_seconds = 5 * 60
     tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     self.socket = tcp_socket
     tcp_socket.settimeout(timeout_seconds)

     # NOTE(review): the meaning of the ``False`` / ``None`` arguments is
     # defined by ArrowStreamPandasSerializer, which is not visible here.
     self.out_ser = ArrowStreamPandasSerializer(timezone, False, None)
     self.is_bind = False
     self.is_dev = utils.is_dev()
Exemple #3
0
 def __init__(self, python_context):
     """Initialise state and build one ``DataServer`` per row of the context.

     For every row returned by ``python_context.fetch_once_as_rows()`` a
     fresh UUID string is appended to ``self.server_ids_in_ray`` and a
     ``DataServer`` built from the row's ``host``, ``port`` (converted to
     ``int``) and ``timezone`` entries is appended to ``self.servers``; the
     two lists therefore stay index-aligned.

     Args:
         python_context: Object providing ``fetch_once_as_rows()``; kept as
             ``self.python_context``.
     """
     self.python_context = python_context
     self.servers = []
     self.server_ids_in_ray = []
     self.mock_data = []
     self.is_setup = False
     self.is_in_mlsql = True
     self.is_dev = utils.is_dev()

     for row in self.python_context.fetch_once_as_rows():
         generated_id = str(uuid.uuid4())
         self.server_ids_in_ray.append(generated_id)

         host = row["host"]
         port = int(row["port"])
         tz = row["timezone"]
         self.servers.append(DataServer(host, port, tz))
Exemple #4
0
    def __init__(self, server_id, java_server, port=0, timezone="Asia/Harbin"):
        """Create a ``OnceServer``, bind it, and record the bound address.

        Args:
            server_id: Stored as ``self.server_id``.
            java_server: Stored as ``self.java_server``; its ``timezone``
                attribute is what is handed to ``OnceServer``.
            port: Port passed to ``OnceServer`` (default ``0``).
            timezone: Stored as ``self.timezone``.

        Raises:
            Exception: Whatever ``self.server.bind()`` raises. The traceback is
                printed and the error is re-raised. Previously it was
                swallowed, and the constructor then failed with an unrelated
                ``UnboundLocalError`` on ``rel_host``.
        """
        # NOTE(review): OnceServer receives java_server.timezone while
        # self.timezone keeps the ``timezone`` argument; confirm this
        # difference is intentional.
        self.server = OnceServer(
            self.get_address(), port, java_server.timezone)
        try:
            rel_host, rel_port = self.server.bind()
        except Exception:
            # Keep the original diagnostic output, but do not continue with an
            # unbound server: rel_host/rel_port do not exist on this path.
            print(traceback.format_exc())
            raise

        self.host = rel_host
        self.port = rel_port
        self.timezone = timezone
        self.server_id = server_id
        self.java_server = java_server
        self.is_dev = utils.is_dev()