コード例 #1
0
ファイル: serve.py プロジェクト: pj1987111/pyjava
 def __init__(self, host, port, timezone):
     """Record the endpoint and create the socket and serializer.

     Args:
         host: Address stored on ``self.host``.
         port: Port stored on ``self.port``.
         timezone: Passed as the first argument to
             ``ArrowStreamPandasSerializer``.
     """
     self.host, self.port = host, port

     # TCP/IPv4 socket; blocking operations time out after five minutes.
     tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     tcp_socket.settimeout(5 * 60)
     self.socket = tcp_socket

     self.out_ser = ArrowStreamPandasSerializer(timezone, False, None)
     self.is_dev = utils.is_dev()
     # Nothing is bound yet at construction time.
     self.is_bind = False
コード例 #2
0
ファイル: worker.py プロジェクト: pj1987111/pyjava
def main(infile, outfile):
    """Run one worker session over the ``infile``/``outfile`` stream pair.

    Reads from ``infile``, in this order: the split index, a barrier flag,
    a bound port, a key/value configuration map and the command. The
    command is resolved to code through ``CodeCache`` and executed with a
    ``PythonContext`` built from the incoming Arrow stream; the context's
    output is then written to ``outfile`` as an Arrow stream.

    Args:
        infile: Binary stream the request is read from.
        outfile: Binary stream the framing markers and results are written
            to.

    The process exits with status -1 (through ``sys.exit``) when the split
    index is -1, when processing raises an ``Exception``, or when the peer
    does not answer the end-of-data marker with
    ``SpecialLengths.END_OF_STREAM``.
    """
    try:
        try:
            # ray is optional here: a missing package is tolerated and the
            # name is not used anywhere else in this function.
            import ray
        except ImportError:
            pass
        # set up memory limits
        memory_limit_mb = int(os.environ.get('PY_EXECUTOR_MEMORY', "-1"))
        if memory_limit_mb > 0 and has_resource_module:
            total_memory = resource.RLIMIT_AS
            try:
                (soft_limit, hard_limit) = resource.getrlimit(total_memory)
                msg = "Current mem limits: {0} of max {1}\n".format(
                    soft_limit, hard_limit)
                print(msg, file=sys.stderr)

                # convert to bytes
                new_limit = memory_limit_mb * 1024 * 1024

                # Only ever tighten the limit, never loosen it.
                if soft_limit == resource.RLIM_INFINITY or new_limit < soft_limit:
                    msg = "Setting mem limits to {0} of max {1}\n".format(
                        new_limit, new_limit)
                    print(msg, file=sys.stderr)
                    resource.setrlimit(total_memory, (new_limit, new_limit))

            except (resource.error, OSError, ValueError) as e:
                # not all systems support resource limits, so warn instead of failing
                print("WARN: Failed to set memory limit: {0}\n".format(e),
                      file=sys.stderr)
        split_index = read_int(infile)
        print("split_index:%s" % split_index)
        if split_index == -1:  # for unit tests
            sys.exit(-1)

        # These two values are not used below, but they must still be read
        # so that the stream is positioned at the configuration map.
        is_barrier = read_bool(infile)
        bound_port = read_int(infile)

        conf = {}
        for _ in range(read_int(infile)):
            k = utf8_deserializer.loads(infile)
            v = utf8_deserializer.loads(infile)
            conf[k] = v

        command = utf8_deserializer.loads(infile)
        ser = ArrowStreamSerializer()

        timezone = conf.get("timezone")

        out_ser = ArrowStreamPandasSerializer(timezone, True, True)
        is_interactive = os.environ.get('PY_INTERACTIVE', "no") == "yes"
        import uuid
        context_id = str(uuid.uuid4())

        # The context id doubles as the name of a scratch directory created
        # relative to the current working directory.
        if not os.path.exists(context_id):
            os.mkdir(context_id)

        def process():
            # A ``global`` statement applies to the whole function body, so
            # ``data_manager`` is module-global in BOTH branches below and
            # that global is what ``del`` removes during cleanup.
            global data_manager
            global context
            global globals_namespace

            # Bound before the try block so the cleanup code can always
            # inspect it, even when a failure happens before any output
            # iterator exists.
            out_iter = None
            try:
                input_data = ser.load_stream(infile)
                code = CodeCache.get(command)
                data_manager = PythonContext(context_id, input_data, conf)
                # NOTE(review): ``code`` is derived from a command read off
                # ``infile`` and is exec'd as-is; this is only safe when the
                # peer is trusted.
                if is_interactive:
                    # Interactive mode runs in the shared module-level
                    # namespace and also publishes the context globally.
                    context = data_manager
                    exec(code, globals_namespace, globals_namespace)
                else:
                    # Non-interactive mode runs in a fresh namespace that
                    # only exposes the context under its two names.
                    n_local = {
                        "data_manager": data_manager,
                        "context": data_manager
                    }
                    exec(code, n_local, n_local)
                out_iter = data_manager.output()
                write_int(SpecialLengths.START_ARROW_STREAM, outfile)
                out_ser.dump_stream(out_iter, outfile)
            finally:
                # Cleanup is best effort: a failure here must not mask the
                # outcome of the work above.
                import shutil
                shutil.rmtree(context_id, ignore_errors=True)

                if hasattr(out_iter, 'close'):
                    try:
                        out_iter.close()
                    except Exception as e:
                        print("WARN: Failed to close output iterator: {0}\n".format(e),
                              file=sys.stderr)

                try:
                    del data_manager
                except NameError:
                    # The context was never created.
                    pass

        process()

    except Exception:
        try:
            write_int(SpecialLengths.ARROW_STREAM_CRASH, outfile)
            write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, outfile)
            write_with_length(traceback.format_exc().encode("utf-8"), outfile)
        except IOError:
            # JVM close the socket
            pass
        except Exception:
            # Write the error to stderr if it happened while serializing
            print("Py worker failed with exception:", file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)
        sys.exit(-1)

    write_int(SpecialLengths.END_OF_DATA_SECTION, outfile)
    flag = read_int(infile)
    if flag == SpecialLengths.END_OF_STREAM:
        write_int(SpecialLengths.END_OF_STREAM, outfile)
    else:
        # write a different value to tell JVM to not reuse this worker
        write_int(SpecialLengths.END_OF_DATA_SECTION, outfile)
        sys.exit(-1)
コード例 #3
0
ファイル: serve.py プロジェクト: pj1987111/pyjava
class OnceServer(object):
    """TCP server that sends one data set to a single client as an Arrow stream.

    Call ``bind()`` before ``serve(data)``; ``serve`` refuses to run on an
    unbound server. ``close()`` closes the listening socket.
    """

    def __init__(self, host, port, timezone):
        """Record the endpoint and create the socket and serializer.

        Args:
            host: Address later passed to ``socket.bind``.
            port: Port later passed to ``socket.bind``.
            timezone: Passed as the first argument to
                ``ArrowStreamPandasSerializer``.
        """
        self.host = host
        self.port = port
        self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        # Blocking socket operations time out after five minutes.
        self.socket.settimeout(5 * 60)
        self.out_ser = ArrowStreamPandasSerializer(timezone, False, None)
        self.is_bind = False
        self.is_dev = utils.is_dev()

    def bind(self):
        """Bind to ``(host, port)``, start listening and return the address.

        Failures are reported by printing the traceback instead of raising;
        ``is_bind`` stays ``False`` when the bind call itself fails.

        Returns:
            The value of ``self.socket.getsockname()``.
        """
        try:
            self.socket.bind((self.host, self.port))
            self.is_bind = True
            self.socket.listen(1)
        except Exception:
            print(traceback.format_exc())

        return self.socket.getsockname()

    def close(self):
        """Close the listening socket."""
        self.socket.close()

    def serve(self, data):
        """Accept one connection and stream ``data`` to it.

        The payload is framed as ``START_ARROW_STREAM``, the Arrow batches,
        ``END_OF_DATA_SECTION`` and ``END_OF_STREAM``; afterwards one int is
        read back from the client. If sending fails, a crash marker and the
        traceback text are written instead (best effort).

        Args:
            data: Input handed to ``PythonContext.build_chunk_result`` with
                a chunk size of 1024.

        Raises:
            SocketNotBindException: If ``bind()`` has not succeeded yet.
        """
        from pyjava.api.mlsql import PythonContext
        if not self.is_bind:
            raise SocketNotBindException(
                "Please invoke server.bind() before invoke server.serve")
        conn, _ = self.socket.accept()
        sockfile = None
        try:
            sockfile = conn.makefile("rwb", int(
                os.environ.get("BUFFER_SIZE", 65536)))
            # One buffered read/write file object serves both directions.
            infile = sockfile
            out = sockfile
            try:
                write_int(SpecialLengths.START_ARROW_STREAM, out)
                # Each chunk is turned into a list of its columns.
                out_data = ([df[name] for name in df] for df in
                            PythonContext.build_chunk_result(data, 1024))
                self.out_ser.dump_stream(out_data, out)

                write_int(SpecialLengths.END_OF_DATA_SECTION, out)
                write_int(SpecialLengths.END_OF_STREAM, out)
                out.flush()
                if self.is_dev:
                    print("all data  in ray task have been consumed.")
                read_int(infile)
            except Exception:
                try:
                    write_int(SpecialLengths.ARROW_STREAM_CRASH, out)
                    ex = traceback.format_exc()
                    print(ex)
                    write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, out)
                    write_with_length(ex.encode("utf-8"), out)
                    out.flush()
                    read_int(infile)
                except IOError:
                    # JVM close the socket
                    pass
                except Exception:
                    # Write the error to stderr if it happened while serializing
                    print("Py worker failed with exception:")
                    print(traceback.format_exc())
        finally:
            # Close the file wrapper as well as the connection: the
            # underlying OS socket is only released once every object
            # returned by makefile() has been closed too.
            if sockfile is not None:
                try:
                    sockfile.close()
                except OSError:
                    # Closing flushes pending bytes, which can fail when
                    # the peer is already gone; nothing more can be done.
                    pass
            conn.close()
コード例 #4
0
ファイル: test2.py プロジェクト: zzcclp/pyjava
import os
import socket

from pyjava.serializers import ArrowStreamPandasSerializer, read_int, write_int

# Manual client script: connects to a server on HOST:PORT, prints the Arrow
# stream it receives together with the framing ints around it, then sends a
# final int back.
out_ser = ArrowStreamPandasSerializer("Asia/Harbin", False, None)
HOST = "127.0.0.1"
PORT = 11111
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
    sock.connect((HOST, PORT))
    buffer_size = int(os.environ.get("SPARK_BUFFER_SIZE", 65536))
    # Separate buffered reader and writer over duplicated descriptors; both
    # are closed on exit from the block so no descriptor outlives the
    # connection.
    with os.fdopen(os.dup(sock.fileno()), "rb", buffer_size) as infile, \
            os.fdopen(os.dup(sock.fileno()), "wb", buffer_size) as outfile:
        # arrow start
        print(read_int(infile))
        kk = out_ser.load_stream(infile)

        for item in kk:
            print(item)
        # end data
        print(read_int(infile))
        # end stream
        print(read_int(infile))
        # NOTE(review): -4 is presumably the END_OF_STREAM marker the server
        # waits for -- confirm against SpecialLengths.
        write_int(-4, outfile)
        # The writer is buffered: push the reply out explicitly instead of
        # relying on interpreter shutdown to do it.
        outfile.flush()