コード例 #1
0
ファイル: serve.py プロジェクト: pj1987111/pyjava
    def serve(self, data):
        """Accept one connection and stream ``data`` to the peer.

        The stream written to the peer is, in order: the
        ``START_ARROW_STREAM`` marker, the chunks produced by
        ``PythonContext.build_chunk_result(data, 1024)`` (each chunk sent as
        the list of its columns) serialized through ``self.out_ser``, then
        the ``END_OF_DATA_SECTION`` and ``END_OF_STREAM`` markers. After
        flushing, one int is read back from the peer before the connection
        is closed.

        If anything fails while streaming, the ``ARROW_STREAM_CRASH`` and
        ``PYTHON_EXCEPTION_THROWN`` markers followed by the formatted
        traceback are sent instead, on a best-effort basis.

        :param data: passed unchanged to ``PythonContext.build_chunk_result``.
        :raises SocketNotBindException: if ``self.is_bind`` is false.
        """
        from pyjava.api.mlsql import PythonContext
        if not self.is_bind:
            raise SocketNotBindException(
                "Please invoke server.bind() before invoke server.serve")
        conn, _ = self.socket.accept()
        sockfile = None
        try:
            # A single buffered read/write file object serves both directions.
            sockfile = conn.makefile("rwb", int(
                os.environ.get("BUFFER_SIZE", 65536)))
            infile = sockfile
            out = sockfile
            try:
                write_int(SpecialLengths.START_ARROW_STREAM, out)
                out_data = ([df[name] for name in df] for df in
                            PythonContext.build_chunk_result(data, 1024))
                self.out_ser.dump_stream(out_data, out)

                write_int(SpecialLengths.END_OF_DATA_SECTION, out)
                write_int(SpecialLengths.END_OF_STREAM, out)
                out.flush()
                if self.is_dev:
                    print("all data  in ray task have been consumed.")
                # Block until the peer sends one int back.
                read_int(infile)
            except Exception:
                try:
                    write_int(SpecialLengths.ARROW_STREAM_CRASH, out)
                    ex = traceback.format_exc()
                    print(ex)
                    write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, out)
                    write_with_length(ex.encode("utf-8"), out)
                    out.flush()
                    read_int(infile)
                except IOError:
                    # The JVM closed the socket.
                    pass
                except Exception:
                    # Report the error locally if it happened while
                    # serializing the failure itself.
                    print("Py worker failed with exception:")
                    print(traceback.format_exc())
        finally:
            # conn.close() alone does not release the socket while a file
            # object created by makefile() is still open, so close that
            # first. Closing flushes; the peer may already be gone.
            if sockfile is not None:
                try:
                    sockfile.close()
                except (IOError, OSError):
                    pass
            conn.close()
コード例 #2
0
        def process():
            """Execute the compiled command and stream its output to ``outfile``.

            The incoming stream is loaded with ``ser`` and wrapped in a
            ``Data`` object. In interactive mode that object is published as
            the module-level ``data_manager`` and the command runs inside
            ``globals_namespace``; otherwise the command runs with an empty
            globals dict and a locals dict that exposes only ``data_manager``.
            The iterator returned by ``data_manager.output()`` is then written
            after the ``START_ARROW_STREAM`` marker and closed afterwards if
            it has a ``close`` attribute.
            """
            # A ``global`` statement applies to the whole function body, so
            # declaring both names up front is equivalent to declaring them
            # inside the interactive branch.
            global data_manager, globals_namespace

            input_data = ser.load_stream(infile)
            code = compile(command, '<string>', 'exec')
            data_manager = Data(input_data, conf)

            if not is_interactive:
                local_names = {"data_manager": data_manager}
                exec(code, {}, local_names)
            else:
                exec(code, globals_namespace, globals_namespace)

            result_iter = data_manager.output()
            try:
                write_int(SpecialLengths.START_ARROW_STREAM, outfile)
                out_ser.dump_stream(result_iter, outfile)
            finally:
                if hasattr(result_iter, 'close'):
                    result_iter.close()
コード例 #3
0
ファイル: worker.py プロジェクト: pj1987111/pyjava
        def process():
            """Execute the cached command and stream its output to ``outfile``.

            The incoming stream is loaded with ``ser`` and wrapped in a
            ``PythonContext``. In interactive mode the context is published
            as the module-level ``data_manager`` and ``context`` and the
            command runs inside ``globals_namespace``; otherwise the command
            runs in a fresh namespace exposing the context under both names.
            The iterator returned by ``data_manager.output()`` is written
            after the ``START_ARROW_STREAM`` marker.

            Whatever happens, the ``context_id`` directory is removed, the
            output iterator is closed if it was created, and the
            ``data_manager`` binding is dropped. Cleanup is best-effort and
            never masks an exception raised by the body.
            """
            # A ``global`` statement applies to the whole function body.
            global data_manager, context, globals_namespace

            # Bound before the try block so the cleanup can test it directly
            # instead of relying on a swallowed UnboundLocalError.
            out_iter = None
            try:
                input_data = ser.load_stream(infile)
                code = CodeCache.get(command)
                if is_interactive:
                    data_manager = PythonContext(context_id, input_data, conf)
                    context = data_manager
                    exec(code, globals_namespace, globals_namespace)
                else:
                    data_manager = PythonContext(context_id, input_data, conf)
                    n_local = {
                        "data_manager": data_manager,
                        "context": data_manager
                    }
                    exec(code, n_local, n_local)
                out_iter = data_manager.output()
                write_int(SpecialLengths.START_ARROW_STREAM, outfile)
                out_ser.dump_stream(out_iter, outfile)
            finally:
                import shutil
                # Removal failures (e.g. the directory is already gone) are
                # ignored on purpose.
                shutil.rmtree(context_id, ignore_errors=True)

                if out_iter is not None and hasattr(out_iter, 'close'):
                    try:
                        out_iter.close()
                    except Exception:
                        # Best-effort: do not mask the original error.
                        pass

                try:
                    del data_manager
                except NameError:
                    # The body failed before the context was created.
                    pass
コード例 #4
0
ファイル: worker.py プロジェクト: pj1987111/pyjava
def main(infile, outfile):
    """Run one worker task read from ``infile`` and answer on ``outfile``.

    Steps, in the order the code performs them:

    1. Optionally lower the address-space limit to ``PY_EXECUTOR_MEMORY``
       megabytes when that variable is positive and the ``resource`` module
       is available.
    2. Read the task header: split index, barrier flag, bound port, the
       key/value configuration and the command text.
    3. Create a per-task directory named by a fresh UUID, run the command
       through the nested ``process`` helper and stream its output.
    4. Write ``END_OF_DATA_SECTION`` and read a flag from the peer; answer
       with ``END_OF_STREAM`` when the flag is ``END_OF_STREAM``, otherwise
       write ``END_OF_DATA_SECTION`` again and exit.

    Any ``Exception`` raised during steps 1-3 is reported to the peer
    (``ARROW_STREAM_CRASH``, ``PYTHON_EXCEPTION_THROWN``, traceback text)
    and the process exits with status -1.

    :param infile: binary stream the task is read from.
    :param outfile: binary stream the results and markers are written to.
    """
    try:
        try:
            # Optional dependency; its absence is tolerated.
            import ray
        except ImportError:
            pass
        # set up memory limits
        memory_limit_mb = int(os.environ.get('PY_EXECUTOR_MEMORY', "-1"))
        if memory_limit_mb > 0 and has_resource_module:
            total_memory = resource.RLIMIT_AS
            try:
                (soft_limit, hard_limit) = resource.getrlimit(total_memory)
                msg = "Current mem limits: {0} of max {1}\n".format(
                    soft_limit, hard_limit)
                print(msg, file=sys.stderr)

                # convert to bytes
                new_limit = memory_limit_mb * 1024 * 1024

                # Only ever tighten the limit, never loosen it.
                if soft_limit == resource.RLIM_INFINITY or new_limit < soft_limit:
                    msg = "Setting mem limits to {0} of max {1}\n".format(
                        new_limit, new_limit)
                    print(msg, file=sys.stderr)
                    resource.setrlimit(total_memory, (new_limit, new_limit))

            except (resource.error, OSError, ValueError) as e:
                # not all systems support resource limits, so warn instead of failing
                print("WARN: Failed to set memory limit: {0}\n".format(e),
                      file=sys.stderr)
        split_index = read_int(infile)
        print("split_index:%s" % split_index)
        if split_index == -1:  # for unit tests
            sys.exit(-1)

        # Both values are unused here but must still be consumed to keep
        # the stream position aligned with the fields that follow.
        is_barrier = read_bool(infile)
        bound_port = read_int(infile)

        conf = {}
        for _ in range(read_int(infile)):
            k = utf8_deserializer.loads(infile)
            v = utf8_deserializer.loads(infile)
            conf[k] = v

        command = utf8_deserializer.loads(infile)
        ser = ArrowStreamSerializer()

        timezone = conf.get("timezone")

        out_ser = ArrowStreamPandasSerializer(timezone, True, True)
        is_interactive = os.environ.get('PY_INTERACTIVE', "no") == "yes"
        import uuid
        context_id = str(uuid.uuid4())

        # Per-task directory, created relative to the current working
        # directory and removed again by process().
        if not os.path.exists(context_id):
            os.mkdir(context_id)

        def process():
            """Execute the command, stream its output, then clean up."""
            # A ``global`` statement applies to the whole function body.
            global data_manager, context, globals_namespace

            # Bound before the try block so the cleanup can test it directly
            # instead of relying on a swallowed UnboundLocalError.
            out_iter = None
            try:
                input_data = ser.load_stream(infile)
                code = CodeCache.get(command)
                if is_interactive:
                    data_manager = PythonContext(context_id, input_data, conf)
                    context = data_manager
                    exec(code, globals_namespace, globals_namespace)
                else:
                    data_manager = PythonContext(context_id, input_data, conf)
                    n_local = {
                        "data_manager": data_manager,
                        "context": data_manager
                    }
                    exec(code, n_local, n_local)
                out_iter = data_manager.output()
                write_int(SpecialLengths.START_ARROW_STREAM, outfile)
                out_ser.dump_stream(out_iter, outfile)
            finally:
                import shutil
                # Removal failures are ignored on purpose.
                shutil.rmtree(context_id, ignore_errors=True)

                if out_iter is not None and hasattr(out_iter, 'close'):
                    try:
                        out_iter.close()
                    except Exception:
                        # Best-effort: do not mask the original error.
                        pass

                try:
                    del data_manager
                except NameError:
                    # The body failed before the context was created.
                    pass

        process()

    except Exception:
        try:
            write_int(SpecialLengths.ARROW_STREAM_CRASH, outfile)
            write_int(SpecialLengths.PYTHON_EXCEPTION_THROWN, outfile)
            write_with_length(traceback.format_exc().encode("utf-8"), outfile)
        except IOError:
            # The JVM closed the socket.
            pass
        except Exception:
            # Write the error to stderr if it happened while serializing
            print("Py worker failed with exception:", file=sys.stderr)
            print(traceback.format_exc(), file=sys.stderr)
        sys.exit(-1)

    write_int(SpecialLengths.END_OF_DATA_SECTION, outfile)
    flag = read_int(infile)
    if flag == SpecialLengths.END_OF_STREAM:
        write_int(SpecialLengths.END_OF_STREAM, outfile)
    else:
        # write a different value to tell JVM to not reuse this worker
        write_int(SpecialLengths.END_OF_DATA_SECTION, outfile)
        sys.exit(-1)
コード例 #5
0
ファイル: daemon.py プロジェクト: zzcclp/pyjava
def manager():
    """Run the daemon loop that forks one worker process per connection.

    The daemon listens on an ephemeral loopback port and reports that port
    as an int on stdout. It then multiplexes two inputs:

    * stdin: each int read is a worker pid to kill with ``SIGKILL``; EOF on
      stdin shuts the daemon down with exit code 0.
    * the listening socket: each accepted connection is handed to a forked
      child, which acknowledges by writing its pid and then runs
      ``worker(sock)``, repeatedly if ``PY_WORKER_REUSE`` is set and the
      worker returned a falsy code.

    The function never returns normally; it always ends through
    ``shutdown``, which signals the process group and calls ``sys.exit``.
    """
    # Create a new process group to corral our children
    os.setpgid(0, 0)

    # Create a listening socket on the AF_INET loopback interface
    listen_sock = socket.socket(AF_INET, SOCK_STREAM)
    listen_sock.bind(('127.0.0.1', 0))
    listen_sock.listen(max(1024, SOMAXCONN))
    listen_host, listen_port = listen_sock.getsockname()

    # re-open stdin/stdout in binary mode
    stdin_bin = os.fdopen(sys.stdin.fileno(), 'rb', 4)
    stdout_bin = os.fdopen(sys.stdout.fileno(), 'wb', 4)
    write_int(listen_port, stdout_bin)
    stdout_bin.flush()

    def shutdown(code):
        """Notify the process group with SIGHUP and exit with ``code``."""
        signal.signal(SIGTERM, SIG_DFL)
        # Send SIGHUP to notify workers of shutdown
        os.kill(0, SIGHUP)
        sys.exit(code)

    def handle_sigterm(*args):
        """SIGTERM handler: shut down with exit code 1."""
        shutdown(1)

    signal.signal(SIGTERM, handle_sigterm)  # Gracefully exit on SIGTERM
    signal.signal(SIGHUP, SIG_IGN)  # Don't die on SIGHUP
    signal.signal(SIGCHLD, SIG_IGN)

    reuse = os.environ.get("PY_WORKER_REUSE")

    # Initialization complete
    try:
        while True:
            try:
                ready_fds = select.select([0, listen_sock], [], [], 1)[0]
            except select.error as ex:
                # On Python 3 select.error is OSError, which cannot be
                # indexed; args[0] carries the errno on both Python 2 and 3.
                if ex.args[0] == EINTR:
                    continue
                else:
                    raise

            if 0 in ready_fds:
                try:
                    worker_pid = read_int(stdin_bin)
                except EOFError:
                    # Spark told us to exit by closing stdin
                    shutdown(0)
                try:
                    os.kill(worker_pid, signal.SIGKILL)
                except OSError:
                    pass  # process already died

            if listen_sock in ready_fds:
                try:
                    sock, _ = listen_sock.accept()
                except OSError as e:
                    if e.errno == EINTR:
                        continue
                    raise

                # Launch a worker process
                try:
                    pid = os.fork()
                except OSError as e:
                    if e.errno in (EAGAIN, EINTR):
                        time.sleep(1)
                        pid = os.fork()  # error here will shutdown daemon
                    else:
                        outfile = sock.makefile(mode='wb')
                        write_int(e.errno,
                                  outfile)  # Signal that the fork failed
                        outfile.flush()
                        outfile.close()
                        sock.close()
                        continue

                if pid == 0:
                    # in child process
                    listen_sock.close()

                    # It should close the standard input in the child process so that
                    # Python native function executions stay intact.
                    #
                    # Note that if we just close the standard input (file descriptor 0),
                    # the lowest file descriptor (file descriptor 0) will be allocated,
                    # later when other file descriptors should happen to open.
                    #
                    # Therefore, here we redirect it to '/dev/null' by duplicating
                    # another file descriptor for '/dev/null' to the standard input (0).
                    # See SPARK-26175.
                    with open(os.devnull, 'r') as devnull:
                        os.dup2(devnull.fileno(), 0)

                    try:
                        # Acknowledge that the fork was successful
                        outfile = sock.makefile(mode="wb")
                        write_int(os.getpid(), outfile)
                        outfile.flush()
                        outfile.close()
                        while True:
                            code = worker(sock)
                            if not reuse or code:
                                # wait for closing
                                try:
                                    while sock.recv(1024):
                                        pass
                                except Exception:
                                    pass
                                break
                            gc.collect()
                    except:
                        # Deliberately bare: even SystemExit raised inside
                        # the worker must end in os._exit so the child never
                        # runs the daemon's own ``finally`` below.
                        traceback.print_exc()
                        os._exit(1)
                    else:
                        os._exit(0)
                else:
                    # Parent: the child owns the connection now.
                    sock.close()

    finally:
        shutdown(1)
コード例 #6
0
ファイル: test2.py プロジェクト: zzcclp/pyjava
import os
import socket

from pyjava.serializers import ArrowStreamPandasSerializer, read_int, write_int

# Manual client: connects to a server expected on HOST:PORT, prints the
# markers and Arrow batches it receives, then sends a final int back.
out_ser = ArrowStreamPandasSerializer("Asia/Harbin", False, None)
HOST = "127.0.0.1"
PORT = 11111
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
    sock.connect((HOST, PORT))
    buffer_size = int(os.environ.get("SPARK_BUFFER_SIZE", 65536))
    # Separate buffered reader and writer over duplicated descriptors; the
    # ``with`` block closes both duplicates when the exchange is over.
    with os.fdopen(os.dup(sock.fileno()), "rb", buffer_size) as infile, \
            os.fdopen(os.dup(sock.fileno()), "wb", buffer_size) as outfile:
        # arrow start
        print(read_int(infile))
        kk = out_ser.load_stream(infile)

        for item in kk:
            print(item)
        # end data
        print(read_int(infile))
        # end stream
        print(read_int(infile))
        # NOTE(review): -4 is presumably the END_OF_STREAM acknowledgement the
        # server waits for - confirm against SpecialLengths.
        write_int(-4, outfile)
        # Four bytes never fill the buffer, so push them out explicitly
        # before the connection is torn down.
        outfile.flush()