Esempio n. 1
0
def serve(port, secret):
    """Build the FSProvider described by the received command and serve requests.

    Opens a JavaLink on ``port``/``secret``, reads one command, instantiates
    the FSProvider class located by ``get_clazz_in_code`` and then answers
    JSON requests through ``handle_request`` until the peer stops sending
    requests or a request marks the session as closed.

    Any exception is printed, reported over the link (empty string first,
    then the JSON-friendly error) and swallowed; the link is always closed.
    """
    link = JavaLink(port, secret)
    # open the connection before anything else
    link.connect()
    try:
        # the first message describes the work to do
        command = link.get_command()

        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]

        # locate the provider class and count its __init__ parameters
        # (the count includes 'self')
        provider_class = get_clazz_in_code(code, FSProvider)
        arg_count = len(inspect.getargspec(provider_class.__init__).args)
        if arg_count < 1 or arg_count > 4:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            link.send_json({'ok': False, 'reason': reason})
            raise Exception(reason)
        # pass as many of (root, config, plugin_config) as the constructor accepts
        ctor_args = (root, config, plugin_config)[:arg_count - 1]
        fsprovider = provider_class(*ctor_args)
        link.send_json({'ok': True})

        # request/response loop
        finished = False
        while not finished:
            request = link.read_json()
            if request is None:
                # nothing more to read
                break
            finished, reply = handle_request(request, fsprovider, finished)
            link.send_json(reply)

        # send end of stream
        link.send_string('')
    except:
        # empty string marks the failure, the error details follow
        link.send_string('')
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
Esempio n. 2
0
def serve(port, secret):
    """Load a user function and answer prediction requests over a JavaLink.

    The initial command supplies ``functionName``, ``codeFilePath`` and
    optionally ``resourceFolderPaths``; they are used to build a
    ``LoadedFunction``. Each subsequent request is passed to
    ``loaded_function.predict(request["params"])`` and the result is sent
    back together with the execution time in microseconds.

    While a request carrying ``usedAPIKey`` is processed, the key is exposed
    through the ``DKU_CURRENT_REQUEST_USED_API_KEY`` environment variable.

    Any exception is logged, reported over the link and swallowed; the link
    is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()

    try:
        # initialization info comes with the first command
        command = link.get_command()

        function_name = command.get('functionName')
        code_file = command.get('codeFilePath')
        data_folders = command.get('resourceFolderPaths', [])
        loaded_function = LoadedFunction(code_file, function_name, data_folders)

        logging.info("Predictor ready")
        link.send_json({"ok": True})

        # serve requests until the peer has nothing more to send
        while True:
            request = link.read_json()
            if request is None:
                break

            api_key = request.get("usedAPIKey", None)
            has_api_key = api_key is not None
            if has_api_key:
                os.environ["DKU_CURRENT_REQUEST_USED_API_KEY"] = api_key

            started = time.time()
            result = loaded_function.predict(request["params"])
            finished = time.time()
            reply = {
                'ok': True,
                'resp': result,
                'execTimeUS': int(1000000 * (finished - started))
            }
            link.send_json(reply)

            if has_api_key:
                del os.environ["DKU_CURRENT_REQUEST_USED_API_KEY"]

        logging.info("Work done")
        # send end of stream
        link.send_string('')
    except:
        logging.exception("Function user code failed")
        # empty string marks the failure, the error details follow
        link.send_string('')
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
Esempio n. 3
0
def serve(port, secret):
    """Write the link's input stream to a path through an FSProvider.

    Reads one command (``code``, ``root``, ``path`` and optional ``config`` /
    ``pluginConfig``), instantiates the FSProvider class located by
    ``get_clazz_in_code`` and calls ``fsprovider.write(path, stream)`` with
    the link's input stream, then acknowledges with ``{'ok': True}``.

    Any exception is printed and reported as ``{'ok': False, 'error': ...}``;
    the link is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()
    try:
        # the first message describes the work to do
        command = link.get_command()

        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]
        path = command["path"]

        # locate the provider class and count its __init__ parameters
        # (the count includes 'self')
        provider_class = get_clazz_in_code(code, FSProvider)
        arg_count = len(inspect.getargspec(provider_class.__init__).args)
        if arg_count < 1 or arg_count > 4:
            raise Exception(
                "Wrong signature of the FSProvider subclass: %i args" % arg_count)
        # pass as many of (root, config, plugin_config) as the constructor accepts
        fsprovider = provider_class(*(root, config, plugin_config)[:arg_count - 1])

        with link.get_input() as stream:
            fsprovider.write(path, stream)

        # acknowledge the write
        link.send_json({'ok': True})
    except:
        traceback.print_exc()
        failure = {'ok': False, 'error': get_json_friendly_error()}
        link.send_json(failure)
    finally:
        link.close()
Esempio n. 4
0
def serve(port, secret):
    """Run one cluster command ('start', 'stop' or a custom action) over a JavaLink.

    The command read from the link supplies ``code``, ``clusterId``,
    ``clusterName``, ``type`` and optionally ``config``, ``pluginConfig`` and
    ``data``. The Cluster class located by ``get_clazz_in_code`` must have a
    5-parameter ``__init__`` (self, cluster_id, name, config, plugin_config).

    * ``start``: calls ``cluster.start()`` or ``cluster.start(report_progress)``
      depending on its signature; the result must be a list/tuple whose first
      item holds the settings and whose optional second item holds the data.
    * ``stop``: calls ``cluster.stop(data)`` or
      ``cluster.stop(data, report_progress)`` depending on its signature.
    * anything else: calls the method of that name on the cluster with ``data``.

    An unsupported ``start``/``stop`` signature is reported as an explicit
    error instead of surfacing as an unbound variable (start) or being
    silently acknowledged without calling the method (stop).

    Any exception is printed and reported through ``send_error``; the link is
    always closed.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        cluster_id = command["clusterId"]
        cluster_name = command["clusterName"]

        # get the cluster object; the parameter count includes 'self'
        clazz = get_clazz_in_code(code, Cluster)
        arg_count = len(inspect.getargspec(clazz.__init__).args)
        if arg_count != 5:
            raise Exception("Wrong signature of the Cluster subclass: %i args but expected 5 (self, cluster_id, name, config, plugin_config)" % arg_count)
        cluster = clazz(cluster_id, cluster_name, config, plugin_config)

        # work
        call_name = command["type"]
        data = command.get("data", {})
        if call_name == 'start':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_start_progress_target, link)

            arg_count = len(inspect.getargspec(cluster.start).args)
            if arg_count == 1:
                result = cluster.start()
            elif arg_count == 2:
                result = cluster.start(report_progress)
            else:
                # without this branch 'result' would be unbound below
                raise Exception("Wrong signature of the start() method: %i args but expected 1 or 2 (self, [report_progress])" % arg_count)

            if not isinstance(result, (list, tuple)):
                raise Exception("start() didn't return an object of a valid type: %s" % type(result))

            cluster_settings = result[0]
            cluster_data = result[1] if len(result) > 1 else {}
            cluster_setup = {
                'hadoopSettings': cluster_settings.get('hadoop', None),
                'hiveSettings': cluster_settings.get('hive', None),
                'impalaSettings': cluster_settings.get('impala', None),
                'sparkSettings': cluster_settings.get('spark', None),
                'data': cluster_data,
            }
            send_result_json(cluster_setup, link)
        elif call_name == 'stop':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_stop_progress_target, link)

            arg_count = len(inspect.getargspec(cluster.stop).args)
            if arg_count == 2:
                cluster.stop(data)
            elif arg_count == 3:
                cluster.stop(data, report_progress)
            else:
                # do not acknowledge a stop that was never performed
                raise Exception("Wrong signature of the stop() method: %i args but expected 2 or 3 (self, data, [report_progress])" % arg_count)

            send_result_json({'ok': True}, link)
        else:
            # custom action: must be a method of the cluster object
            if not hasattr(cluster, call_name):
                raise Exception("Wrong call type : %s" % call_name)
            action_attr = getattr(cluster, call_name)
            if not inspect.ismethod(action_attr):
                raise Exception("Wrong call type : %s is not a method" % call_name)
            result = action_attr(data)
            # convert to something that is legit for a JsonObject
            if result is None:
                result = {}
            if not isinstance(result, dict):
                result = {'result': result}
            # send
            link.send_json({'ok': True, 'response': result})
        # send end of stream (data is expected as a stream)
        link.send_string('')

    except:
        traceback.print_exc()
        send_error(link)
    finally:
        # done
        link.close()
Esempio n. 5
0
def serve(port, secret):
    """Run one formatter task ('read', 'write' or 'schema') over a JavaLink.

    The command read from the link supplies ``code``, ``task`` and optionally
    ``config``, ``pluginConfig`` and ``schema``. The Formatter class located
    by ``get_clazz_in_code`` is instantiated with as many of
    (config, plugin_config) as its ``__init__`` accepts.

    * ``read``: ``extract_rows`` from the link input to the link output.
    * ``write``: ``format_rows`` from the link input to the link output
      (``schema`` is mandatory).
    * ``schema``: ``extract_schema`` from the link input; a placeholder
      single-column schema is sent when none is found.

    Any exception is printed and reported over the link, wrapped as
    ``{'ok': False, 'error': ...}`` only for the streaming tasks; the link is
    always closed.
    """
    link = JavaLink(port, secret)
    link.connect()

    # stays None until a task is dispatched; drives the error format below
    command_is_streaming = None

    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # locate the formatter class and count its __init__ parameters
        # (the count includes 'self')
        formatter_class = get_clazz_in_code(code, Formatter)
        arg_count = len(inspect.getargspec(formatter_class.__init__).args)
        if arg_count not in (1, 2, 3):
            raise Exception(
                "Wrong signature of the Formatter subclass: %i args" %
                arg_count)
        formatter = formatter_class(*(config, plugin_config)[:arg_count - 1])

        task = command["task"]
        if task == "read" or task == "write":
            # both modes stream rows between the link's input and output
            command_is_streaming = True
            with link.get_input() as source, link.get_output() as sink:
                if task == "read":
                    # extract mode (schema is optional)
                    row_count = extract_rows(formatter,
                                             command.get("schema", None),
                                             source, sink)
                else:
                    # format mode (schema is mandatory)
                    row_count = format_rows(formatter, command["schema"],
                                            source, sink)
            # end of stream, then acknowledgment
            link.send_string('')
            link.send_json({'ok': True, 'count': row_count})

        elif task == "schema":
            # read schema mode
            command_is_streaming = False
            with link.get_input() as source:
                schema = extract_schema(formatter, source)
                if schema is None:
                    # placeholder sent when no schema could be extracted
                    schema = {
                        'columns': [{
                            'name': '__dku_empty_schema__',
                            'type': 'string'
                        }]
                    }
                link.send_json(schema)

        else:
            raise Exception("Unexpected task %s" % task)

    except:
        traceback.print_exc()
        error = get_json_friendly_error()
        # send null to mark failure or to mark end of stream
        link.send_string('')
        if command_is_streaming:
            link.send_json({'ok': False, 'error': error})
        else:
            link.send_json(error)
    finally:
        link.close()
Esempio n. 6
0
def serve(port, secret):
    """Instantiate a Connector and serve its tasks over a JavaLink.

    The initial command supplies ``code`` and optionally ``config`` /
    ``pluginConfig``. The Connector class located by ``get_clazz_in_code`` is
    instantiated with as many of (config, plugin_config) as its ``__init__``
    accepts, then requests are processed according to their ``task`` field
    until the peer has nothing more to send.

    A failure while reading rows is stored rather than raised, and reported
    when the ``finish_read_session`` task arrives. Any other exception is
    printed, reported over the link and swallowed; the link is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()
    try:
        # the first message describes the connector to build
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # locate the connector class and count its __init__ parameters
        # (the count includes 'self')
        connector_class = get_clazz_in_code(code, Connector)
        arg_count = len(inspect.getargspec(connector_class.__init__).args)
        if arg_count not in (1, 2, 3):
            raise Exception(
                "Wrong signature of the Connector subclass: %i args" %
                arg_count)
        connector = connector_class(*(config, plugin_config)[:arg_count - 1])

        link.send_json({'ok': True})

        # error of the latest read_rows task, if any
        stored_error = None
        while True:
            request = link.read_json()
            if request is None:
                break

            task = request["task"]
            logging.info("Processing task: %s" % task)
            if task == "read_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                limit = request.get("limit", None)
                stored_error = None
                try:
                    with link.get_output() as sink:
                        read_rows(connector, schema, partitioning,
                                  partition_id, limit, sink)
                except:
                    # keep the error for the finish_read_session task
                    logging.exception("Connector send fail, storing exception")
                    stored_error = get_json_friendly_error()
                link.send_string('')
            elif task == "finish_read_session":
                if stored_error is not None:
                    link.send_json({"ok": False, "error": stored_error})
                else:
                    link.send_json({"ok": True})
            elif task == "write_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                with link.get_input() as source:
                    write_rows(connector, schema, partitioning, partition_id,
                               source)
                link.send_json({'ok': True})
            elif task == "get_schema":
                read_schema = connector.get_read_schema()
                link.send_json({'schema': read_schema})
            elif task == "get_partitioning_scheme":
                scheme = connector.get_partitioning()
                link.send_json({'partitioning': scheme})
            elif task == "list_partitions":
                partitioning = request.get("partitioning", None)
                partitions = connector.list_partitions(partitioning)
                link.send_json({'partitions': partitions})
            elif task == "partition_exists":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                exists = connector.partition_exists(partitioning, partition_id)
                link.send_json({"exists": exists})
            elif task == "records_count":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                count = connector.get_records_count(partitioning, partition_id)
                link.send_json({"count": count})
            else:
                raise Exception("Unexpected task %s" % task)

        logging.info("Work done")
        # send end of stream
        link.send_string('')
    except:
        # empty string marks the failure, the error details follow
        link.send_string('')
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
Esempio n. 7
0
def serve(port, secret):
    """Build a predictor for a saved model and answer prediction requests.

    The initial command supplies ``modelFolder``. Conditional outputs are
    loaded from ``conditional_outputs.json`` in that folder (an empty list is
    used when loading fails), the predictor is built with
    ``build_predictor_for_saved_model`` and each request is answered by
    ``handle_predict``, with ``execTimeUS`` added to the response.

    Any exception is logged, reported over the link and swallowed; the link
    is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()

    try:
        # initialization info comes with the first command
        command = link.get_command()
        model_folder = command.get('modelFolder')
        outputs_path = osp.join(model_folder, "conditional_outputs.json")
        try:
            conditional_outputs = load_from_filepath(outputs_path)
        except Exception as e:
            # a load failure is not fatal: continue without conditional outputs
            logging.exception("Can't load conditional outputs: " + str(e))
            conditional_outputs = []
        predictor = build_predictor_for_saved_model(model_folder, "PREDICTION", conditional_outputs)
        logging.info("Predictor ready")
        link.send_json({"ok": True})

        # serve requests until the peer has nothing more to send
        while True:
            request = link.read_json()
            if request is None:
                break

            started = time.time()
            reply = handle_predict(predictor, request)
            finished = time.time()
            reply["execTimeUS"] = int(1000000 * (finished - started))
            link.send_json(reply)

        logging.info("Work done")
        # send end of stream
        link.send_string('')
    except:
        logging.exception("Prediction user code failed")
        # empty string marks the failure, the error details follow
        link.send_string('')
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
Esempio n. 8
0
def serve(port, secret):
    """Run one ``dataiku.doctor`` command and send its result over a JavaLink.

    The command read from the link supplies ``command`` (the name of the
    doctor command) and optionally ``arg`` (defaults to ``""``). Every command
    listed by ``commands._list_commands()`` is wrapped with ``json_api``; the
    requested one is called with ``arg`` and its return value is sent back.

    Any exception (including an unknown command name) is printed, reported
    over the link and swallowed; the link is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()
    # get work to do
    command = link.get_command()

    try:
        from dataiku.doctor import commands

        # registry of the available commands, each wrapped by json_api
        COMMANDS = {}
        for (command_name, command_method) in commands._list_commands():
            COMMANDS[command_name] = json_api(command_method)

        task = command["command"]
        arg = command.get("arg", "")

        logging.info("Running analysis command: %s" % task)
        if task not in COMMANDS:
            raise ValueError("Command %s is unknown." % task)

        ret = COMMANDS[task](arg)
        link.send_json(ret)

        # send end of stream
        link.send_string('')
    except:
        # empty string marks the failure, the error details follow
        link.send_string('')
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
Esempio n. 9
0
def serve(port, secret):
    """Load a user model and answer prediction requests over a JavaLink.

    The initial command supplies ``modelType``, ``codeFilePath`` and
    optionally ``resourceFolderPath``; they are used to build a
    ``LoadedModel``. Each subsequent request is answered by
    ``handle_predict(loaded_model, request["obj"])``, with ``execTimeUS``
    added to the response.

    While a request carrying ``usedAPIKey`` is processed, the key is exposed
    through the ``DKU_CURRENT_REQUEST_USED_API_KEY`` environment variable.

    Any exception is printed and reported over the link as a dict with
    ``errorType``, ``message`` and ``traceback`` (a list of
    [filename, lineno, name, line] entries); the link is always closed.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()

    # get work to do
    try:
        # retrieve the initialization info and initiate serving
        command = link.get_command()

        model_type = command.get('modelType')
        code_file = command.get('codeFilePath')
        data_folder = command.get('resourceFolderPath', None)

        loaded_model = LoadedModel(model_type, code_file, data_folder)

        logging.info("Predictor ready")
        link.send_json({"ok": True})

        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                break

            used_api_key = request.get("usedAPIKey", None)
            if used_api_key is not None:
                os.environ["DKU_CURRENT_REQUEST_USED_API_KEY"] = used_api_key

            before = time.time()
            response = handle_predict(loaded_model, request["obj"])
            after = time.time()
            response["execTimeUS"] = int(1000000 * (after - before))
            link.send_json(response)

            if used_api_key is not None:
                del os.environ["DKU_CURRENT_REQUEST_USED_API_KEY"]

        # send end of stream
        logging.info("Work done")
        link.send_string('')
    except:
        ex_type, ex, tb = sys.exc_info()
        traceback.print_exc()
        link.send_string('')  # send null to mark failure
        # extract_tb() returns plain tuples on Python 2 but FrameSummary
        # objects on Python 3, which the default JSON encoder rejects; turn
        # every frame into a list so the payload only holds basic types
        frames = [list(frame) for frame in traceback.extract_tb(tb)]
        link.send_json({'errorType': str(ex_type), 'message': str(ex), 'traceback': frames})
    finally:
        # done
        link.close()
Esempio n. 10
0
def serve(port, secret):
    """Start a web app backend and report its port over a JavaLink.

    The command read from the link supplies ``projectKey``, ``webAppId`` and
    ``code``. A ``/__ping`` route is registered on ``app``, the received code
    is executed in this module's globals, a werkzeug server is created on
    127.0.0.1 (port 0) and its actual port is sent as a ``STARTED`` message
    before the server starts serving forever.

    Any exception is logged and reported as an ``ERROR`` message followed by
    the JSON-friendly error; the link is always closed.
    """
    logging.info("Starting Webapp backend")

    link = JavaLink(port, secret)
    link.connect()

    logging.info("Webapp backend connected to DSS")

    # get work to do
    command = link.get_command()
    try:
        web_app_ref = (command["projectKey"], command["webAppId"])
        logging.info("Starting backend for web app: %s.%s" % web_app_ref)

        @app.route('/__ping')
        def ping():
            return "pong"

        # run the user's code in globals so that flask can find what it defines
        exec(command["code"], globals(), globals())

        # create the server, then tell the peer which port it listens on
        from werkzeug.serving import make_server
        srv = make_server("127.0.0.1", 0, app)
        link.send_json({'type': 'STARTED', "port": srv.server_port})

        srv.serve_forever()

    except:
        logging.exception("Backend main loop failed")
        link.send_json({'type': 'ERROR'})
        link.send_json(get_json_friendly_error())

    finally:
        link.close()
Esempio n. 11
0
def serve(port, secret):
    """Execute the received code and serve its helper function over a JavaLink.

    The initial command supplies ``code`` and optionally ``pluginConfig``. The
    code is executed in a fresh namespace; the helper is the only function
    defined there, or the one named ``do`` when several (or none) are defined.
    Each request is answered by calling the helper with as many of
    (payload, config, plugin_config, inputs) as it declares parameters.

    Any exception is printed and reported over the link: as the raw
    JSON-friendly error before serving starts, wrapped as
    ``{'ok': False, 'error': ...}`` afterwards. The link is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()
    # switches the error format once the request loop has started
    is_serving = False
    try:
        command = link.get_command()
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # run the code and pick the helper function out of its namespace
        ctx = {}
        python2_friendly_exec(code, ctx, ctx)

        functions = [o for o in ctx.values() if inspect.isfunction(o)]
        if len(functions) == 1:
            f = functions[0]
        else:
            f = ctx.get('do', None)

        if f is None:
            raise Exception('No function "do" defined')
        f_args_count = len(inspect.getargspec(f).args)
        if f_args_count >= 5:
            reason = "Too many arguments for the do() function : %i args" % f_args_count
            raise Exception(reason)
        link.send_json({'ok': True})

        def call_do(payload, config, plugin_config, inputs):
            # f_args_count is between 0 and 4 here: pass that many leading arguments
            available = (payload, config, plugin_config, inputs)
            return f(*available[:f_args_count])

        is_serving = True
        # serve requests until the peer has nothing more to send
        while True:
            request = link.read_json()
            if request is None:
                break

            payload = request.get('payload', None)
            config = request.get('config', {})
            inputs = request.get('inputs', [])
            response = call_do(payload, config, plugin_config, inputs)
            if response is None:
                raise Exception("Empty response to %s" % json.dumps(request))

            link.send_json(response)

        # send end of stream
        link.send_string('')
    except:
        error = get_json_friendly_error()
        # empty string marks the failure, the error details follow
        link.send_string('')
        traceback.print_exc()
        if is_serving:
            link.send_json({'ok': False, 'error': error})
        else:
            link.send_json(error)
    finally:
        link.close()
Esempio n. 12
0
def serve(port, secret):
    """Execute the received code and send the result of its expansion function.

    The command read from the link supplies ``code``, ``notebook`` and
    optionally ``projectKey``, ``datasetProjectKey`` and ``datasetName``. The
    code is executed in a fresh namespace; the expansion function is the only
    function defined there, or the one named ``expand`` when several (or
    none) are defined. It must declare between 1 and 4 parameters and is
    called with as many of (notebook, project_key, dataset_project_key,
    dataset_name).

    The result is sent as a block (utf8-encoded first when it is ``unicode``),
    followed by the end-of-stream marker and an acknowledgment holding its
    length. Any exception is printed and reported as
    ``{'ok': False, 'error': ...}``; the link is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        code = command["code"]
        notebook = command["notebook"]
        project_key = command.get("projectKey", None)
        dataset_project_key = command.get("datasetProjectKey", None)
        dataset_name = command.get("datasetName", None)

        # run the code and pick the expansion function out of its namespace
        ctx = {}
        python2_friendly_exec(code, ctx, ctx)

        functions = [o for o in ctx.values() if inspect.isfunction(o)]
        if len(functions) == 1:
            f = functions[0]
        else:
            f = ctx.get('expand', None)

        if f is None:
            raise Exception('No function "expand" defined')
        f_args_count = len(inspect.getargspec(f).args)
        if f_args_count >= 5:
            raise Exception(
                "Too many arguments for the expand() function : %i args" % f_args_count)
        if f_args_count < 1:
            raise Exception(
                "Too few arguments for the expand() function : %i args" % f_args_count)

        def call_expand(notebook, project_key, dataset_project_key,
                        dataset_name):
            # f_args_count is between 1 and 4 here: pass that many leading arguments
            available = (notebook, project_key, dataset_project_key,
                         dataset_name)
            return f(*available[:f_args_count])

        # expand for real
        expanded = call_expand(notebook, project_key, dataset_project_key,
                               dataset_name)

        # we need to send a str (for python streams) and the java end is
        # waiting for utf8
        if isinstance(expanded, unicode):
            expanded = expanded.encode('utf8')

        link.send_block(expanded)

        # end of stream, then acknowledgment
        link.send_string('')
        link.send_json({'ok': True, 'count': len(expanded)})
    except:
        # empty string marks the failure, the error details follow
        link.send_string('')
        traceback.print_exc()
        link.send_json({'ok': False, 'error': get_json_friendly_error()})
    finally:
        link.close()
Esempio n. 13
0
def serve(port, secret):
    """Run an export task with the Exporter found in the received code.

    The command read from the link supplies ``code``, ``task`` and optionally
    ``config`` / ``pluginConfig``; for the ``export`` task it also supplies
    ``exportBehavior``, ``schema`` and optionally ``destinationFilePath``.
    The Exporter class located by ``get_clazz_in_code`` is instantiated with
    as many of (config, plugin_config) as its ``__init__`` accepts, rows are
    exported from the link input with ``export_rows`` and the row count is
    acknowledged.

    Any exception (including an unexpected task) is printed, reported over
    the link and swallowed; the link is always closed.
    """
    link = JavaLink(port, secret)
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # locate the exporter class and count its __init__ parameters
        # (the count includes 'self')
        exporter_class = get_clazz_in_code(code, Exporter)
        arg_count = len(inspect.getargspec(exporter_class.__init__).args)
        if arg_count not in (1, 2, 3):
            raise Exception(
                "Wrong signature of the Exporter subclass: %i args" %
                arg_count)
        exporter = exporter_class(*(config, plugin_config)[:arg_count - 1])

        # only the export task is supported
        task = command["task"]
        if task != "export":
            raise Exception("Unexpected task %s" % task)

        # schema is mandatory
        with link.get_input() as source:
            row_count = export_rows(
                exporter, command["exportBehavior"], command["schema"],
                source, command.get("destinationFilePath", None))

        # acknowledge with the number of rows
        link.send_json({'ok': True, 'count': row_count})
    except:
        # empty string marks the failure, the error details follow
        link.send_string('')
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        link.close()
Esempio n. 14
0
def serve(port, secret):
    """Run a metric computation or a check with the received code.

    The command read from the link supplies ``code``, ``objectType``
    (``DATASET``, ``SAVED_MODEL`` or ``MANAGED_FOLDER``), ``fullName``,
    ``command`` (``compute`` or ``check``) and optionally ``config``,
    ``pluginConfig``, ``partitionId`` and ``lastValues``. The code is executed
    in a namespace pre-populated with ``config`` and ``plugin_config``; the
    function to run is the only function defined there, or the one named
    ``process`` when several (or none) are defined.

    An unknown ``objectType`` is reported as an explicit error instead of
    surfacing as an unbound variable.

    Any exception is printed, reported over the link and swallowed; the link
    is always closed.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # NOTE(review): this executes code received over the link; presumably
        # the peer is trusted - confirm before exposing this to other callers
        ctx = {"config": config, "plugin_config": plugin_config}
        exec(code, ctx, ctx)

        functions = [o for o in ctx.values() if inspect.isfunction(o)]
        f = functions[0] if len(functions) == 1 else ctx.get('process', None)

        if f is None:
            raise Exception('No function "process" defined')

        object_type = command['objectType']
        full_name = command['fullName']
        partition_id = command.get('partitionId', None)
        if object_type == "DATASET":
            obj_arg = Dataset(full_name)
        elif object_type == "SAVED_MODEL":
            obj_arg = Model(full_name)
        elif object_type == "MANAGED_FOLDER":
            obj_arg = Folder(full_name)
        else:
            # without this branch 'obj_arg' would be unbound below
            raise Exception("Unknown object type: %s" % object_type)

        # work and get output
        if command['command'] == 'compute':
            result = compute_metric(obj_arg, partition_id, f)
        elif command['command'] == 'check':
            last_values = command.get("lastValues", {})
            result = run_check(obj_arg, partition_id, last_values, f)
        else:
            raise Exception("Unknown command")

        if result is None:
            raise Exception("Code did not return a result")

        link.send_json(result)
    except:
        traceback.print_exc()
        link.send_string('')  # send null to mark failure
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()