Example #1
0
def serve(port, secret):
    """Serve FSProvider requests received over a JavaLink connection.

    Connects a ``JavaLink`` built from ``port`` and ``secret``, reads the
    initial command, instantiates the ``FSProvider`` subclass found in the
    command's ``code`` and then answers JSON requests through
    ``handle_request`` until ``read_json()`` returns ``None`` or
    ``handle_request`` reports the session as closed.

    The constructor of the subclass is called with as many of
    ``(root, config, plugin_config)`` as its ``__init__`` declares.

    On any failure an empty string is sent, the traceback is printed and a
    JSON-friendly error is sent. The link is closed in every case.

    :param port: passed unchanged to ``JavaLink``
    :param secret: passed unchanged to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()

        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]

        # get the fs provider object
        clazz = get_clazz_in_code(code, FSProvider)
        # inspect.getargspec() no longer exists on Python 3.11+; use
        # getfullargspec() whenever the interpreter provides it
        get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        # the count includes 'self'
        arg_count = len(get_argspec(clazz.__init__).args)
        fsprovider = None
        if arg_count == 1:
            fsprovider = clazz()
        elif arg_count == 2:
            fsprovider = clazz(root)
        elif arg_count == 3:
            fsprovider = clazz(root, config)
        elif arg_count == 4:
            fsprovider = clazz(root, config, plugin_config)
        else:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            # the peer is told about the failure before the generic error
            # handling below kicks in
            link.send_json({'ok': False, 'reason': reason})
            raise Exception(reason)
        link.send_json({'ok': True})

        # loop and process commands
        closed = False
        while not closed:
            request = link.read_json()
            if request is None:
                break

            closed, response = handle_request(request, fsprovider, closed)

            link.send_json(response)

        # send end of stream
        link.send_string('')
    except:
        # catch-all kept on purpose: whatever went wrong is reported back
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
Example #2
0
def serve(port, secret):
    """Run a single Exporter task received over a JavaLink connection.

    Connects a ``JavaLink`` built from ``port`` and ``secret``, reads the
    command, instantiates the ``Exporter`` subclass found in the command's
    ``code`` and, for the ``"export"`` task, feeds the link's input stream
    to ``export_rows``. The resulting row count is sent back in the
    acknowledgment.

    The constructor of the subclass is called with as many of
    ``(config, plugin_config)`` as its ``__init__`` declares.

    On any failure after the command has been read, an empty string is
    sent, the traceback is printed and a JSON-friendly error is sent. The
    link is then closed in every case.

    :param port: passed unchanged to ``JavaLink``
    :param secret: passed unchanged to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the exporter object
        clazz = get_clazz_in_code(code, Exporter)
        # inspect.getargspec() no longer exists on Python 3.11+; use
        # getfullargspec() whenever the interpreter provides it
        get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        # the count includes 'self'
        arg_count = len(get_argspec(clazz.__init__).args)
        exporter = None
        if arg_count == 1:
            exporter = clazz()
        elif arg_count == 2:
            exporter = clazz(config)
        elif arg_count == 3:
            exporter = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Exporter subclass: %i args" %
                arg_count)

        # get task and dispatch work to exporter
        task = command["task"]
        if task == "export":
            # schema is mandatory
            with link.get_input() as input_stream:
                row_count = export_rows(
                    exporter, command["exportBehavior"], command["schema"],
                    input_stream, command.get("destinationFilePath", None))

        else:
            raise Exception("Unexpected task %s" % task)

        # send ack
        link.send_json({'ok': True, 'count': row_count})
    except:
        # catch-all kept on purpose: whatever went wrong is reported back
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
Example #3
0
def serve(port, secret):
    """Stream one path of an FSProvider over a JavaLink connection.

    Connects a ``JavaLink`` built from ``port`` and ``secret``, reads the
    command, instantiates the ``FSProvider`` subclass found in the command's
    ``code`` and calls its ``read(path, output, limit)`` with the link's
    output stream. An empty string and ``{'ok': True}`` are sent afterwards.

    The constructor of the subclass is called with as many of
    ``(root, config, plugin_config)`` as its ``__init__`` declares.

    On any failure an empty string is sent, the traceback is printed and
    ``{'ok': False, 'error': ...}`` is sent. The link is closed in every
    case.

    :param port: passed unchanged to ``JavaLink``
    :param secret: passed unchanged to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()

        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]
        path = command["path"]
        limit = command["limit"]

        # get the fs provider object
        clazz = get_clazz_in_code(code, FSProvider)
        # inspect.getargspec() no longer exists on Python 3.11+; use
        # getfullargspec() whenever the interpreter provides it
        get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        # the count includes 'self'
        arg_count = len(get_argspec(clazz.__init__).args)
        fsprovider = None
        if arg_count == 1:
            fsprovider = clazz()
        elif arg_count == 2:
            fsprovider = clazz(root)
        elif arg_count == 3:
            fsprovider = clazz(root, config)
        elif arg_count == 4:
            fsprovider = clazz(root, config, plugin_config)
        else:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            raise Exception(reason)

        with link.get_output() as output:
            fsprovider.read(path, output, limit)

        # send end of stream
        link.send_string('')
        # send ack
        link.send_json({'ok': True})
    except:
        # catch-all kept on purpose: whatever went wrong is reported back
        link.send_string('')  # end stream to send ack
        traceback.print_exc()
        error = get_json_friendly_error()
        link.send_json({'ok': False, 'error': error})
    finally:
        # done
        link.close()
Example #4
0
def serve(port, secret):
    """Run a single Formatter task received over a JavaLink connection.

    Connects a ``JavaLink`` built from ``port`` and ``secret``, reads the
    command, instantiates the ``Formatter`` subclass found in the command's
    ``code`` and dispatches on the command's ``task``:

    * ``"read"``: ``extract_rows`` from the link input to the link output,
      then an empty string and ``{'ok': True, 'count': ...}`` are sent;
    * ``"write"``: ``format_rows`` from the link input to the link output
      (``schema`` is mandatory), followed by the same two messages;
    * ``"schema"``: ``extract_schema`` from the link input; the schema is
      sent as JSON, or a single-column ``__dku_empty_schema__`` placeholder
      when ``extract_schema`` returns ``None``.

    The constructor of the subclass is called with as many of
    ``(config, plugin_config)`` as its ``__init__`` declares.

    On failure the traceback is printed and an empty string is sent. The
    error is then sent wrapped in ``{'ok': False, 'error': ...}`` if a
    streaming task ("read"/"write") had started, and as-is otherwise. The
    link is closed in every case once the command has been read.

    :param port: passed unchanged to ``JavaLink``
    :param secret: passed unchanged to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()

    # stays None until a task has been recognized; drives the shape of the
    # error message in the handler below
    command_is_streaming = None

    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the formatter object
        clazz = get_clazz_in_code(code, Formatter)
        # inspect.getargspec() no longer exists on Python 3.11+; use
        # getfullargspec() whenever the interpreter provides it
        get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        # the count includes 'self'
        arg_count = len(get_argspec(clazz.__init__).args)
        formatter = None
        if arg_count == 1:
            formatter = clazz()
        elif arg_count == 2:
            formatter = clazz(config)
        elif arg_count == 3:
            formatter = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Formatter subclass: %i args" %
                arg_count)

        # get task and dispatch work to formatter
        task = command["task"]
        if task == "read":
            # extract mode
            command_is_streaming = True
            with link.get_input() as in_stream, link.get_output() as out_stream:
                row_count = extract_rows(formatter,
                                         command.get("schema", None),
                                         in_stream, out_stream)
            # send end of stream
            link.send_string('')
            # send acknowledgment
            link.send_json({'ok': True, 'count': row_count})

        elif task == "write":
            # format mode (schema is mandatory)
            command_is_streaming = True
            with link.get_input() as in_stream, link.get_output() as out_stream:
                row_count = format_rows(formatter, command["schema"],
                                        in_stream, out_stream)
            # send end of stream
            link.send_string('')
            # send acknowledgment
            link.send_json({'ok': True, 'count': row_count})

        elif task == "schema":
            # read schema mode
            command_is_streaming = False
            with link.get_input() as in_stream:
                schema = extract_schema(formatter, in_stream)
                if schema is not None:
                    link.send_json(schema)
                else:
                    # no schema could be extracted: send a placeholder
                    link.send_json({
                        'columns': [{
                            'name': '__dku_empty_schema__',
                            'type': 'string'
                        }]
                    })

        else:
            raise Exception("Unexpected task %s" % task)

    except:
        # catch-all kept on purpose: whatever went wrong is reported back
        traceback.print_exc()
        error = get_json_friendly_error()
        link.send_string(
            '')  # send null to mark failure or to mark end of stream
        if not command_is_streaming:
            link.send_json(error)
        else:
            link.send_json({'ok': False, 'error': error})
    finally:
        # done
        link.close()
Example #5
0
def serve(port, secret):
    """Run a single Cluster call received over a JavaLink connection.

    Connects a ``JavaLink`` built from ``port`` and ``secret``, reads the
    command, instantiates the ``Cluster`` subclass found in the command's
    ``code`` (its ``__init__`` must declare exactly
    ``self, cluster_id, name, config, plugin_config``) and dispatches on the
    command's ``type``:

    * ``"start"``: calls ``start()`` or ``start(report_progress)`` depending
      on its signature; the returned list/tuple is turned into a cluster
      setup dict and sent with ``send_result_json``;
    * ``"stop"``: calls ``stop(data)`` or ``stop(data, report_progress)``
      depending on its signature, then sends ``{'ok': True}`` with
      ``send_result_json``;
    * anything else: looked up as a method on the cluster, called with
      ``data`` and its result sent back as ``{'ok': True, 'response': ...}``.

    An unsupported ``start``/``stop`` signature is reported as an error
    rather than being silently skipped. On failure the traceback is printed
    and ``send_error(link)`` is called. The link is closed in every case
    once the command has been read.

    :param port: passed unchanged to ``JavaLink``
    :param secret: passed unchanged to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        cluster_id = command["clusterId"]
        cluster_name = command["clusterName"]

        # inspect.getargspec() no longer exists on Python 3.11+; use
        # getfullargspec() whenever the interpreter provides it
        get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec

        # get the cluster object
        clazz = get_clazz_in_code(code, Cluster)
        # the count includes 'self'
        arg_count = len(get_argspec(clazz.__init__).args)
        cluster = None
        if arg_count == 5:
            cluster = clazz(cluster_id, cluster_name, config, plugin_config)
        else:
            raise Exception("Wrong signature of the Cluster subclass: %i args but expected 5 (self, cluster_id, name, config, plugin_config)" % arg_count)

        # work
        call_name = command["type"]
        data = command.get("data", {})
        if call_name == 'start':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_start_progress_target, link)

            arg_count = len(get_argspec(cluster.start).args)
            if arg_count == 1:
                result = cluster.start()
            elif arg_count == 2:
                result = cluster.start(report_progress)
            else:
                # without this branch 'result' would be unbound below
                raise Exception("Wrong signature of the start() method: %i args" % arg_count)

            if isinstance(result, (list, tuple)):
                cluster_settings = result[0]
                cluster_data = result[1] if len(result) > 1 else {}
                cluster_setup = {
                    'hadoopSettings': cluster_settings.get('hadoop', None),
                    'hiveSettings': cluster_settings.get('hive', None),
                    'impalaSettings': cluster_settings.get('impala', None),
                    'sparkSettings': cluster_settings.get('spark', None),
                    'data': cluster_data,
                }
                send_result_json(cluster_setup, link)
            else:
                raise Exception("start() didn't return an object of a valid type: %s" % type(result))
        elif call_name == 'stop':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_stop_progress_target, link)

            arg_count = len(get_argspec(cluster.stop).args)
            if arg_count == 2:
                cluster.stop(data)
            elif arg_count == 3:
                cluster.stop(data, report_progress)
            else:
                # never acknowledge a stop that was not actually performed
                raise Exception("Wrong signature of the stop() method: %i args" % arg_count)

            send_result_json({'ok': True}, link)
        else:
            if not hasattr(cluster, call_name):
                raise Exception("Wrong call type : %s" % call_name)
            action_attr = getattr(cluster, call_name)
            if not inspect.ismethod(action_attr):
                raise Exception("Wrong call type : %s is not a method" % call_name)
            result = action_attr(data)
            # convert to something that is legit for a JsonObject
            if result is None:
                result = {}
            if not isinstance(result, dict):
                result = {'result': result}
            # send
            link.send_json({'ok': True, 'response': result})
        # send end of stream (data is expected as a stream)
        link.send_string('')

    except:
        # catch-all kept on purpose: whatever went wrong is reported back
        traceback.print_exc()
        send_error(link)
    finally:
        # done
        link.close()
Example #6
0
def serve(port, secret):
    """Serve Connector requests received over a JavaLink connection.

    Connects a ``JavaLink`` built from ``port`` and ``secret``, reads the
    initial command, instantiates the ``Connector`` subclass found in the
    command's ``code``, acknowledges with ``{'ok': True}`` and then
    processes JSON requests until ``read_json()`` returns ``None``. Each
    request is dispatched on its ``task`` field (``read_rows``,
    ``finish_read_session``, ``write_rows``, ``get_schema``,
    ``get_partitioning_scheme``, ``list_partitions``, ``partition_exists``,
    ``records_count``).

    A failure inside ``read_rows`` does not end the session: the error is
    stored and reported by the next ``finish_read_session`` request. Any
    other failure sends an empty string, prints the traceback and sends a
    JSON-friendly error. The link is closed in every case.

    The constructor of the subclass is called with as many of
    ``(config, plugin_config)`` as its ``__init__`` declares.

    :param port: passed unchanged to ``JavaLink``
    :param secret: passed unchanged to ``JavaLink``
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]

        # get the connector object
        clazz = get_clazz_in_code(code, Connector)
        # inspect.getargspec() no longer exists on Python 3.11+; use
        # getfullargspec() whenever the interpreter provides it
        get_argspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        # the count includes 'self'
        arg_count = len(get_argspec(clazz.__init__).args)
        connector = None
        if arg_count == 1:
            connector = clazz()
        elif arg_count == 2:
            connector = clazz(config)
        elif arg_count == 3:
            connector = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Connector subclass: %i args" %
                arg_count)

        link.send_json({'ok': True})

        # error of the last read_rows, reported by finish_read_session
        stored_error = None
        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                break

            task = request["task"]
            logging.info("Processing task: %s", task)
            if task == "read_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                limit = request.get("limit", None)
                stored_error = None
                try:
                    with link.get_output() as output:
                        read_rows(connector, schema, partitioning,
                                  partition_id, limit, output)
                except:
                    # keep the session alive; the error is handed over on
                    # the next finish_read_session request
                    logging.exception("Connector send fail, storing exception")
                    stored_error = get_json_friendly_error()
                link.send_string('')
            elif task == "finish_read_session":
                if stored_error is None:
                    link.send_json({"ok": True})
                else:
                    link.send_json({"ok": False, "error": stored_error})
            elif task == "write_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                with link.get_input() as input_stream:
                    write_rows(connector, schema, partitioning, partition_id,
                               input_stream)
                link.send_json({'ok': True})
            elif task == "get_schema":
                link.send_json({'schema': connector.get_read_schema()})
            elif task == "get_partitioning_scheme":
                link.send_json({'partitioning': connector.get_partitioning()})
            elif task == "list_partitions":
                partitioning = request.get("partitioning", None)
                link.send_json(
                    {'partitions': connector.list_partitions(partitioning)})
            elif task == "partition_exists":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                link.send_json({
                    "exists":
                    connector.partition_exists(partitioning, partition_id)
                })
            elif task == "records_count":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                link.send_json({
                    "count":
                    connector.get_records_count(partitioning, partition_id)
                })
            else:
                raise Exception("Unexpected task %s" % task)

        # send end of stream
        logging.info("Work done")
        link.send_string('')
    except:
        # catch-all kept on purpose: whatever went wrong is reported back
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()