def serve(port, secret):
    """Serve an FSProvider command session to the peer process on `port`.

    Connects back to the peer (presumably a JVM process, given `JavaLink`),
    reads one setup command containing the provider code, instantiates the
    FSProvider subclass found in that code, then loops dispatching JSON
    requests through handle_request() until the peer closes the stream or a
    handler marks the session closed.

    :param port: TCP port to connect back to.
    :param secret: shared secret used by JavaLink to authenticate.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    try:
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]
        # get the fs provider object
        clazz = get_clazz_in_code(code, FSProvider)
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec (same .args attribute) with a Python 2 fallback.
        getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getargspec(clazz.__init__).args)
        fsprovider = None
        # Instantiate with as many of (root, config, plugin_config) as the
        # subclass constructor declares (arg_count includes `self`).
        if arg_count == 1:
            fsprovider = clazz()
        elif arg_count == 2:
            fsprovider = clazz(root)
        elif arg_count == 3:
            fsprovider = clazz(root, config)
        elif arg_count == 4:
            fsprovider = clazz(root, config, plugin_config)
        else:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            # report the refusal explicitly, then abort the session
            link.send_json({'ok': False, 'reason': reason})
            raise Exception(reason)
        link.send_json({'ok': True})
        # loop and process commands
        closed = False
        while not closed:
            request = link.read_json()
            if request is None:
                # peer closed the stream
                break
            closed, response = handle_request(request, fsprovider, closed)
            link.send_json(response)
        # send end of stream
        link.send_string('')
    except:  # noqa: E722 -- deliberately broad: any failure must reach the peer
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve a single Exporter command to the peer process on `port`.

    Connects back to the peer (presumably a JVM process, given `JavaLink`),
    reads one command containing the exporter code, instantiates the
    Exporter subclass found in that code, runs the requested "export" task
    over the streamed input, and acknowledges with the exported row count.

    :param port: TCP port to connect back to.
    :param secret: shared secret used by JavaLink to authenticate.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        # get the exporter object
        clazz = get_clazz_in_code(code, Exporter)
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec (same .args attribute) with a Python 2 fallback.
        getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getargspec(clazz.__init__).args)
        exporter = None
        # Instantiate with as many of (config, plugin_config) as the
        # subclass constructor declares (arg_count includes `self`).
        if arg_count == 1:
            exporter = clazz()
        elif arg_count == 2:
            exporter = clazz(config)
        elif arg_count == 3:
            exporter = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Exporter subclass: %i args" % arg_count)
        # get task and dispatch work to exporter
        task = command["task"]
        if task == "export":
            # schema is mandatory
            with link.get_input() as input_stream:
                row_count = export_rows(
                    exporter, command["exportBehavior"], command["schema"],
                    input_stream, command.get("destinationFilePath", None))
        else:
            raise Exception("Unexpected task %s" % task)
        # send ack
        link.send_json({'ok': True, 'count': row_count})
    except:  # noqa: E722 -- deliberately broad: any failure must reach the peer
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve a single FSProvider read to the peer process on `port`.

    Connects back to the peer (presumably a JVM process, given `JavaLink`),
    reads one command containing the provider code plus a path/limit,
    instantiates the FSProvider subclass found in that code, streams the
    file content back via `fsprovider.read`, then acknowledges.

    :param port: TCP port to connect back to.
    :param secret: shared secret used by JavaLink to authenticate.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    try:
        # get work to do
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        root = command["root"]
        path = command["path"]
        limit = command["limit"]
        # get the fs provider object
        clazz = get_clazz_in_code(code, FSProvider)
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec (same .args attribute) with a Python 2 fallback.
        getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getargspec(clazz.__init__).args)
        fsprovider = None
        # Instantiate with as many of (root, config, plugin_config) as the
        # subclass constructor declares (arg_count includes `self`).
        if arg_count == 1:
            fsprovider = clazz()
        elif arg_count == 2:
            fsprovider = clazz(root)
        elif arg_count == 3:
            fsprovider = clazz(root, config)
        elif arg_count == 4:
            fsprovider = clazz(root, config, plugin_config)
        else:
            reason = "Wrong signature of the FSProvider subclass: %i args" % arg_count
            raise Exception(reason)
        # stream the requested content back to the peer
        with link.get_output() as output:
            fsprovider.read(path, output, limit)
        # send end of stream
        link.send_string('')
        # send ack
        link.send_json({'ok':True})
    except:  # noqa: E722 -- deliberately broad: any failure must reach the peer
        link.send_string('')  # end stream to send ack
        traceback.print_exc()
        error = get_json_friendly_error()
        link.send_json({'ok':False, 'error':error})
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve a single Formatter command to the peer process on `port`.

    Connects back to the peer (presumably a JVM process, given `JavaLink`),
    reads one command containing the formatter code, instantiates the
    Formatter subclass found in that code, then dispatches on the task:
    "read" (extract rows), "write" (format rows) or "schema" (infer schema).
    The first two are streaming tasks and use the ok/count ack protocol; the
    error reply format differs accordingly (tracked by command_is_streaming).

    :param port: TCP port to connect back to.
    :param secret: shared secret used by JavaLink to authenticate.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # None until the task is known; selects the error reply shape on failure
    command_is_streaming = None
    # get work to do
    command = link.get_command()
    try:
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        # get the formatter object
        clazz = get_clazz_in_code(code, Formatter)
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec (same .args attribute) with a Python 2 fallback.
        getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getargspec(clazz.__init__).args)
        formatter = None
        # Instantiate with as many of (config, plugin_config) as the
        # subclass constructor declares (arg_count includes `self`).
        if arg_count == 1:
            formatter = clazz()
        elif arg_count == 2:
            formatter = clazz(config)
        elif arg_count == 3:
            formatter = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Formatter subclass: %i args" % arg_count)
        # get task and dispatch work to formatter
        task = command["task"]
        if task == "read":
            # extract mode
            command_is_streaming = True
            with link.get_input() as input_stream, link.get_output() as output_stream:
                row_count = extract_rows(formatter, command.get("schema", None),
                                         input_stream, output_stream)
            # send end of stream
            link.send_string('')
            # send acknowledgment
            link.send_json({'ok': True, 'count': row_count})
        elif task == "write":
            # format mode (schema is mandatory)
            command_is_streaming = True
            with link.get_input() as input_stream, link.get_output() as output_stream:
                row_count = format_rows(formatter, command["schema"],
                                        input_stream, output_stream)
            # send end of stream
            link.send_string('')
            # send acknowledgment
            link.send_json({'ok': True, 'count': row_count})
        elif task == "schema":
            # read schema mode
            command_is_streaming = False
            with link.get_input() as input_stream:
                schema = extract_schema(formatter, input_stream)
                if schema is not None:
                    link.send_json(schema)
                else:
                    # placeholder column so the peer never receives an
                    # empty schema object
                    link.send_json({
                        'columns': [{
                            'name': '__dku_empty_schema__',
                            'type': 'string'
                        }]
                    })
        else:
            raise Exception("Unexpected task %s" % task)
    except:  # noqa: E722 -- deliberately broad: any failure must reach the peer
        traceback.print_exc()
        error = get_json_friendly_error()
        link.send_string(
            '')  # send null to mark failure or to mark end of stream
        if not command_is_streaming:
            link.send_json(error)
        else:
            link.send_json({'ok': False, 'error': error})
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve a single Cluster command to the peer process on `port`.

    Connects back to the peer (presumably a JVM process, given `JavaLink`),
    reads one command containing the cluster code, instantiates the Cluster
    subclass found in that code, then dispatches on the command type:
    'start' (returns hadoop/hive/impala/spark settings plus opaque data),
    'stop', or any other name resolved as a method on the cluster object.

    :param port: TCP port to connect back to.
    :param secret: shared secret used by JavaLink to authenticate.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    command = link.get_command()
    try:
        project_key = command.get("projectKey", {})
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        cluster_id = command["clusterId"]
        cluster_name = command["clusterName"]
        # get the cluster object
        clazz = get_clazz_in_code(code, Cluster)
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec (same .args attribute) with a Python 2 fallback.
        getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getargspec(clazz.__init__).args)
        cluster = None
        if arg_count == 5:
            cluster = clazz(cluster_id, cluster_name, config, plugin_config)
        else:
            raise Exception("Wrong signature of the Cluster subclass: %i args but expected 5 (self, cluster_id, name, config, plugin_config)" % arg_count)
        # work
        call_name = command["type"]
        data = command.get("data", {})
        if call_name == 'start':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_start_progress_target, link)
            arg_count = len(getargspec(cluster.start).args)
            if arg_count == 1:
                result = cluster.start()
            elif arg_count == 2:
                result = cluster.start(report_progress)
            else:
                # previously fell through with `result` unbound, which
                # surfaced as a confusing NameError below
                raise Exception("Wrong signature of start(): %i args" % arg_count)
            if isinstance(result, list) or isinstance(result, tuple):
                cluster_settings = result[0]
                cluster_data = result[1] if len(result) > 1 else {}
                cluster_setup = {}
                cluster_setup['hadoopSettings'] = cluster_settings.get('hadoop', None)
                cluster_setup['hiveSettings'] = cluster_settings.get('hive', None)
                cluster_setup['impalaSettings'] = cluster_settings.get('impala', None)
                cluster_setup['sparkSettings'] = cluster_settings.get('spark', None)
                cluster_setup['data'] = cluster_data
                send_result_json(cluster_setup, link)
            else:
                raise Exception("start() didn't return an object of a valid type: %s" % type(result))
        elif call_name == 'stop':
            # init progress reporting if relevant
            report_progress = get_progress_callback(cluster.get_stop_progress_target, link)
            arg_count = len(getargspec(cluster.stop).args)
            if arg_count == 2:
                result = cluster.stop(data)
            elif arg_count == 3:
                result = cluster.stop(data, report_progress)
            else:
                # previously skipped the stop() call entirely yet still
                # acknowledged success
                raise Exception("Wrong signature of stop(): %i args" % arg_count)
            send_result_json({'ok':True}, link)
        else:
            # generic action: dispatch to a method of the same name
            if hasattr(cluster, call_name):
                action_attr = getattr(cluster, call_name)
                if inspect.ismethod(action_attr):
                    result = action_attr(data)
                    # convert to something that is legit for a JsonObject
                    if result is None:
                        result = {}
                    if not isinstance(result, dict):
                        result = {'result':result}
                    # send
                    link.send_json({'ok':True, 'response':result})
                else:
                    raise Exception("Wrong call type : %s is not a method" % call_name)
            else:
                raise Exception("Wrong call type : %s" % call_name)
        # send end of stream (data is expected as a stream)
        link.send_string('')
    except:  # noqa: E722 -- deliberately broad: any failure must reach the peer
        traceback.print_exc()
        send_error(link)
    finally:
        # done
        link.close()
def serve(port, secret):
    """Serve a Connector command session to the peer process on `port`.

    Connects back to the peer (presumably a JVM process, given `JavaLink`),
    reads one setup command containing the connector code, instantiates the
    Connector subclass found in that code, then loops dispatching JSON
    requests (read/write rows, schema, partitioning queries) until the peer
    closes the stream. Errors during a streamed read are stored and reported
    on the subsequent "finish_read_session" request rather than aborting the
    session.

    :param port: TCP port to connect back to.
    :param secret: shared secret used by JavaLink to authenticate.
    """
    link = JavaLink(port, secret)
    # initiate connection
    link.connect()
    # get work to do
    try:
        command = link.get_command()
        config = command.get("config", {})
        plugin_config = command.get("pluginConfig", {})
        code = command["code"]
        # get the connector object
        clazz = get_clazz_in_code(code, Connector)
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec (same .args attribute) with a Python 2 fallback.
        getargspec = getattr(inspect, "getfullargspec", None) or inspect.getargspec
        arg_count = len(getargspec(clazz.__init__).args)
        connector = None
        # Instantiate with as many of (config, plugin_config) as the
        # subclass constructor declares (arg_count includes `self`).
        if arg_count == 1:
            connector = clazz()
        elif arg_count == 2:
            connector = clazz(config)
        elif arg_count == 3:
            connector = clazz(config, plugin_config)
        else:
            raise Exception(
                "Wrong signature of the Connector subclass: %i args" % arg_count)
        link.send_json({'ok': True})
        stored_error = None
        # loop and process commands
        while True:
            request = link.read_json()
            if request is None:
                # peer closed the stream
                break
            task = request["task"]
            logging.info("Processing task: %s" % task)
            if task == "read_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                limit = request.get("limit", None)
                stored_error = None
                try:
                    with link.get_output() as output:
                        read_rows(connector, schema, partitioning, partition_id,
                                  limit, output)
                except:  # noqa: E722 -- best-effort: defer the error to finish_read_session
                    logging.exception("Connector send fail, storing exception")
                    stored_error = get_json_friendly_error()
                link.send_string('')
            elif task == "finish_read_session":
                if stored_error is None:
                    link.send_json({"ok": True})
                else:
                    link.send_json({"ok": False, "error": stored_error})
            elif task == "write_rows":
                schema = request.get("schema", None)
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                with link.get_input() as input_stream:
                    write_rows(connector, schema, partitioning, partition_id,
                               input_stream)
                link.send_json({'ok': True})
            elif task == "get_schema":
                link.send_json({'schema': connector.get_read_schema()})
            elif task == "get_partitioning_scheme":
                link.send_json({'partitioning': connector.get_partitioning()})
            elif task == "list_partitions":
                partitioning = request.get("partitioning", None)
                link.send_json(
                    {'partitions': connector.list_partitions(partitioning)})
            elif task == "partition_exists":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                link.send_json({
                    "exists": connector.partition_exists(partitioning, partition_id)
                })
            elif task == "records_count":
                partitioning = request.get("partitioning", None)
                partition_id = request.get("partitionId", None)
                link.send_json({
                    "count": connector.get_records_count(partitioning, partition_id)
                })
            else:
                raise Exception("Unexpected task %s" % task)
        # send end of stream
        logging.info("Work done")
        link.send_string('')
    except:  # noqa: E722 -- deliberately broad: any failure must reach the peer
        link.send_string('')  # mark failure
        traceback.print_exc()
        link.send_json(get_json_friendly_error())
    finally:
        # done
        link.close()