def extract_logical_plan(self, topology):
    """
    Build the logical-plan dictionary that the Tracker returns for a topology.

    The result has two sections, "spouts" and "bolts", each keyed by
    component name. A spout entry carries its config, type, source, version
    and output streams; a bolt entry carries its config, output streams and
    input streams (with the upstream component and grouping name).
    """
    # Config keys whose deserialized value replaces a spout attribute default.
    attribute_for_key = {
        "spout.type": "type",
        "spout.source": "source",
        "spout.version": "version",
    }

    spout_plans = {}
    for spout in topology.spouts():
        component_name = spout.comp.name
        config_entries = spout.comp.config.kvs

        # Values reported when the spout config does not override them.
        attributes = {"type": "default", "source": "NA", "version": "NA"}
        for entry in config_entries:
            attribute = attribute_for_key.get(entry.key)
            if attribute is not None:
                attributes[attribute] = javaobj.loads(entry.serialized_value)

        spout_plans[component_name] = {
            "config": convert_pb_kvs(config_entries,
                                     include_non_primitives=False),
            "type": attributes["type"],
            "source": attributes["source"],
            "version": attributes["version"],
            "outputs": [
                {"stream_name": output.stream.id} for output in spout.outputs
            ],
        }

    bolt_plans = {}
    for bolt in topology.bolts():
        bolt_plans[bolt.comp.name] = {
            "config": convert_pb_kvs(bolt.comp.config.kvs,
                                     include_non_primitives=False),
            "outputs": [
                {"stream_name": output.stream.id} for output in bolt.outputs
            ],
            "inputs": [
                {
                    "stream_name": source.stream.id,
                    "component_name": source.stream.component_name,
                    "grouping": topology_pb2.Grouping.Name(source.gtype),
                }
                for source in bolt.inputs
            ],
        }

    return {"spouts": spout_plans, "bolts": bolt_plans}
def extract_logical_plan(self, topology):
    """
    Assemble the logical plan representation served by the Tracker.

    Returns a dict with a "spouts" and a "bolts" section. Each section maps
    a component name to that component's config and stream descriptions.
    """
    def describe_outputs(streams):
        # One {"stream_name": ...} record per declared output stream.
        return [{"stream_name": item.stream.id} for item in streams]

    plan = {"spouts": {}, "bolts": {}}

    # Spouts: config-driven type/source/version plus output streams.
    for spout in topology.spouts():
        name = spout.comp.name
        kind, origin, release = "default", "NA", "NA"
        entries = spout.comp.config.kvs
        for entry in entries:
            key = entry.key
            if key == "spout.type":
                kind = javaobj.loads(entry.serialized_value)
                continue
            if key == "spout.source":
                origin = javaobj.loads(entry.serialized_value)
                continue
            if key == "spout.version":
                release = javaobj.loads(entry.serialized_value)

        plan["spouts"][name] = {
            "config": convert_pb_kvs(entries, include_non_primitives=False),
            "type": kind,
            "source": origin,
            "version": release,
            "outputs": describe_outputs(spout.outputs),
        }

    # Bolts: config, output streams, and the streams they subscribe to.
    for bolt in topology.bolts():
        name = bolt.comp.name
        description = {
            "config": convert_pb_kvs(bolt.comp.config.kvs,
                                     include_non_primitives=False),
            "outputs": describe_outputs(bolt.outputs),
            "inputs": [],
        }
        for subscription in bolt.inputs:
            stream = subscription.stream
            description["inputs"].append({
                "stream_name": stream.id,
                "component_name": stream.component_name,
                "grouping": topology_pb2.Grouping.Name(subscription.gtype),
            })
        plan["bolts"][name] = description

    return plan
def _convert_java_value(kv, include_non_primitives=True):
    """
    Decode the Java-serialized value of a config entry.

    Returns the decoded object itself when it is a string, its ``value``
    when it reports itself as primitive, and otherwise (only when
    ``include_non_primitives`` is set) a dict holding a JSON rendering under
    'value' and the hex-escaped serialized bytes under 'raw'. Non-primitive
    objects yield None when ``include_non_primitives`` is false.

    Any failure is logged; the fallback is ``_raw_value(kv)`` when
    non-primitives are included, None otherwise.
    """
    def _attributes_of(custom_field):
        # json.dumps fallback: serialize unknown objects via their attributes.
        return custom_field.__dict__

    try:
        decoded = javaobj.loads(kv.serialized_value)
        if pyutils.is_str_instance(decoded):
            return decoded
        if decoded.is_primitive():
            return decoded.value
        if not include_non_primitives:
            return None
        # Non-string Java objects are reported both as JSON text and as the
        # hex-escaped form of the original serialized bytes.
        rendered = json.dumps(decoded,
                              default=_attributes_of,
                              sort_keys=True,
                              indent=2)
        return {
            'value': rendered,
            'raw': utils.hex_escape(kv.serialized_value),
        }
    except Exception:
        Log.exception("Failed to parse data as java object")
        return _raw_value(kv) if include_non_primitives else None
def _convert_java_value(kv, include_non_primitives=True):
    """
    Turn a Java-serialized config entry into a plain Python value.

    Strings are returned as decoded and primitives are unwrapped to their
    ``value``. Other objects become a {'value': <json text>, 'raw': <hex>}
    dict when ``include_non_primitives`` is true, and None when it is false.
    If anything raises, the error is logged and the result is
    ``_raw_value(kv)`` (non-primitives included) or None (excluded).
    """
    try:
        java_object = javaobj.loads(kv.serialized_value)
        if pyutils.is_str_instance(java_object):
            result = java_object
        elif java_object.is_primitive():
            result = java_object.value
        elif include_non_primitives:
            # Objects that are neither strings nor primitives are exposed as
            # a JSON dump of their attributes plus the hex-escaped raw bytes.
            result = {
                'value': json.dumps(
                    java_object,
                    default=lambda custom_field: custom_field.__dict__,
                    sort_keys=True,
                    indent=2,
                ),
                'raw': utils.hex_escape(kv.serialized_value),
            }
        else:
            result = None
        return result
    except Exception:
        Log.exception("Failed to parse data as java object")
        if not include_non_primitives:
            return None
        return _raw_value(kv)
def extract_logical_plan(self, topology):
    """
    Build the logical-plan dictionary that the Tracker returns for a topology.

    The result has a "spouts" and a "bolts" section keyed by component name.
    Besides config, type, source, version and outputs, each spout entry
    carries "extra_links": the list decoded from its "extra.links" config
    value, with a URL filled in for every link.
    """
    # Config keys whose deserialized value replaces a spout attribute default.
    spout_attribute_keys = {
        "spout.type": "type",
        "spout.source": "source",
        "spout.version": "version",
    }

    plan = {"spouts": {}, "bolts": {}}

    for spout in topology.spouts():
        component_name = spout.comp.name
        entries = spout.comp.config.kvs

        attributes = {"type": "default", "source": "NA", "version": "NA"}
        links = []
        for entry in entries:
            if entry.key in spout_attribute_keys:
                attribute = spout_attribute_keys[entry.key]
                attributes[attribute] = javaobj.loads(entry.serialized_value)
            elif entry.key == "extra.links":
                # The deserialized value is JSON text describing the links.
                links = json.loads(javaobj.loads(entry.serialized_value))

        spout_plan = {
            "config": convert_pb_kvs(entries, include_non_primitives=False),
            "type": attributes["type"],
            "source": attributes["source"],
            "version": attributes["version"],
            "outputs": [],
            "extra_links": links,
        }

        # Parameters handed to the URL formatter for every extra link.
        state = topology.execution_state
        link_params = {
            "cluster": state.cluster,
            "environ": state.environ,
            "role": state.role,
            "jobname": topology.name,
            "submission_user": state.submission_user,
        }
        for link in links:
            link[EXTRA_LINK_URL_KEY] = self.config.get_formatted_url(
                link[EXTRA_LINK_FORMATTER_KEY], link_params)

        spout_plan["outputs"].extend(
            {"stream_name": output.stream.id} for output in spout.outputs)
        plan["spouts"][component_name] = spout_plan

    for bolt in topology.bolts():
        plan["bolts"][bolt.comp.name] = {
            "config": convert_pb_kvs(bolt.comp.config.kvs,
                                     include_non_primitives=False),
            "outputs": [
                {"stream_name": output.stream.id} for output in bolt.outputs
            ],
            "inputs": [
                {
                    "stream_name": source.stream.id,
                    "component_name": source.stream.component_name,
                    "grouping": topology_pb2.Grouping.Name(source.gtype),
                }
                for source in bolt.inputs
            ],
        }

    return plan
def extract_physical_plan(self, topology):
    """
    Build the physical-plan dictionary that the Tracker returns for a topology.

    The result always contains the sections "instances", "instance_groups",
    "stmgrs", "spouts", "bolts" and "config". When the topology has no
    physical plan, all of them are returned empty.
    """
    plan = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }
    physical_plan = topology.physical_plan
    if not physical_plan:
        return plan

    spouts = topology.spouts()
    bolts = topology.bolts()
    stream_managers = list(physical_plan.stmgrs)
    instances = list(physical_plan.instances)

    def _attributes_of(custom_field):
        # json.dumps fallback: serialize unknown objects via their attributes.
        return custom_field.__dict__

    # Topology-level config: plain values are copied, serialized ones decoded.
    config = plan["config"]
    topology_config = physical_plan.topology.topology_config
    if topology_config:
        for entry in topology_config.kvs:
            if entry.value:
                config[entry.key] = entry.value
                continue
            if not entry.serialized_value:
                continue
            # Serialized values are assumed to be Java serialization for now;
            # once other languages are supported, ConfigValueType should be
            # checked. 'raw' holds the hex-escaped serialized bytes.
            try:
                java_object = javaobj.loads(entry.serialized_value)
                config[entry.key] = {
                    'value': json.dumps(java_object,
                                        default=_attributes_of,
                                        sort_keys=True,
                                        indent=2),
                    'raw': utils.hex_escape(entry.serialized_value),
                }
            except Exception:
                Log.exception("Failed to parse data as java object")
                config[entry.key] = {
                    # Keep 'value' a valid JSON object even on failure.
                    'value': '{}',
                    'raw': utils.hex_escape(entry.serialized_value),
                }

    # Every known component starts with an empty list of instance ids.
    for spout in spouts:
        plan["spouts"][spout.comp.name] = []
    for bolt in bolts:
        plan["bolts"][bolt.comp.name] = []

    stmgr_section = plan["stmgrs"]
    for stmgr in stream_managers:
        host = stmgr.host_name
        cwd = stmgr.cwd
        if stmgr.HasField("shell_port"):
            shell_port = stmgr.shell_port
        else:
            shell_port = None
        stmgr_section[stmgr.id] = {
            "id": stmgr.id,
            "host": host,
            "port": stmgr.data_port,
            "shell_port": shell_port,
            "cwd": cwd,
            "pid": stmgr.pid,
            "joburl": utils.make_shell_job_url(host, shell_port, cwd),
            "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
            "instance_ids": [],
        }

    groups = collections.OrderedDict()
    for instance in instances:
        instance_id = instance.instance_id
        stmgr_id = instance.stmgr_id
        component = instance.info.component_name
        owner = stmgr_section[stmgr_id]

        # Group name is derived from the 1-based component index.
        group_name = "container_%d" % (int(instance.info.component_index) + 1)
        groups.setdefault(group_name, []).append(instance_id)

        plan["instances"][instance_id] = {
            "id": instance_id,
            "name": component,
            "stmgrId": stmgr_id,
            "logfile": utils.make_shell_logfiles_url(
                owner["host"], owner["shell_port"], owner["cwd"],
                instance.instance_id),
        }
        owner["instance_ids"].append(instance_id)

        # Anything that is not a known spout is recorded under bolts.
        section = "spouts" if component in plan["spouts"] else "bolts"
        plan[section][component].append(instance_id)

    plan["instance_groups"] = groups
    return plan
def extract_physical_plan(self, topology):
    """
    Build the physical-plan dictionary that the Tracker returns for a topology.

    The result always contains the sections "instances", "stmgrs", "spouts",
    "bolts" and "config". When the topology has no physical plan, all of
    them are returned empty.
    """
    plan = {
        "instances": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }
    physical_plan = topology.physical_plan
    if not physical_plan:
        return plan

    spouts = topology.spouts()
    bolts = topology.bolts()
    stream_managers = list(physical_plan.stmgrs)
    instances = list(physical_plan.instances)

    def _attributes_of(custom_field):
        # json.dumps fallback: serialize unknown objects via their attributes.
        return custom_field.__dict__

    # Topology-level config: plain values are copied, serialized ones decoded.
    config = plan["config"]
    topology_config = physical_plan.topology.topology_config
    if topology_config:
        for entry in topology_config.kvs:
            if entry.value:
                config[entry.key] = entry.value
                continue
            if not entry.serialized_value:
                continue
            # Serialized values are assumed to be Java serialization for now;
            # once other languages are supported, ConfigValueType should be
            # checked. 'raw' holds the hex-escaped serialized bytes.
            try:
                java_object = javaobj.loads(entry.serialized_value)
                config[entry.key] = {
                    'value': json.dumps(java_object,
                                        default=_attributes_of,
                                        sort_keys=True,
                                        indent=2),
                    'raw': utils.hex_escape(entry.serialized_value),
                }
            except Exception:
                # Decoding failed: report a placeholder alongside the raw bytes.
                config[entry.key] = {
                    'value': 'A Java Object',
                    'raw': utils.hex_escape(entry.serialized_value),
                }

    # Every known component starts with an empty list of instance ids.
    for spout in spouts:
        plan["spouts"][spout.comp.name] = []
    for bolt in bolts:
        plan["bolts"][bolt.comp.name] = []

    stmgr_section = plan["stmgrs"]
    for stmgr in stream_managers:
        host = stmgr.host_name
        cwd = stmgr.cwd
        if stmgr.HasField("shell_port"):
            shell_port = stmgr.shell_port
        else:
            shell_port = None
        stmgr_section[stmgr.id] = {
            "id": stmgr.id,
            "host": host,
            "port": stmgr.data_port,
            "shell_port": shell_port,
            "cwd": cwd,
            "pid": stmgr.pid,
            "joburl": utils.make_shell_job_url(host, shell_port, cwd),
            "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
            "instance_ids": [],
        }

    for instance in instances:
        instance_id = instance.instance_id
        stmgr_id = instance.stmgr_id
        component = instance.info.component_name
        owner = stmgr_section[stmgr_id]

        plan["instances"][instance_id] = {
            "id": instance_id,
            "name": component,
            "stmgrId": stmgr_id,
            "logfile": utils.make_shell_logfiles_url(
                owner["host"], owner["shell_port"], owner["cwd"],
                instance.instance_id),
        }
        owner["instance_ids"].append(instance_id)

        # Anything that is not a known spout is recorded under bolts.
        section = "spouts" if component in plan["spouts"] else "bolts"
        plan[section][component].append(instance_id)

    return plan