Esempio n. 1
0
    def extract_physical_plan(self, topology):
        """
        Build the physical-plan representation that Tracker returns.

        The result is a dict with the keys "instances", "instance_groups",
        "stmgrs", "spouts", "bolts", "config" and "components". When
        ``topology.physical_plan`` is falsy, every key maps to an empty
        dict and nothing else on ``topology`` is touched.

        Args:
            topology: object exposing ``physical_plan`` and the ``spouts()``
                and ``bolts()`` accessors.

        Returns:
            dict: the populated physical-plan structure.

        Raises:
            KeyError: if an instance names a stmgr that is not listed in
                the plan, or a component that is neither a spout nor a
                bolt.
        """
        physicalPlan = {
            "instances": {},
            "instance_groups": {},
            "stmgrs": {},
            "spouts": {},
            "bolts": {},
            "config": {},
            "components": {}
        }

        plan = topology.physical_plan
        if not plan:
            return physicalPlan

        # Topology-level config
        if plan.topology.topology_config:
            physicalPlan["config"] = convert_pb_kvs(
                plan.topology.topology_config.kvs)

        # Every spout/bolt gets an (initially empty) instance-id list; the
        # per-component config is recorded once, first writer wins.
        components = physicalPlan["components"]
        for kind, members in (("spouts", topology.spouts()),
                              ("bolts", topology.bolts())):
            for member in members:
                comp = member.comp
                physicalPlan[kind][comp.name] = []
                if comp.name not in components:
                    components[comp.name] = {
                        "config": convert_pb_kvs(comp.config.kvs)
                    }

        stmgr_infos = physicalPlan["stmgrs"]
        for stmgr in plan.stmgrs:
            host = stmgr.host_name
            cwd = stmgr.cwd
            # Report None rather than the field default when it is unset.
            shell_port = (stmgr.shell_port
                          if stmgr.HasField("shell_port") else None)
            stmgr_infos[stmgr.id] = {
                "id": stmgr.id,
                "host": host,
                "port": stmgr.data_port,
                "shell_port": shell_port,
                "cwd": cwd,
                "pid": stmgr.pid,
                "joburl": utils.make_shell_job_url(host, shell_port, cwd),
                "logfiles": utils.make_shell_logfiles_url(
                    host, shell_port, cwd),
                "instance_ids": []
            }

        instance_groups = collections.OrderedDict()
        for instance in plan.instances:
            instance_id = instance.instance_id
            stmgr_id = instance.stmgr_id
            name = instance.info.component_name
            stmgr_info = stmgr_infos[stmgr_id]

            # instance_id format: container_<index>_<component>_<n>
            # group name is container_<index>. Split from the left so a
            # component name that itself contains "_" cannot leak into
            # the group name.
            group_name = "_".join(instance_id.split("_", 2)[:2])
            instance_groups.setdefault(group_name, []).append(instance_id)

            physicalPlan["instances"][instance_id] = {
                "id": instance_id,
                "name": name,
                "stmgrId": stmgr_id,
                "logfile": utils.make_shell_logfiles_url(
                    stmgr_info["host"], stmgr_info["shell_port"],
                    stmgr_info["cwd"], instance_id),
            }
            stmgr_info["instance_ids"].append(instance_id)
            # Anything that is not a known spout is treated as a bolt.
            if name in physicalPlan["spouts"]:
                physicalPlan["spouts"][name].append(instance_id)
            else:
                physicalPlan["bolts"][name].append(instance_id)

        physicalPlan["instance_groups"] = instance_groups

        return physicalPlan
Esempio n. 2
0
  def extract_physical_plan(self, topology):
    """
    Returns the representation of physical plan that will
    be returned from Tracker.

    The result is a dict with the keys "instances", "instance_groups",
    "stmgrs", "spouts", "bolts", "config" and "components". If
    ``topology.physical_plan`` is falsy, each key maps to an empty dict.

    Raises KeyError when an instance refers to a stmgr id that is not in
    the plan, or to a component that is neither a spout nor a bolt.
    """
    physicalPlan = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
        "components": {}
    }

    pplan = topology.physical_plan
    if not pplan:
      return physicalPlan

    spouts = topology.spouts()
    bolts = topology.bolts()

    # Configs
    if pplan.topology.topology_config:
      physicalPlan["config"] = convert_pb_kvs(pplan.topology.topology_config.kvs)

    # Components: spouts are registered before bolts, and a component's
    # config is stored only the first time its name is seen.
    components = physicalPlan["components"]
    for section, members in (("spouts", spouts), ("bolts", bolts)):
      for member in members:
        comp_name = member.comp.name
        physicalPlan[section][comp_name] = []
        if comp_name not in components:
          components[comp_name] = {
              "config": convert_pb_kvs(member.comp.config.kvs)
          }

    # Stream managers
    stmgr_table = physicalPlan["stmgrs"]
    for stmgr in pplan.stmgrs:
      host = stmgr.host_name
      cwd = stmgr.cwd
      # None (not the field default) signals that no shell port was set.
      shell_port = stmgr.shell_port if stmgr.HasField("shell_port") else None
      stmgr_table[stmgr.id] = {
          "id": stmgr.id,
          "host": host,
          "port": stmgr.data_port,
          "shell_port": shell_port,
          "cwd": cwd,
          "pid": stmgr.pid,
          "joburl": utils.make_shell_job_url(host, shell_port, cwd),
          "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
          "instance_ids": []
      }

    # Instances
    instance_groups = collections.OrderedDict()
    for instance in pplan.instances:
      instance_id = instance.instance_id
      stmgrId = instance.stmgr_id
      name = instance.info.component_name
      stmgrInfo = stmgr_table[stmgrId]

      # instance_id format container_<index>_<component>_<n>
      # group name is container_<index>; take the first two "_"-separated
      # fields so that underscores inside the component name are harmless.
      group_name = "_".join(instance_id.split("_", 2)[:2])
      instance_groups.setdefault(group_name, []).append(instance_id)

      physicalPlan["instances"][instance_id] = {
          "id": instance_id,
          "name": name,
          "stmgrId": stmgrId,
          "logfile": utils.make_shell_logfiles_url(
              stmgrInfo["host"], stmgrInfo["shell_port"], stmgrInfo["cwd"], instance_id),
      }
      stmgrInfo["instance_ids"].append(instance_id)
      # Names not registered as spouts are assumed to be bolts.
      if name in physicalPlan["spouts"]:
        physicalPlan["spouts"][name].append(instance_id)
      else:
        physicalPlan["bolts"][name].append(instance_id)

    physicalPlan["instance_groups"] = instance_groups

    return physicalPlan
Esempio n. 3
0
  def extract_physical_plan(self, topology):
    """
    Returns the representation of physical plan that will
    be returned from Tracker.

    The result is a dict with the keys "instances", "instance_groups",
    "stmgrs", "spouts", "bolts" and "config". If
    ``topology.physical_plan`` is falsy, each key maps to an empty dict.

    Config entries with a plain ``value`` are copied as-is. Entries that
    only carry a ``serialized_value`` become a dict with a JSON rendering
    under 'value' ('{}' when decoding fails) and the hex-escaped bytes
    under 'raw'.

    Raises KeyError when an instance refers to a stmgr id that is not in
    the plan, or to a component that is neither a spout nor a bolt.
    """
    physicalPlan = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }

    pplan = topology.physical_plan
    if not pplan:
      return physicalPlan

    spouts = topology.spouts()
    bolts = topology.bolts()

    # Configs
    topology_config = pplan.topology.topology_config
    if topology_config:
      config = physicalPlan["config"]
      for kvs in topology_config.kvs:
        if kvs.value:
          config[kvs.key] = kvs.value
        elif kvs.serialized_value:
          # currently assumes that serialized_value is Java serialization
          # when multi-language support is added later, ConfigValueType should be checked
          try:
            pobj = javaobj.loads(kvs.serialized_value)
            value = json.dumps(pobj,
                               default=lambda custom_field: custom_field.__dict__,
                               sort_keys=True,
                               indent=2)
          except Exception:
            # Best effort: keep serving the plan even if one value is undecodable.
            Log.exception("Failed to parse data as java object")
            # The value should be a valid json object
            value = '{}'
          config[kvs.key] = {
              'value' : value,
              # Hexadecimal byte array for Serialized objects
              'raw' : utils.hex_escape(kvs.serialized_value)}

    for spout in spouts:
      physicalPlan["spouts"][spout.comp.name] = []
    for bolt in bolts:
      physicalPlan["bolts"][bolt.comp.name] = []

    stmgr_table = physicalPlan["stmgrs"]
    for stmgr in pplan.stmgrs:
      host = stmgr.host_name
      cwd = stmgr.cwd
      # None (not the field default) signals that no shell port was set.
      shell_port = stmgr.shell_port if stmgr.HasField("shell_port") else None
      stmgr_table[stmgr.id] = {
          "id": stmgr.id,
          "host": host,
          "port": stmgr.data_port,
          "shell_port": shell_port,
          "cwd": cwd,
          "pid": stmgr.pid,
          "joburl": utils.make_shell_job_url(host, shell_port, cwd),
          "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
          "instance_ids": []
      }

    instance_groups = collections.OrderedDict()
    for instance in pplan.instances:
      instance_id = instance.instance_id
      stmgrId = instance.stmgr_id
      name = instance.info.component_name
      stmgrInfo = stmgr_table[stmgrId]

      # Group by container, not by the instance's index within its
      # component: component_index says nothing about which container an
      # instance runs in.
      # NOTE(review): assumes instance_id looks like
      # container_<index>_<component>_<n> -- confirm against the id producer.
      group_name = "_".join(instance_id.split("_", 2)[:2])
      instance_groups.setdefault(group_name, []).append(instance_id)

      physicalPlan["instances"][instance_id] = {
          "id": instance_id,
          "name": name,
          "stmgrId": stmgrId,
          "logfile": utils.make_shell_logfiles_url(
              stmgrInfo["host"], stmgrInfo["shell_port"], stmgrInfo["cwd"], instance_id),
      }
      stmgrInfo["instance_ids"].append(instance_id)
      # Names not registered as spouts are assumed to be bolts.
      if name in physicalPlan["spouts"]:
        physicalPlan["spouts"][name].append(instance_id)
      else:
        physicalPlan["bolts"][name].append(instance_id)

    physicalPlan["instance_groups"] = instance_groups

    return physicalPlan
Esempio n. 4
0
    def _build_physical_plan(physical_plan) -> TopologyInfoPhysicalPlan:
        """
        Convert a physical plan into a ``TopologyInfoPhysicalPlan``.

        Args:
            physical_plan: object exposing ``topology`` (with
                ``topology_config``, ``spouts`` and ``bolts``), ``stmgrs``
                and ``instances``. A falsy value yields a plan whose
                fields are all empty dicts.

        Returns:
            TopologyInfoPhysicalPlan: config, per-component config, stream
            managers, instances, instance groups, and the instance ids
            belonging to every spout and bolt.

        Raises:
            KeyError: if an instance names a stmgr that is not listed in
                the plan, or a component that is neither a spout nor a
                bolt.
        """
        if not physical_plan:
            return TopologyInfoPhysicalPlan(
                instances={},
                instance_groups={},
                stmgrs={},
                spouts={},
                bolts={},
                config={},
                components={},
            )

        topology = physical_plan.topology
        config = {}
        if topology.topology_config:
            config = utils.convert_pb_kvs(topology.topology_config.kvs)

        # Spouts are registered before bolts; a component's config is
        # stored only the first time its name is seen.
        components = {}
        spouts = {}
        bolts = {}
        for registry, members in ((spouts, topology.spouts),
                                  (bolts, topology.bolts)):
            for member in members:
                name = member.comp.name
                registry[name] = []
                if name not in components:
                    components[name] = PhysicalPlanComponent(
                        config=utils.convert_pb_kvs(member.comp.config.kvs),
                    )

        stmgrs = {}
        for stmgr in physical_plan.stmgrs:
            # Report None rather than the field default when it is unset,
            # and use that same value for every URL built below.
            shell_port = (stmgr.shell_port
                          if stmgr.HasField("shell_port") else None)
            stmgrs[stmgr.id] = PhysicalPlanStmgr(
                id=stmgr.id,
                host=stmgr.host_name,
                port=stmgr.data_port,
                shell_port=shell_port,
                cwd=stmgr.cwd,
                pid=stmgr.pid,
                joburl=utils.make_shell_job_url(stmgr.host_name, shell_port,
                                                stmgr.cwd),
                logfiles=utils.make_shell_logfiles_url(stmgr.host_name,
                                                       shell_port,
                                                       stmgr.cwd),
                instance_ids=[],
            )

        instances = {}
        instance_groups = {}
        for instance in physical_plan.instances:
            component_name = instance.info.component_name
            instance_id = instance.instance_id
            # Anything that is not a known spout is treated as a bolt.
            if component_name in spouts:
                spouts[component_name].append(instance_id)
            else:
                bolts[component_name].append(instance_id)

            owner = stmgrs[instance.stmgr_id]
            owner.instance_ids.append(instance_id)
            instances[instance_id] = PhysicalPlanInstance(
                id=instance_id,
                name=component_name,
                stmgr_id=instance.stmgr_id,
                logfile=utils.make_shell_logfiles_url(
                    owner.host,
                    owner.shell_port,
                    owner.cwd,
                    instance_id,
                ),
            )

            # instance_id example: container_1_component_1
            # group name would be: container_1
            # Split from the left so a component name that itself contains
            # "_" cannot leak into the group name.
            group_name = "_".join(instance_id.split("_", 2)[:2])
            instance_groups.setdefault(group_name, []).append(instance_id)

        return TopologyInfoPhysicalPlan(
            instances=instances,
            instance_groups=instance_groups,
            stmgrs=stmgrs,
            spouts=spouts,
            bolts=bolts,
            components=components,
            config=config,
        )
Esempio n. 5
0
  def extract_physical_plan(self, topology):
    """
    Assemble the dictionary describing the physical plan, i.e. the
    structure Tracker hands back for this topology.

    The dictionary has the keys "instances", "stmgrs", "spouts", "bolts"
    and "config"; all of them map to empty dicts when
    ``topology.physical_plan`` is falsy.
    """
    plan_view = {
        "instances": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }

    if not topology.physical_plan:
      return plan_view

    spout_list = topology.spouts()
    bolt_list = topology.bolts()

    # Snapshot the stream managers and instances of the physical plan
    pplan = topology.physical_plan
    stream_managers = list(pplan.stmgrs)
    plan_instances = list(pplan.instances)

    # Topology-level config
    topo_config = pplan.topology.topology_config
    if topo_config:
      config_out = plan_view["config"]
      for kvs in topo_config.kvs:
        if kvs.value:
          config_out[kvs.key] = kvs.value
          continue
        serialized = kvs.serialized_value
        if not serialized:
          continue
        # Serialized values are taken to be Java-serialized objects for now;
        # once other languages are supported ConfigValueType needs checking.
        try:
          java_obj = javaobj.loads(serialized)
          rendered = json.dumps(java_obj,
                                default=lambda custom_field: custom_field.__dict__,
                                sort_keys=True,
                                indent=2)
          # 'raw' carries the hex-escaped bytes of the serialized object
          entry = {'value' : rendered, 'raw' : utils.hex_escape(serialized)}
        except Exception:
          entry = {'value' : 'A Java Object', 'raw' : utils.hex_escape(serialized)}
        config_out[kvs.key] = entry

    # Each component starts with an empty list of instance ids
    plan_view["spouts"].update((spout.comp.name, []) for spout in spout_list)
    plan_view["bolts"].update((bolt.comp.name, []) for bolt in bolt_list)

    stmgr_table = plan_view["stmgrs"]
    for sm in stream_managers:
      sm_host = sm.host_name
      sm_cwd = sm.cwd
      sm_shell_port = None
      if sm.HasField("shell_port"):
        sm_shell_port = sm.shell_port
      stmgr_table[sm.id] = {
          "id": sm.id,
          "host": sm_host,
          "port": sm.data_port,
          "shell_port": sm_shell_port,
          "cwd": sm_cwd,
          "pid": sm.pid,
          "joburl": utils.make_shell_job_url(sm_host, sm_shell_port, sm_cwd),
          "logfiles": utils.make_shell_logfiles_url(sm_host, sm_shell_port, sm_cwd),
          "instance_ids": []
      }

    for inst in plan_instances:
      inst_id = inst.instance_id
      owner_id = inst.stmgr_id
      component = inst.info.component_name
      owner = stmgr_table[owner_id]

      plan_view["instances"][inst_id] = {
          "id": inst_id,
          "name": component,
          "stmgrId": owner_id,
          "logfile": utils.make_shell_logfiles_url(
              owner["host"], owner["shell_port"], owner["cwd"], inst.instance_id),
      }
      owner["instance_ids"].append(inst_id)
      # A component that is not a registered spout is looked up as a bolt
      section = "spouts" if component in plan_view["spouts"] else "bolts"
      plan_view[section][component].append(inst_id)

    return plan_view