Example #1
0
def start_cluster(cl_args):
  '''
  Bring up a Heron standalone cluster.

  Reads the roles, exits with -1 when any role has no servers, templates the
  config files, distributes the package unless the only node is this machine,
  and then starts masters, slaves, the API server and the Heron tools.

  :param cl_args: parsed command line arguments
  '''
  role_map = read_and_parse_roles(cl_args)
  masters = role_map[Role.MASTERS]
  slaves = role_map[Role.SLAVES]
  zookeepers = role_map[Role.ZOOKEEPERS]

  Log.info("Roles:")
  for line_format, servers in ((" - Master Servers: %s", masters),
                               (" - Slave Servers: %s", slaves),
                               (" - Zookeeper Servers: %s", zookeepers)):
    Log.info(line_format % list(servers))

  # every role needs at least one server; the first empty one aborts the run
  for servers, complaint in ((masters, "No master servers specified!"),
                             (slaves, "No slave servers specified!"),
                             (zookeepers, "No zookeeper servers specified!")):
    if not servers:
      Log.error(complaint)
      sys.exit(-1)

  # make sure configs are templated
  update_config_files(cl_args)

  # the package is copied around unless the single node is this machine
  dist_nodes = list(masters.union(slaves))
  if len(dist_nodes) != 1 or not is_self(dist_nodes[0]):
    distribute_package(role_map, cl_args)

  start_master_nodes(masters, cl_args)
  start_slave_nodes(slaves, cl_args)
  start_api_server(masters, cl_args)
  start_heron_tools(masters, cl_args)
  Log.info("Heron standalone cluster complete!")
Example #2
0
 def _emit_terminal_if_needed(self):
     """Emit the terminal tuple on the control stream once the spout is done
     and no tuples are left to complete."""
     Log.info("is_done: %s, tuples_to_complete: %s" % (self.is_done, self.tuples_to_complete))
     if not self.is_done or self.tuples_to_complete != 0:
         return

     Log.info("Emitting terminals to downstream")
     control_stream = integ_const.INTEGRATION_TEST_CONTROL_STREAM_ID
     super(IntegrationTestSpout, self).emit(
         [integ_const.INTEGRATION_TEST_TERMINAL], stream=control_stream
     )
Example #3
0
def start_slave_nodes(slaves, cl_args):
  '''
  Start slave nodes

  Launches the nomad agent command once per entry of `slaves` (wrapped by
  ssh_remote_execute when is_self() is false for that entry), then collects
  the result of every launched command. If any command returned a non-zero
  code, all failures are logged and the process exits with -1.

  :param slaves: iterable of slave hosts
  :param cl_args: parsed command line arguments
  '''
  launched = []
  for slave in slaves:
    Log.info("Starting slave on %s" % slave)
    cmd = "%s agent -config %s >> /tmp/nomad_client.log 2>&1 &" \
          % (get_nomad_path(cl_args), get_nomad_slave_config_file(cl_args))
    if not is_self(slave):
      cmd = ssh_remote_execute(cmd, slave, cl_args)
    Log.debug(cmd)
    process = subprocess.Popen(cmd,
                               shell=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    launched.append({"pid": process, "dest": slave})

  errors = []
  for entry in launched:
    process = entry["pid"]
    # communicate() drains stdout/stderr while waiting for the exit. Calling
    # wait() first can block forever once the child fills a pipe buffer.
    output = process.communicate()
    return_code = process.returncode
    Log.debug("return code: %s output: %s" % (return_code, output))
    if return_code != 0:
      errors.append("Failed to start slave on %s with error:\n%s" % (entry["dest"], output[1]))

  if errors:
    for error in errors:
      Log.error(error)
    sys.exit(-1)

  Log.info("Done starting slaves")
Example #4
0
  def __init__(self, pplan_helper, in_stream, out_stream, looper):
    """Initialize the spout instance in the PAUSED state and load the user's spout.

    Raises RuntimeError when the physical plan helper does not describe a spout.
    """
    super(SpoutInstance, self).__init__(pplan_helper, in_stream, out_stream, looper)
    self.topology_state = topology_pb2.TopologyState.Value("PAUSED")

    if not self.pplan_helper.is_spout:
      raise RuntimeError("No spout in physicial plan")

    ctx = self.pplan_helper.context
    self.spout_metrics = SpoutMetrics(self.pplan_helper)
    self.serializer = SerializerHelper.get_serializer(ctx)

    # acking related settings, both read from the cluster config
    acking_config = ctx.get_cluster_config()
    self.acking_enabled = acking_config.get(api_constants.TOPOLOGY_ENABLE_ACKING, False)
    timeouts_config = ctx.get_cluster_config()
    self.enable_message_timeouts = timeouts_config.get(
        api_constants.TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS)
    Log.info("Enable ACK: %s" % str(self.acking_enabled))
    Log.info("Enable Message Timeouts: %s" % str(self.enable_message_timeouts))

    self.total_tuples_emitted = 0
    self.immediate_acks = collections.deque()
    # map <tuple_info.key -> tuple_info>, ordered by insertion time
    self.in_flight_tuples = collections.OrderedDict()

    # load user's spout class and hand it this instance as its delegate
    impl_cls = super(SpoutInstance, self).load_py_instance(is_spout=True)
    self.spout_impl = impl_cls(delegate=self)
Example #5
0
 def fail(self, tup):
     """Forward the fail to the parent bolt while fewer tuples have been
     processed than received, and count the tuple as processed."""
     counters = (self.tuples_processed, self.tuple_received)
     Log.info("Trying to do a fail. tuples processed: %d, received: %d" % counters)
     if self.tuples_processed >= self.tuple_received:
         return
     super(IntegrationTestBolt, self).fail(tup)
     self.tuples_processed += 1
Example #6
0
def start_api_server(masters, cl_args):
  '''
  Start the Heron API server

  Waits for the first master in `masters` to be up, submits the apiserver job
  through nomad on that master (via ssh_remote_execute when is_self() is false
  for it) and waits for the "apiserver" job to start. Exits the process with
  -1 when the submit command returns a non-zero code.

  :param masters: collection of master hosts; only the first one is used
  :param cl_args: parsed command line arguments
  '''
  # make sure nomad cluster is up
  single_master = list(masters)[0]
  wait_for_master_to_start(single_master)

  cmd = "%s run %s >> /tmp/apiserver_start.log 2>&1 &" \
        % (get_nomad_path(cl_args), get_apiserver_job_file(cl_args))
  Log.info("Starting Heron API Server on %s" % single_master)

  if not is_self(single_master):
    cmd = ssh_remote_execute(cmd, single_master, cl_args)
  Log.debug(cmd)
  process = subprocess.Popen(cmd,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

  # communicate() drains stdout/stderr while waiting for the exit. Calling
  # wait() first can block forever once the child fills a pipe buffer.
  output = process.communicate()
  return_code = process.returncode
  Log.debug("return code: %s output: %s" % (return_code, output))
  if return_code != 0:
    Log.error("Failed to start apiserver on %s with error:\n%s" % (single_master, output[1]))
    sys.exit(-1)

  wait_for_job_to_start(single_master, "apiserver")
  Log.info("Done starting Heron API Server")
 def prepare(self, context, component, stream, target_tasks):
   """Log the grouping parameters and remember the target tasks on the instance."""
   message = ("In prepare of SampleCustomGrouping, "
              "with src component: %s, "
              "with stream id: %s, "
              "with target tasks: %s")
   Log.info(message % (component, stream, str(target_tasks)))
   self.target_tasks = target_tasks
Example #8
0
  def _add_spout_task(self):
    """Register the spout's wakeup task on the looper and, when message
    timeouts are enabled, start looking for timed-out tuples."""
    Log.info("Adding spout task...")

    def spout_task():
      """One round of spout work: produce tuples, then handle acks."""
      # don't do anything when topology is paused
      if not self._is_topology_running():
        return

      if not self._should_produce_tuple():
        self.spout_metrics.update_out_queue_full_count()
      else:
        self._produce_tuple()
        self.output_helper.send_out_tuples()
        # so emitted tuples would be added to buffer now
        self.looper.wake_up()

      if not self.acking_enabled:
        self._do_immediate_acks()
      else:
        self._read_tuples_and_execute()
        pending = len(self.in_flight_tuples)
        self.spout_metrics.update_pending_tuples_count(pending)

      if self._is_continue_to_work():
        self.looper.wake_up()

    self.looper.add_wakeup_task(spout_task)

    # look for the timeout's tuples
    if self.enable_message_timeouts:
      self._look_for_timeouts()
Example #9
0
  def register_watch(self, callback):
    """
    Register 'callback' as a watch and return the UUID it
    was stored under. That UUID can later be used to
    unregister the watch.
    Returns None if the watch could not be registered.

    'callback' must be a function taking exactly one
    argument, the topology on which the watch was triggered.
    It is invoked once right away, at registration time;
    if that first call raises any Exception the watch is
    not kept and None is returned.
    """
    RETRY_COUNT = 5
    # A freshly generated UUID may already be taken, so
    # try a few times before giving up.
    attempts_left = RETRY_COUNT
    while attempts_left > 0:
      attempts_left -= 1
      uid = uuid.uuid4()
      if uid in self.watches:
        continue

      Log.info("Registering a watch with uid: " + str(uid))
      try:
        callback(self)
      except Exception as e:
        Log.error("Caught exception while triggering callback: " + str(e))
        Log.debug(traceback.format_exc())
        return None

      self.watches[uid] = callback
      return uid
    return None
Example #10
0
def start_heron_tools(masters, cl_args):
  '''
  Start Heron tracker and UI

  Waits for the first master in `masters` to be up, submits the heron tools
  job through nomad on that master (via ssh_remote_execute when is_self() is
  false for it) and waits for the "heron-tools" job to start. Exits the
  process with -1 when the submit command returns a non-zero code.

  :param masters: collection of master hosts; only the first one is used
  :param cl_args: parsed command line arguments
  '''
  single_master = list(masters)[0]
  wait_for_master_to_start(single_master)

  cmd = "%s run %s >> /tmp/heron_tools_start.log 2>&1 &" \
        % (get_nomad_path(cl_args), get_heron_tools_job_file(cl_args))
  Log.info("Starting Heron Tools on %s" % single_master)

  if not is_self(single_master):
    cmd = ssh_remote_execute(cmd, single_master, cl_args)
  Log.debug(cmd)
  process = subprocess.Popen(cmd,
                             shell=True,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)

  # communicate() drains stdout/stderr while waiting for the exit. Calling
  # wait() first can block forever once the child fills a pipe buffer.
  output = process.communicate()
  return_code = process.returncode
  Log.debug("return code: %s output: %s" % (return_code, output))
  if return_code != 0:
    Log.error("Failed to start Heron Tools on %s with error:\n%s" % (single_master, output[1]))
    sys.exit(-1)

  wait_for_job_to_start(single_master, "heron-tools")
  Log.info("Done starting Heron Tools")
Example #11
0
def launch_topologies(cl_args, topology_file, tmp_dir):
  '''
  Launch every topology whose .defn file is found in tmp_dir
  :param cl_args: parsed command line arguments, passed on to launch_a_topology
  :param topology_file: topology file, passed on to launch_a_topology
  :param tmp_dir: directory that is searched for *.defn files
  :return: list(Responses), one per launched topology; a single SimpleResult
           with Status.HeronError when no definition file exists or one of
           them cannot be loaded
  '''
  # the submitter would have written the .defn file to the tmp_dir
  defn_files = glob.glob(tmp_dir + '/*.defn')

  if not defn_files:
    return SimpleResult(Status.HeronError, "No topologies found under %s" % tmp_dir)

  results = []
  for defn_file in defn_files:
    # load the topology definition from the file
    topology_defn = topology_pb2.Topology()
    try:
      # the with block closes the file even when reading or parsing fails
      with open(defn_file, "rb") as handle:
        topology_defn.ParseFromString(handle.read())
    except Exception as e:
      err_context = "Cannot load topology definition '%s': %s" % (defn_file, e)
      return SimpleResult(Status.HeronError, err_context)
    # launch the topology
    Log.info("Launching topology: \'%s\'", topology_defn.name)
    res = launch_a_topology(
        cl_args, tmp_dir, topology_file, defn_file, topology_defn.name)
    results.append(res)
  return results
Example #12
0
 def unregister_watch(self, uid):
   """
   Remove the watch stored under the given UUID, if there is one.
   """
   message = "Unregister a watch with uid: " + str(uid)
   Log.info(message)
   # pop() with a default keeps an unknown UUID from raising
   self.watches.pop(uid, None)
Example #13
0
 def handle_initiate_stateful_checkpoint(self, ckptmsg, component):
   """Let a stateful component prepare its state, then admit the instance's
   state for the checkpoint named in ckptmsg.

   Raises RuntimeError when this instance is not stateful.
   """
   checkpoint_id = ckptmsg.checkpoint_id
   Log.info("Received initiate state checkpoint message for %s" % checkpoint_id)
   if not self.is_stateful:
     raise RuntimeError("Received state checkpoint message but we are not stateful topology")

   if not isinstance(component, StatefulComponent):
     Log.info("Trying to checkponit a non stateful component. Send empty state")
   else:
     component.pre_save(checkpoint_id)
   self.admit_ckpt_state(checkpoint_id, self._stateful_state)
Example #14
0
 def _post_result_to_server(self, json_result):
     """POST json_result to the host and path of self.parsed_url.

     :param json_result: request body, sent as given
     :return: HTTP status code of the response
     """
     conn = httplib.HTTPConnection(self.parsed_url.netloc)
     try:
         conn.request("POST", self.parsed_url.path, json_result)
         response = conn.getresponse()
         if response.status == 200:
             Log.info("HTTP POST successful")
         else:
             # Log.error is what the rest of this code logs failures with.
             # NOTE(review): the previous Log.severe is not a method of a
             # standard logging.Logger - confirm Log is one.
             Log.error("HTTP POST failed, response code: %d, response: %s" % (response.status, response.read()))
         return response.status
     finally:
         # release the socket on every path, including request()/getresponse() errors
         conn.close()
Example #15
0
 def emit(self, tup, stream=Stream.DEFAULT_STREAM_ID, anchors=None,
          direct_task=None, need_task_ids=False):
   """Emit through the parent bolt; when tup is None, a copy of the tuple
   currently being processed is emitted in its place."""
   Log.info("emitting tuple: %s", tup)
   parent = super(IntegrationTestBolt, self)
   if tup is not None:
     parent.emit(tup, stream, anchors, direct_task, need_task_ids)
     return

   parent.emit(list(self.current_tuple_processing),
               stream=stream, anchors=anchors,
               direct_task=direct_task, need_task_ids=need_task_ids)
Example #16
0
  def setTopologyInfo(self, topology):
    """
    Build the API representation of a topology from its
    stored proto states and cache it under the key
    (topology name, state manager name).
    Everything is extracted again from scratch on each
    call. The call does nothing when the topology has
    no execution state.
    """
    # Without an execution state nothing else matters.
    if not topology.execution_state:
      Log.info("No execution state found for: " + topology.name)
      return

    Log.info("Setting topology info for topology: " + topology.name)
    has_physical_plan = bool(topology.physical_plan)
    has_tmaster_location = bool(topology.tmaster)
    has_scheduler_location = bool(topology.scheduler_location)

    info = {
        "name": topology.name,
        "id": topology.id,
        "logical_plan": None,
        "physical_plan": None,
        "execution_state": None,
        "tmaster_location": None,
        "scheduler_location": None,
    }

    # the execution state also reports which other pieces are available
    execution_state = self.extract_execution_state(topology)
    execution_state["has_physical_plan"] = has_physical_plan
    execution_state["has_tmaster_location"] = has_tmaster_location
    execution_state["has_scheduler_location"] = has_scheduler_location
    execution_state["status"] = topology.get_status()

    info["metadata"] = self.extract_metadata(topology)
    info["runtime_state"] = self.extract_runtime_state(topology)

    info["execution_state"] = execution_state
    info["logical_plan"] = self.extract_logical_plan(topology)
    info["physical_plan"] = self.extract_physical_plan(topology)
    info["tmaster_location"] = self.extract_tmaster(topology)
    info["scheduler_location"] = self.extract_scheduler_location(topology)

    cache_key = (topology.name, topology.state_manager_name)
    self.topologyInfos[cache_key] = info
  def on_incoming_message(self, message):
    """Record the packet size in the gateway metrics and dispatch the message.

    Raises RuntimeError for any message type other than
    NewInstanceAssignmentMessage and TupleMessage.
    """
    packet_size = message.ByteSize()
    self.gateway_metrics.received_packet(packet_size)

    if isinstance(message, stmgr_pb2.NewInstanceAssignmentMessage):
      Log.info("Handling assignment message from direct NewInstanceAssignmentMessage")
      self._handle_assignment_message(message.pplan)
      return

    if isinstance(message, stmgr_pb2.TupleMessage):
      self._handle_new_tuples(message)
      return

    raise RuntimeError("Unknown kind of message received from Stream Manager")
Example #18
0
  def addNewTopology(self, state_manager, topologyName):
    """
    Create a Topology for topologyName, add it to the local
    cache and set watches that keep it up to date.
    """
    topology = Topology(topologyName, state_manager.name)
    Log.info("Adding new topology: %s, state_manager: %s",
             topologyName, state_manager.name)
    self.topologies.append(topology)

    # Refresh the topologyInfo whenever the topology changes.
    topology.register_watch(self.setTopologyInfo)

    def make_watch(trigger_message, store):
      """Build a callback that logs the trigger and stores the data it receives."""
      def on_change(data):
        Log.info(trigger_message + topologyName)
        store(data)
        if not data:
          Log.debug("No data to be set")
      return on_change

    on_topology_pplan = make_watch(
        "Watch triggered for topology pplan: ", topology.set_physical_plan)
    on_topology_packing_plan = make_watch(
        "Watch triggered for topology packing plan: ", topology.set_packing_plan)
    on_topology_execution_state = make_watch(
        "Watch triggered for topology execution state: ", topology.set_execution_state)
    on_topology_tmaster = make_watch(
        "Watch triggered for topology tmaster: ", topology.set_tmaster)
    on_topology_scheduler_location = make_watch(
        "Watch triggered for topology scheduler location: ", topology.set_scheduler_location)

    # Set watches on the pplan, packing plan, execution_state, tmaster
    # and scheduler_location.
    state_manager.get_pplan(topologyName, on_topology_pplan)
    state_manager.get_packing_plan(topologyName, on_topology_packing_plan)
    state_manager.get_execution_state(topologyName, on_topology_execution_state)
    state_manager.get_tmaster(topologyName, on_topology_tmaster)
    state_manager.get_scheduler_location(topologyName, on_topology_scheduler_location)
Example #19
0
  def _handle_register_response(self, response):
    """Handle an arriving register response (RegisterInstanceResponse).

    Raises RuntimeError when the response status is not OK.
    """
    status_code = response.status.status
    if status_code != common_pb2.StatusCode.Value("OK"):
      raise RuntimeError("Stream Manager returned a not OK response for register")
    Log.info("We registered ourselves to the Stream Manager")

    if not response.HasField("pplan"):
      Log.debug("Received a register response with no pplan")
      return

    Log.info("Handling assignment message from response")
    self._handle_assignment_message(response.pplan)
  def next_tuple(self):
    """Run one execution of the user spout unless the spout is already done,
    and emit the terminal once the spout is done afterwards."""
    if self.is_done:
      return

    self.max_executions = self.max_executions - 1
    Log.info("max executions: %d" % self.max_executions)

    self.user_spout.next_tuple()

    # is_done is read again because it may have changed since the first check
    if not self.is_done:
      return
    self._emit_terminal_if_needed()
    Log.info("This topology is finished.")
Example #21
0
def launch_topology_server(cl_args, topology_file, topology_defn_file, topology_name):
  '''
  Launch a topology given topology jar, its definition file and configurations
  by submitting both files to the API server named in cl_args['service_url']
  :param cl_args: parsed command line arguments
  :param topology_file: path of the topology file to upload
  :param topology_defn_file: path of the topology definition file to upload
  :param topology_name: name the topology is submitted under
  :return: SimpleResult with Status.Ok when the server answered ok or created,
           otherwise with Status.HeronError
  '''
  service_apiurl = cl_args['service_url'] + rest.ROUTE_SIGNATURES['submit'][1]
  service_method = rest.ROUTE_SIGNATURES['submit'][0]
  data = dict(
      name=topology_name,
      cluster=cl_args['cluster'],
      role=cl_args['role'],
      environment=cl_args['environ'],
      user=cl_args['submit_user'],
  )

  Log.info("" + str(cl_args))
  overrides = dict()
  if 'config_property' in cl_args:
    overrides = config.parse_override_config(cl_args['config_property'])

  if overrides:
    data.update(overrides)

  if cl_args['dry_run']:
    data["dry_run"] = True

  err_ctxt = "Failed to launch topology '%s' %s" % (topology_name, launch_mode_msg(cl_args))
  succ_ctxt = "Successfully launched topology '%s' %s" % (topology_name, launch_mode_msg(cl_args))

  try:
    # both uploads are closed as soon as the request finished or failed
    with open(topology_defn_file, 'rb') as definition, open(topology_file, 'rb') as topology:
      files = dict(definition=definition, topology=topology)
      r = service_method(service_apiurl, data=data, files=files)
    # status codes are plain integers: compare by value, not by identity
    ok = r.status_code == requests.codes.ok
    created = r.status_code == requests.codes.created
    s = Status.Ok if created or ok else Status.HeronError
    if s is Status.HeronError:
      Log.error(r.json().get('message', "Unknown error from API server %d" % r.status_code))
    elif ok:
      # this case happens when we request a dry_run
      print(r.json().get("response"))
  except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as err:
    Log.error(err)
    return SimpleResult(Status.HeronError, err_ctxt, succ_ctxt)
  return SimpleResult(s, err_ctxt, succ_ctxt)
  def initialize(self, config, context):
    """Load and instantiate the user spout named in config, read the maximum
    number of executions and initialize the user spout.

    Raises RuntimeError when config names no user spout classpath.
    """
    classpath = config.get(integ_const.USER_SPOUT_CLASSPATH, None)
    if classpath is None:
      raise RuntimeError("User defined integration test spout was not found")

    spout_cls = self._load_user_spout(context.get_topology_pex_path(), classpath)
    self.user_spout = spout_cls(delegate=self)

    # falls back to integ_const.MAX_EXECUTIONS when the config has no value
    self.max_executions = config.get(integ_const.USER_MAX_EXECUTIONS,
                                     integ_const.MAX_EXECUTIONS)
    assert isinstance(self.max_executions, int) and self.max_executions > 0
    Log.info("Max executions: %d" % self.max_executions)
    self.tuples_to_complete = 0

    self.user_spout.initialize(config, context)
  def initialize(self, config, context):
    """Set up the wrapped user spout and the execution counters.

    The user spout class is looked up through the classpath stored in config;
    a missing classpath raises RuntimeError.
    """
    user_classpath = config.get(integ_const.USER_SPOUT_CLASSPATH, None)
    if user_classpath is None:
      raise RuntimeError("User defined integration test spout was not found")

    pex_loader = self._load_user_spout
    user_cls = pex_loader(context.get_topology_pex_path(), user_classpath)
    self.user_spout = user_cls(delegate=self)

    default_executions = integ_const.MAX_EXECUTIONS
    self.max_executions = config.get(integ_const.USER_MAX_EXECUTIONS, default_executions)
    assert isinstance(self.max_executions, int) and self.max_executions > 0
    Log.info("Max executions: %d", self.max_executions)
    self.tuples_to_complete = 0

    self.user_spout.initialize(config, context)
Example #24
0
 def write_finished_data(self):
   """Post self.result as JSON to the server, retrying once on a non-200 answer.

   Raises RuntimeError when posting raises or both attempts return a status
   other than 200; the original exception is attached as the cause.
   """
   json_result = json.dumps(self.result)
   Log.info("Actual result: %s", json_result)
   Log.info("Posting actual result to %s", self.http_post_url)
   try:
     response_code = self._post_result_to_server(json_result)
     if response_code != 200:
       # try again
       response_code = self._post_result_to_server(json_result)
       if response_code != 200:
         raise RuntimeError(f"Response code: {response_code}")
   except Exception as e:
     # exceptions have no .message attribute on Python 3; format the exception itself
     raise RuntimeError(f"Posting result to server failed with: {e}") from e
Example #25
0
def update_config_files(cl_args):
    """Template every config file from the masters and zookeepers found in the roles."""
    Log.info("Updating config files...")
    roles = read_and_parse_roles(cl_args)
    Log.debug("roles: %s" % roles)
    masters = list(roles[Role.MASTERS])
    zookeepers = list(roles[Role.ZOOKEEPERS])

    # (templating function, host lists it takes after cl_args), in run order
    templating_steps = (
        (template_slave_hcl, (masters,)),
        (template_scheduler_yaml, (masters,)),
        (template_uploader_yaml, (masters,)),
        (template_apiserver_hcl, (masters, zookeepers)),
        (template_statemgr_yaml, (zookeepers,)),
        (template_heron_tools_hcl, (masters, zookeepers)),
    )
    for render, host_lists in templating_steps:
        render(cl_args, *host_lists)
Example #26
0
 def write_finished_data(self):
     """Post self.result as JSON to the server, retrying once on a non-200 answer.

     Raises RuntimeError when posting raises or both attempts return a status
     other than 200.
     """
     json_result = json.dumps(self.result)
     Log.info("Actual result: %s" % json_result)
     Log.info("Posting actual result to %s" % self.http_post_url)
     try:
         response_code = self._post_result_to_server(json_result)
         if response_code != 200:
             # try again
             response_code = self._post_result_to_server(json_result)
             if response_code != 200:
                 raise RuntimeError("Response code: %d" % response_code)
     except Exception as e:
         # e.message does not exist on Python 3; formatting the exception
         # itself gives its text on both Python 2 and Python 3
         raise RuntimeError("Posting result to server failed with: %s" % e)
Example #27
0
    def on_incoming_message(self, message):
        """Count the received packet and hand the message to its handler.

        Assignment messages go to _handle_assignment_message, tuple messages
        to _handle_new_tuples; anything else raises RuntimeError.
        """
        self.gateway_metrics.received_packet(message.ByteSize())

        if isinstance(message, stmgr_pb2.NewInstanceAssignmentMessage):
            Log.info("Handling assignment message from direct NewInstanceAssignmentMessage")
            self._handle_assignment_message(message.pplan)
            return

        if not isinstance(message, stmgr_pb2.TupleMessage):
            raise RuntimeError("Unknown kind of message received from Stream Manager")
        self._handle_new_tuples(message)
Example #28
0
    def _handle_register_response(self, response):
        """Handle an arriving register response (RegisterInstanceResponse).

        Marks the client as registered and processes the physical plan when
        the response carries one. Raises RuntimeError on a non-OK status.
        """
        status_code = response.status.status
        if status_code != common_pb2.StatusCode.Value("OK"):
            raise RuntimeError("Stream Manager returned a not OK response for register")
        Log.info("We registered ourselves to the Stream Manager")

        self.is_registered = True
        if not response.HasField("pplan"):
            Log.debug("Received a register response with no pplan")
            return

        Log.info("Handling assignment message from response")
        self._handle_assignment_message(response.pplan)
Example #29
0
def main():
    """Build the heron argument parser, parse the known arguments and log them."""
    parser_options = dict(
        prog='heron',
        epilog=help_epilog,
        formatter_class=config.SubcommandHelpFormatter,
        fromfile_prefix_chars='@',
        add_help=False,
        rcfile="./.heronrc",
    )
    parser = HeronArgumentParser(**parser_options)
    parser.add_subparsers(title="Available commands", metavar='<command> <options>')

    parsed = parser.parse_known_args()
    args, unknown_args = parsed
    Log.info("parse results args: %s  unknown: %s ", args, unknown_args)
Example #30
0
def update_config_files(cl_args):
  """Re-template all config files from the current roles."""
  Log.info("Updating config files...")
  role_map = read_and_parse_roles(cl_args)
  Log.debug("roles: %s" % role_map)
  master_hosts = list(role_map[Role.MASTERS])
  zookeeper_hosts = list(role_map[Role.ZOOKEEPERS])

  # files that only depend on the masters
  template_slave_hcl(cl_args, master_hosts)
  template_scheduler_yaml(cl_args, master_hosts)
  template_uploader_yaml(cl_args, master_hosts)

  # files that also (or only) depend on the zookeepers
  template_apiserver_hcl(cl_args, master_hosts, zookeeper_hosts)
  template_statemgr_yaml(cl_args, zookeeper_hosts)
  template_heron_tools_hcl(cl_args, master_hosts, zookeeper_hosts)
Example #31
0
def parse_cluster_role_env(cluster_role_env, config_path):
  """Parse cluster/[role]/[environ], supply default, if not provided, not required

  A missing role defaults to the current user and a missing environ to ENVIRON,
  unless the client config file under config_path marks them as required.

  :param cluster_role_env: string of the form cluster[/role[/environ]];
                           anything after the third part is ignored
  :param config_path: directory that may hold the client config file
  :return: tuple (cluster, role, environ)
  :raises Exception: when config_path is not a directory, or when a role or
                     environ that the client config requires is missing

  Exits the process with status 1 when any of the three parts is empty.
  """
  parts = cluster_role_env.split('/')[:3]
  Log.info("Using config file under %s" % config_path)
  if not os.path.isdir(config_path):
    Log.error("Config path cluster directory does not exist: %s" % config_path)
    raise Exception("Invalid config path")

  # if cluster/role/env is not completely provided, check further
  if len(parts) < 3:

    cli_conf_file = os.path.join(config_path, CLIENT_YAML)

    # if client conf doesn't exist, use default value
    if not os.path.isfile(cli_conf_file):
      if len(parts) == 1:
        parts.append(getpass.getuser())
      if len(parts) == 2:
        parts.append(ENVIRON)
    else:
      cli_confs = {}
      with open(cli_conf_file, 'r') as conf_file:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary objects; consider yaml.safe_load if this file can ever
        # come from an untrusted source.
        tmp_confs = yaml.load(conf_file)
        # the return value of yaml.load can be None if conf_file is an empty file
        if tmp_confs is not None:
          cli_confs = tmp_confs
        else:
          # print() with a single argument prints the same text on Python 2 and 3
          print("Failed to read: %s due to it is empty" % (CLIENT_YAML))

      # if role is required but not provided, raise exception
      if len(parts) == 1:
        if (IS_ROLE_REQUIRED in cli_confs) and (cli_confs[IS_ROLE_REQUIRED] is True):
          raise Exception("role required but not provided (cluster/role/env = %s). See %s in %s"
                          % (cluster_role_env, IS_ROLE_REQUIRED, CLIENT_YAML))
        else:
          parts.append(getpass.getuser())

      # if environ is required but not provided, raise exception
      if len(parts) == 2:
        if (IS_ENV_REQUIRED in cli_confs) and (cli_confs[IS_ENV_REQUIRED] is True):
          raise Exception("environ required but not provided (cluster/role/env = %s). See %s in %s"
                          % (cluster_role_env, IS_ENV_REQUIRED, CLIENT_YAML))
        else:
          parts.append(ENVIRON)

  # if cluster or role or environ is empty, print
  if len(parts[0]) == 0 or len(parts[1]) == 0 or len(parts[2]) == 0:
    print("Failed to parse")
    sys.exit(1)

  return (parts[0], parts[1], parts[2])
Example #32
0
    def addNewTopology(self, state_manager, topologyName):
        """
        Create a Topology for topologyName, keep it in the local cache and
        watch the state manager for changes to it.
        """
        topology = Topology(topologyName, state_manager.name)
        Log.info("Adding new topology: %s, state_manager: %s", topologyName,
                 state_manager.name)
        self.topologies.append(topology)

        # Refresh the topologyInfo whenever the topology changes.
        topology.register_watch(self.setTopologyInfo)

        # Each watch is registered right after it is defined, in the order
        # pplan, execution_state, tmaster, scheduler_location.

        def pplan_changed(data):
            """watch physical plan"""
            Log.info("Watch triggered for topology pplan: " + topologyName)
            topology.set_physical_plan(data)
            if not data:
                Log.debug("No data to be set")

        state_manager.get_pplan(topologyName, pplan_changed)

        def execution_state_changed(data):
            """watch execution state"""
            Log.info("Watch triggered for topology execution state: " + topologyName)
            topology.set_execution_state(data)
            if not data:
                Log.debug("No data to be set")

        state_manager.get_execution_state(topologyName, execution_state_changed)

        def tmaster_changed(data):
            """set tmaster"""
            Log.info("Watch triggered for topology tmaster: " + topologyName)
            topology.set_tmaster(data)
            if not data:
                Log.debug("No data to be set")

        state_manager.get_tmaster(topologyName, tmaster_changed)

        def scheduler_location_changed(data):
            """set scheduler location"""
            Log.info("Watch triggered for topology scheduler location: " + topologyName)
            topology.set_scheduler_location(data)
            if not data:
                Log.debug("No data to be set")

        state_manager.get_scheduler_location(topologyName, scheduler_location_changed)
Example #33
0
    def on_topologies_watch(state_manager: StateManager, topologies: List[str]) -> None:
      """Sync the cached topologies of a state manager with the reported names:
      cached topologies that are no longer reported are removed, newly
      reported ones are added."""
      reported_names = set(topologies)
      Log.info("State watch triggered for topologies.")
      Log.debug("Topologies: %s", reported_names)

      cached_names = set()
      for cached in self.get_stmgr_topologies(state_manager.name):
        cached_names.add(cached.name)
      Log.debug("Existing topologies: %s", cached_names)

      stale_names = cached_names - reported_names
      new_names = reported_names - cached_names

      for name in stale_names:
        Log.info("Removing topology: %s in rootpath: %s",
                 name, state_manager.rootpath)
        self.remove_topology(name, state_manager.name)

      for name in new_names:
        self.add_new_topology(state_manager, name)
Example #34
0
def distribute_package(roles, cl_args):
  '''
  distribute Heron packages to all nodes

  Packs cl_args["heron_dir"] into a temporary tar file and hands that file to
  scp_package together with the union of the master and slave nodes.

  :param roles: mapping that holds the Role.MASTERS and Role.SLAVES node sets
  :param cl_args: parsed command line arguments; "heron_dir" is read from it
  '''
  Log.info("Distributing heron package to nodes (this might take a while)...")
  masters = roles[Role.MASTERS]
  slaves = roles[Role.SLAVES]

  # delete=False keeps the file on disk once the handle is closed, so the path
  # stays reserved for make_tarfile. Taking .name from a discarded
  # NamedTemporaryFile yields the path of an already deleted file.
  # NOTE(review): the archive is not removed afterwards - confirm whether
  # scp_package still needs it after returning before adding a cleanup.
  with tempfile.NamedTemporaryFile(suffix=".tmp", delete=False) as tar_handle:
    tar_file = tar_handle.name
  Log.debug("TAR file %s to %s" % (cl_args["heron_dir"], tar_file))
  make_tarfile(tar_file, cl_args["heron_dir"])
  dist_nodes = masters.union(slaves)

  scp_package(tar_file, dist_nodes, cl_args)
Example #35
0
def launch_topologies(cl_args, topology_file, tmp_dir):
    '''
    Launch every topology whose .defn file is found in tmp_dir
    :param cl_args: parsed command line arguments; 'deploy_mode' selects
                    between server based and direct deployment
    :param topology_file: topology file, passed on to the launcher
    :param tmp_dir: directory that is searched for *.defn files
    :return: list(Responses), one per launched topology; a single SimpleResult
             with Status.HeronError when no definition file exists or one of
             them cannot be loaded
    '''
    # the submitter would have written the .defn file to the tmp_dir
    defn_files = glob.glob(tmp_dir + '/*.defn')

    if not defn_files:
        return SimpleResult(Status.HeronError,
                            "No topologies found under %s" % tmp_dir)

    results = []
    for defn_file in defn_files:
        # load the topology definition from the file
        topology_defn = topology_pb2.Topology()
        try:
            # the with block closes the file even when reading or parsing fails
            with open(defn_file, "rb") as handle:
                topology_defn.ParseFromString(handle.read())
        except Exception as e:
            err_context = "Cannot load topology definition '%s': %s" % (
                defn_file, e)
            return SimpleResult(Status.HeronError, err_context)

        # log topology and components configurations
        Log.debug("Topology config: %s", topology_defn.topology_config)
        Log.debug("Component config:")
        for spout in topology_defn.spouts:
            Log.debug("%s => %s", spout.comp.name, spout.comp.config)
        for bolt in topology_defn.bolts:
            Log.debug("%s => %s", bolt.comp.name, bolt.comp.config)

        # launch the topology
        Log.info("Launching topology: \'%s\'%s", topology_defn.name,
                 launch_mode_msg(cl_args))

        # check if we have to do server or direct based deployment
        if cl_args['deploy_mode'] == config.SERVER_MODE:
            res = launch_topology_server(cl_args, topology_file, defn_file,
                                         topology_defn.name)
        else:
            res = launch_a_topology(cl_args, tmp_dir, topology_file, defn_file,
                                    topology_defn.name)
        results.append(res)

    return results
Example #36
0
def main():
  """Build the top level ``heron`` parser, parse the command line and log the result."""
  parser_options = dict(
      prog='heron',
      epilog=help_epilog,
      formatter_class=config.SubcommandHelpFormatter,
      fromfile_prefix_chars='@',
      add_help=False,
      rcfile="./.heronrc",
  )
  parser = HeronArgumentParser(**parser_options)

  # register the sub-command group; the object returned by add_subparsers is not kept
  parser.add_subparsers(title="Available commands", metavar='<command> <options>')

  known, leftover = parser.parse_known_args()
  Log.info("parse results args: %s  unknown: %s ", known, leftover)
Example #37
0
    def _handle_assignment_message(self, pplan):
        """Process the physical plan carried by a NewInstanceAssignmentMessage.

        A helper is built for ``pplan`` and pushed to the Heron instance, either as a first
        assignment (no helper stored yet) or as a state change, and is then stored as the
        current helper.

        :raises RuntimeError: if a helper is already stored and the new one has a different
                              component name or task id
        """
        Log.debug("In handle_assignment_message() of STStmgrClient, Physical Plan: \n%s", str(pplan))
        new_helper = PhysicalPlanHelper(
            pplan,
            self.instance.instance_id,
            self.heron_instance_cls.topo_pex_file_abs_path,
        )

        previous = self._pplan_helper
        if previous is None:
            # nothing stored yet: this is the first assignment
            Log.info("Received a new Physical Plan")
            Log.info("Push the new pplan_helper to Heron Instance")
            self.heron_instance_cls.handle_assignment_msg(new_helper)
        else:
            component_changed = previous.my_component_name != new_helper.my_component_name
            if component_changed or previous.my_task_id != new_helper.my_task_id:
                raise RuntimeError("Our Assignment has changed. We will die to pick it.")

            # same component and task: only the topology state may differ
            Log.info("Received a new Physical Plan with the same assignment -- State Change")
            old_state = self._pplan_helper.get_topology_state()
            Log.info("Old state: %s, new state: %s.", old_state, new_helper.get_topology_state())
            self.heron_instance_cls.handle_state_change_msg(new_helper)

        self._pplan_helper = new_helper
Example #38
0
    def _handle_assignment_message(self, pplan):
        """Handle the physical plan of an incoming NewInstanceAssignmentMessage.

        Raises RuntimeError when a helper is already stored and the new plan assigns a
        different component name or task id; otherwise forwards the new helper to the
        Heron instance and stores it.
        """
        Log.debug(
            "In handle_assignment_message() of STStmgrClient, Physical Plan: \n%s",
            str(pplan))
        new_helper = PhysicalPlanHelper(pplan,
                                        self.instance.instance_id,
                                        self.heron_instance_cls.topo_pex_file_abs_path)

        current = self._pplan_helper
        first_assignment = current is None

        # an existing assignment must keep its component name and task id
        if not first_assignment:
            if (current.my_component_name != new_helper.my_component_name
                    or current.my_task_id != new_helper.my_task_id):
                raise RuntimeError(
                    "Our Assignment has changed. We will die to pick it.")

        if not first_assignment:
            Log.info(
                "Received a new Physical Plan with the same assignment -- State Change"
            )
            Log.info("Old state: %s, new state: %s.",
                     self._pplan_helper.get_topology_state(),
                     new_helper.get_topology_state())
            self.heron_instance_cls.handle_state_change_msg(new_helper)
        else:
            Log.info("Received a new Physical Plan")
            Log.info("Push the new pplan_helper to Heron Instance")
            self.heron_instance_cls.handle_assignment_msg(new_helper)

        self._pplan_helper = new_helper
Example #39
0
 def handle_initiate_stateful_checkpoint(self, ckptmsg, component):
     """Handle a request to checkpoint the state of ``component``.

     :param ckptmsg: checkpoint request; only its ``checkpoint_id`` is read here
     :param component: component instance; ``pre_save`` is called on it when it is a
                       ``StatefulComponent``
     :raises RuntimeError: if ``self.is_stateful`` is false
     """
     checkpoint_id = ckptmsg.checkpoint_id
     Log.info("Received initiate state checkpoint message for %s", checkpoint_id)
     if not self.is_stateful:
         raise RuntimeError(
             "Received state checkpoint message but we are not stateful topology"
         )
     if isinstance(component, StatefulComponent):
         component.pre_save(checkpoint_id)
     else:
         Log.info(
             "Trying to checkpoint a non stateful component. Send empty state"
         )
     # the state is admitted in both cases, whether or not pre_save was called
     self.admit_ckpt_state(checkpoint_id, self._stateful_state)
Example #40
0
def distribute_package(roles, cl_args):
  '''
  Distribute the Heron package to all master and slave nodes

  :param roles: mapping from Role to the set of hosts having that role
  :param cl_args: parsed command line arguments; ``cl_args["heron_dir"]`` is the
                  directory that gets archived
  '''
  import os

  Log.info("Distributing heron package to nodes (this might take a while)...")
  masters = roles[Role.MASTERS]
  slaves = roles[Role.SLAVES]

  # mkstemp atomically creates the file, so the path cannot be claimed by another
  # process between picking the name and writing the archive
  handle, tar_file = tempfile.mkstemp(suffix=".tmp")
  os.close(handle)
  try:
    Log.debug("TAR file %s to %s", cl_args["heron_dir"], tar_file)
    make_tarfile(tar_file, cl_args["heron_dir"])
    dist_nodes = masters.union(slaves)

    scp_package(tar_file, dist_nodes, cl_args)
  finally:
    # NOTE(review): assumes scp_package returns only after all copies have finished
    os.remove(tar_file)
Example #41
0
    def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID, direct_task=None, need_task_ids=None):
        """Emit a tuple from this integration test spout.

        Overridden method which will be called when the user's spout calls emit().
        Tuples emitted without an id are given the integration test mock message id.
        """
        # every call through this override counts as one more tuple to complete
        self.tuples_to_complete += 1

        _tup_id = tup_id
        if _tup_id is None:
            Log.info("Add tup_id for tuple: %s" % str(tup))
            _tup_id = integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID

        parent = super(IntegrationTestSpout, self)
        parent.emit(tup, _tup_id, stream, direct_task, need_task_ids)
Example #42
0
    def on_topologies_watch(state_manager, topologies) -> None:
      """Reconcile the topologies cached for ``state_manager`` with the reported names."""
      Log.info("State watch triggered for topologies.")
      Log.debug("Topologies: " + str(topologies))

      cached_names = []
      for cached in self.get_stmgr_topologies(state_manager.name):
        cached_names.append(cached.name)
      Log.debug(f"Existing topologies: {cached_names}")

      # drop cached topologies that are no longer reported
      for name in cached_names:
        if name in topologies:
          continue
        Log.info("Removing topology: %s in rootpath: %s",
                 name, state_manager.rootpath)
        self.remove_topology(name, state_manager.name)

      # add reported topologies that are not cached yet
      for name in topologies:
        if name in cached_names:
          continue
        self.add_new_topology(state_manager, name)
Example #43
0
def _run_shell_and_log(cmd):
    '''
    Run ``cmd`` through the shell, wait for it and log its return code and output
    '''
    Log.debug(cmd)
    proc = subprocess.Popen(cmd,
                            shell=True,
                            universal_newlines=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)

    # communicate() drains both pipes while waiting; calling wait() first can
    # deadlock once the child fills a pipe buffer
    output = proc.communicate()
    Log.debug("return code: %s output: %s" % (proc.returncode, output))


def stop_cluster(cl_args):
    '''
    Tear down the cluster: delete all jobs known to the first master, then kill
    the heron-nomad processes and remove /tmp/slave and /tmp/master on every
    master and slave node

    :param cl_args: parsed command line arguments, passed on to the helpers
    '''
    Log.info("Terminating cluster...")

    roles = read_and_parse_roles(cl_args)
    masters = roles[Role.MASTERS]
    slaves = roles[Role.SLAVES]
    dist_nodes = masters.union(slaves)

    # stop all jobs
    if masters:
        try:
            single_master = list(masters)[0]
            jobs = get_jobs(cl_args, single_master)
            for job in jobs:
                job_id = job["ID"]
                Log.info("Terminating job %s" % job_id)
                delete_job(cl_args, job_id, single_master)
        except Exception:
            # best effort: a failure here must not prevent the node cleanup below,
            # but KeyboardInterrupt and SystemExit are no longer swallowed
            Log.debug("Error stopping jobs")
            Log.debug(sys.exc_info()[0])

    for node in dist_nodes:
        remote = not is_self(node)

        Log.info("Terminating processes on %s" % node)
        if remote:
            # the remote variant keeps a literal backslash in front of $2
            # (presumably so the local shell does not expand it -- verify against
            # ssh_remote_execute); written as \\ to avoid an invalid escape sequence
            cmd = "ps aux | grep heron-nomad | awk '{print \\$2}' " \
                  "| xargs kill"
            cmd = ssh_remote_execute(cmd, node, cl_args)
        else:
            cmd = "ps aux | grep heron-nomad | awk '{print $2}' " \
                  "| xargs kill"
        _run_shell_and_log(cmd)

        Log.info("Cleaning up directories on %s" % node)
        cmd = "rm -rf /tmp/slave ; rm -rf /tmp/master"
        if remote:
            cmd = ssh_remote_execute(cmd, node, cl_args)
        _run_shell_and_log(cmd)
  def emit(self, tup, tup_id=None, stream=Stream.DEFAULT_STREAM_ID,
           direct_task=None, need_task_ids=None):
    """Emit a tuple from this integration test spout.

    Overridden method which will be called when the user's spout calls emit().
    A tuple emitted without an id is sent with the integration test mock message id.
    """
    # every call through this override counts as one more tuple to complete
    self.tuples_to_complete += 1

    id_missing = tup_id is None
    if id_missing:
      Log.info("Add tup_id for tuple: %s", str(tup))
    _tup_id = integ_const.INTEGRATION_TEST_MOCK_MESSAGE_ID if id_missing else tup_id

    super().emit(tup, _tup_id, stream, direct_task, need_task_ids)
Example #45
0
    def on_topologies_watch(state_manager, topologies):
      """Reconcile the topologies known for ``state_manager`` with the reported names.

      Known topologies that are no longer reported are removed; reported names that
      are not known yet are added.
      """
      Log.info("State watch triggered for topologies.")
      Log.debug("Topologies: " + str(topologies))
      existing_topologies = self.getTopologiesForStateLocation(state_manager.name)
      # A real list is required: the names are logged, iterated and then used for
      # membership tests. On Python 3 a map() object would be exhausted by the first
      # loop, so every reported topology would be added again.
      existing_names = [topology.name for topology in existing_topologies]
      Log.debug("Existing topologies: " + str(existing_names))
      for name in existing_names:
        if name not in topologies:
          Log.info("Removing topology: %s in rootpath: %s",
                   name, state_manager.rootpath)
          self.removeTopology(name, state_manager.name)

      for name in topologies:
        if name not in existing_names:
          self.addNewTopology(state_manager, name)
Example #46
0
    def on_topologies_watch(state_manager, topologies):
      """Reconcile the topologies known for ``state_manager`` with the reported names.

      Known topologies that are no longer reported are removed; reported names that
      are not known yet are added.
      """
      Log.info("State watch triggered for topologies.")
      Log.debug("Topologies: " + str(topologies))
      existing_topologies = self.getTopologiesForStateLocation(state_manager.name)
      # A real list is required: the names are logged, iterated and then used for
      # membership tests. On Python 3 a map() object would be exhausted by the first
      # loop, so every reported topology would be added again.
      existing_names = [topology.name for topology in existing_topologies]
      Log.debug("Existing topologies: " + str(existing_names))
      for name in existing_names:
        if name not in topologies:
          Log.info("Removing topology: %s in rootpath: %s",
                   name, state_manager.rootpath)
          self.removeTopology(name, state_manager.name)

      for name in topologies:
        if name not in existing_names:
          self.addNewTopology(state_manager, name)
Example #47
0
def run(command, parser, cl_args, unknown_args):
    """Run the update command through the Heron runtime manager.

    :param command: forwarded to the runtime manager as ``--command``
    :param parser: not used by this function
    :param cl_args: parsed command line arguments (topology name, cluster, role,
                    environment, config paths and component parallelism)
    :param unknown_args: not used by this function
    :return: True when the runtime manager invocation did not raise, False otherwise
    """
    topology_name = cl_args['topology-name']
    try:
        new_args = [
            "--cluster",
            cl_args['cluster'],
            "--role",
            cl_args['role'],
            "--environment",
            cl_args['environ'],
            "--heron_home",
            config.get_heron_dir(),
            "--config_path",
            cl_args['config_path'],
            "--override_config_file",
            cl_args['override_config_file'],
            "--release_file",
            config.get_heron_release_file(),
            "--topology_name",
            topology_name,
            "--command",
            command,
            "--component_parallelism",
            ','.join(cl_args['component_parallelism']),
        ]

        if Log.getEffectiveLevel() == logging.DEBUG:
            new_args.append("--verbose")

        lib_jars = config.get_heron_libs(jars.scheduler_jars() +
                                         jars.statemgr_jars() +
                                         jars.packing_jars())

        # invoke the runtime manager to update the topology
        execute.heron_class('com.twitter.heron.scheduler.RuntimeManagerMain',
                            lib_jars,
                            extra_jars=[],
                            args=new_args)

    except Exception:
        # format_exc() formats the exception currently being handled; its first
        # parameter is a frame limit, not the exception object
        Log.error('Failed to update topology \'%s\': %s', topology_name,
                  traceback.format_exc())
        return False

    Log.info('Successfully updated topology \'%s\'', topology_name)
    return True
 def emit(self,
          tup,
          stream=Stream.DEFAULT_STREAM_ID,
          anchors=None,
          direct_task=None,
          need_task_ids=False):
     """Emit ``tup`` from this integration test bolt.

     When ``tup`` is None, a list copy of ``self.current_tuple_processing`` is
     emitted in its place.
     """
     Log.info("emitting tuple: %s", tup)
     parent = super(IntegrationTestBolt, self)

     if tup is not None:
         parent.emit(tup, stream, anchors, direct_task, need_task_ids)
         return

     # no tuple given: forward the tuple that is currently being processed
     parent.emit(list(self.current_tuple_processing),
                 stream=stream,
                 anchors=anchors,
                 direct_task=direct_task,
                 need_task_ids=need_task_ids)
  def _gather_one_metric(self, name, message):
    """Read and reset the metric ``name`` and add its value(s) to ``message``.

    A ``None`` value adds nothing. A dict value adds one datum per entry, named
    ``<name>/<key>``; entries whose key or value is None are skipped. Any other
    value is added under ``name`` itself.
    """
    metric_value = self.metrics_map[name].get_value_and_reset()
    Log.debug("In gather_one_metric with name: %s, and value: %s", name, str(metric_value))

    if metric_value is None:
      return

    if isinstance(metric_value, dict):
      for key, value in list(metric_value.items()):
        if key is not None and value is not None:
          # added exactly once per entry; the previous code added the same
          # "<name>/<key>" datum twice
          self._add_data_to_message(message, "%s/%s" % (name, str(key)), value)
        else:
          Log.info("When gathering metric: %s, <%s:%s> is not a valid key-value to output "
                   "as metric. Skipping...", name, str(key), str(value))
    else:
      self._add_data_to_message(message, name, metric_value)
Example #50
0
    def _handle_packet(self, packet):
        """Dispatch a complete incoming packet.

        A packet whose request id is in ``self.context_map`` is treated as the response
        to a pending request and passed to ``on_response``. A packet with a zero request
        id is treated as a message and passed to ``on_incoming_message``. Anything else
        is only logged.

        :raises RuntimeError: if a message packet cannot be built, parsed or handled
        """
        # only called when packet.is_complete is True
        # otherwise, it's just an message -- call on_incoming_message()
        typename, reqid, serialized_msg = HeronProtocol.decode_packet(packet)
        # `in` replaces dict.has_key(), which does not exist on Python 3
        if reqid in self.context_map:
            # this incoming packet has the response of a request
            context = self.context_map.pop(reqid)
            response_msg = self.response_message_map.pop(reqid)

            try:
                response_msg.ParseFromString(serialized_msg)
            except Exception as e:
                # exceptions have no `.message` attribute on Python 3; log the exception itself
                Log.error("Invalid Packet Error: %s", e)
                self._handle_close()
                self.on_error()
                return

            if response_msg.IsInitialized():
                self.on_response(StatusCode.OK, context, response_msg)
            else:
                Log.error("Response not initialized")
                self._handle_close()
                self.on_error()
        elif reqid.is_zero():
            # this is a Message -- no need to send back response
            try:
                if typename not in self.registered_message_map:
                    raise ValueError("%s is not registered in message map" %
                                     typename)
                msg_builder = self.registered_message_map[typename]
                message = msg_builder()
                message.ParseFromString(serialized_msg)
                if message.IsInitialized():
                    self.on_incoming_message(message)
                else:
                    raise RuntimeError("Message not initialized")
            except Exception as e:
                Log.error("Error when handling message packet: %s", e)
                Log.error(traceback.format_exc())
                raise RuntimeError("Problem reading message")
        else:
            # might be a timeout response
            Log.info(
                "In handle_packet(): Received message whose REQID is not registered: %s",
                str(reqid))
Example #51
0
 def on_incoming_message(self, message):
     """Record the size of ``message`` and route it to the handler for its type.

     :raises RuntimeError: if the message is of none of the handled types
     """
     self.gateway_metrics.update_received_packet(message.ByteSize())

     if isinstance(message, stmgr_pb2.NewInstanceAssignmentMessage):
         Log.info(
             "Handling assignment message from direct NewInstanceAssignmentMessage"
         )
         # only the physical plan of the assignment is passed on
         self._handle_assignment_message(message.pplan)
         return

     if isinstance(message, tuple_pb2.HeronTupleSet2):
         self._handle_new_tuples_2(message)
         return

     if isinstance(message, ckptmgr_pb2.StartInstanceStatefulProcessing):
         self._handle_start_stateful_processing(message)
         return

     if isinstance(message, ckptmgr_pb2.RestoreInstanceStateRequest):
         self._handle_restore_instance_state(message)
         return

     if isinstance(message, ckptmgr_pb2.InitiateStatefulCheckpoint):
         self._handle_initiate_stateful_checkpoint(message)
         return

     raise RuntimeError(
         "Unknown kind of message received from Stream Manager")
Example #52
0
  def add_new_topology(self, state_manager, topology_name: str) -> None:
    """
    Create a Topology for ``topology_name``, append it to ``self.topologies`` and
    pass its setters to the state manager getters for the physical plan, packing
    plan, execution state, tmanager and scheduler location.
    """
    new_topology = Topology(topology_name, state_manager.name, self.config)
    Log.info("Adding new topology: %s, state_manager: %s",
             topology_name, state_manager.name)
    # NOTE(review): the earlier comment here said the cache is populated before the
    # topology becomes addressable, yet the append happens before the getters below
    # are called -- confirm the intended ordering.
    self.topologies.append(new_topology)

    # each state manager getter receives the matching setter of the new topology
    watches = (
        (state_manager.get_pplan, new_topology.set_physical_plan),
        (state_manager.get_packing_plan, new_topology.set_packing_plan),
        (state_manager.get_execution_state, new_topology.set_execution_state),
        (state_manager.get_tmanager, new_topology.set_tmanager),
        (state_manager.get_scheduler_location, new_topology.set_scheduler_location),
    )
    for getter, setter in watches:
      getter(topology_name, setter)
Example #53
0
def wait_for_master_to_start(single_master):
    '''
    Wait for a nomad master to start

    Polls the leader status endpoint of ``single_master`` once per second and
    returns as soon as it answers with HTTP 200. Exits the process with -1
    when the master is still not up after the retries are used up.
    '''
    i = 0
    while True:
        try:
            r = requests.get("http://%s:4646/v1/status/leader" % single_master)
            if r.status_code == 200:
                break
            # a non-200 answer is treated as "not up yet"; it previously skipped both
            # the sleep and the retry limit, spinning forever
            Log.debug("Unexpected status code from leader endpoint: %s", r.status_code)
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still interrupts the wait
            Log.debug(sys.exc_info()[0])

        Log.info("Waiting for cluster to come up... %s" % i)
        time.sleep(1)
        if i > 10:
            Log.error("Failed to start Nomad Cluster!")
            sys.exit(-1)
        i = i + 1
Example #54
0
def run(command, parser, cl_args, unknown_args, action):
  '''
  helper function to take action on topologies
  :param command:       forwarded to the runtime manager as ``--command``
  :param parser:        not used by this function
  :param cl_args:       parsed command line arguments
  :param unknown_args:  not used by this function
  :param action:        description of action taken
  :return:              True when the runtime manager invocation did not raise,
                        False otherwise
  '''
  # bound before the try block so that the error handler can always reference it,
  # even when the lookup below is what fails
  topology_name = None
  try:
    topology_name = cl_args['topology-name']

    new_args = [
        "--cluster", cl_args['cluster'],
        "--role", cl_args['role'],
        "--environment", cl_args['environ'],
        "--heron_home", config.get_heron_dir(),
        "--config_path", cl_args['config_path'],
        "--override_config_file", cl_args['override_config_file'],
        "--release_file", config.get_heron_release_file(),
        "--topology_name", topology_name,
        "--command", command,
    ]

    if Log.getEffectiveLevel() == logging.DEBUG:
      new_args.append("--verbose")

    lib_jars = config.get_heron_libs(jars.scheduler_jars() + jars.statemgr_jars())

    # invoke the runtime manager to carry out the requested command
    execute.heron_class(
        'com.twitter.heron.scheduler.RuntimeManagerMain',
        lib_jars,
        extra_jars=[],
        args=new_args
    )

  except Exception:
    Log.error('Failed to %s \'%s\'' % (action, topology_name))
    # keep the cause available when debugging instead of dropping it
    Log.debug("Failure details:", exc_info=True)
    return False

  Log.info('Successfully executed %s \'%s\'' % (action, topology_name))
  return True
Example #55
0
def wait_for_job_to_start(single_master, job):
    '''
    Wait for a Nomad job to start

    Polls the job endpoint of ``single_master`` once per second and returns as
    soon as it answers with HTTP 200 and the status "running". Exits the process
    with -1 when the job is still not running after the retries are used up.
    '''
    i = 0
    while True:
        try:
            r = requests.get("http://%s:4646/v1/job/%s" % (single_master, job))
            if r.status_code == 200 and r.json()["Status"] == "running":
                break
            # not running yet: fall through to the retry path below
            Log.debug("Job %s is not running yet", job)
        except Exception:
            # Exception rather than a bare except, so Ctrl-C still interrupts the wait
            Log.debug(sys.exc_info()[0])

        Log.info("Waiting for %s to come up... %s" % (job, i))
        time.sleep(1)
        if i > 20:
            Log.error("Failed to start Nomad Cluster!")
            sys.exit(-1)
        i = i + 1
Example #56
0
def launch_topologies(cl_args, topology_file, tmp_dir):
    '''
    Launch every topology whose definition file is found in ``tmp_dir``.

    :param cl_args: parsed command line arguments, passed on to the launcher
    :param topology_file: topology file handed to the launcher
    :param tmp_dir: directory in which the submitter wrote the ``.defn`` files
    :return: None
    :raises Exception: when no definition file exists, when one cannot be opened
                       and parsed, or when launching a topology fails (the
                       launcher's exception is logged and re-raised)
    '''
    # the submitter would have written the .defn file to the tmp_dir
    defn_files = glob.glob(tmp_dir + '/*.defn')

    if not defn_files:
        raise Exception("No topologies found")

    for defn_file in defn_files:

        # load the topology definition from the file
        topology_defn = topology_pb2.Topology()
        try:
            # the context manager closes the file even when parsing fails
            with open(defn_file, "rb") as handle:
                topology_defn.ParseFromString(handle.read())
        except Exception:
            raise Exception(
                "Could not open and parse topology defn file %s" %
                defn_file)

        # launch the topology
        try:
            Log.info("Launching topology \'%s\'" % topology_defn.name)
            launch_a_topology(cl_args, tmp_dir, topology_file, defn_file)
            Log.info("Topology \'%s\' launched successfully" %
                     topology_defn.name)
        except Exception as ex:
            Log.exception('Failed to launch topology \'%s\' because %s' %
                          (topology_defn.name, str(ex)))
            raise
Example #57
0
  def add_new_topology(self, state_manager, topology_name: str) -> None:
    """
    Create a Topology for ``topology_name``, append it to ``self.topologies``,
    register ``self.set_topology_info`` as its watch and pass its setters to the
    state manager getters.
    """
    tracked = Topology(topology_name, state_manager.name)
    Log.info("Adding new topology: %s, state_manager: %s",
             topology_name, state_manager.name)
    self.topologies.append(tracked)

    # set_topology_info is registered as the watch callback of the topology
    tracked.register_watch(self.set_topology_info)

    # pplan, packing plan, execution_state, tmanager and scheduler_location:
    # each state manager getter receives the matching setter of the topology
    subscriptions = [
        (state_manager.get_pplan, tracked.set_physical_plan),
        (state_manager.get_packing_plan, tracked.set_packing_plan),
        (state_manager.get_execution_state, tracked.set_execution_state),
        (state_manager.get_tmanager, tracked.set_tmanager),
        (state_manager.get_scheduler_location, tracked.set_scheduler_location),
    ]
    for fetch, on_change in subscriptions:
      fetch(topology_name, on_change)
Example #58
0
 def on_incoming_message(self, message):
   """Record the size of ``message`` and route it to the handler for its type.

   Any exception raised while handling, including the RuntimeError for an
   unknown message type, is logged with its traceback and the process exits
   with status 1.
   """
   self.gateway_metrics.update_received_packet(message.ByteSize())
   try:
     if isinstance(message, stmgr_pb2.NewInstanceAssignmentMessage):
       Log.info("Handling assignment message from direct NewInstanceAssignmentMessage")
       # only the physical plan of the assignment is passed on
       self._handle_assignment_message(message.pplan)
       return

     if isinstance(message, tuple_pb2.HeronTupleSet2):
       self._handle_new_tuples_2(message)
       return

     if isinstance(message, ckptmgr_pb2.StartInstanceStatefulProcessing):
       self._handle_start_stateful_processing(message)
       return

     if isinstance(message, ckptmgr_pb2.RestoreInstanceStateRequest):
       self._handle_restore_instance_state(message)
       return

     if isinstance(message, ckptmgr_pb2.InitiateStatefulCheckpoint):
       self._handle_initiate_stateful_checkpoint(message)
       return

     raise RuntimeError("Unknown kind of message received from Stream Manager")
   except Exception as error:
     Log.error("Error happened while handling a message from stmgr: " + str(error))
     Log.error(traceback.format_exc())
     sys.exit(1)
Example #59
0
def scp_package(package_file, destinations, cl_args):
    '''
    scp and extract package

    Copies ``package_file`` to every destination that is not this host and
    extracts it into ~/.heron there. All copies are started first and then
    awaited. Exits the process with -1 when any of them fails.

    :param package_file: local path of the archive to copy
    :param destinations: hosts to copy to; this host is skipped
    :param cl_args: parsed command line arguments, passed on to the helpers
    '''
    pids = []
    for dest in destinations:
        if is_self(dest):
            continue
        Log.info("Server: %s" % dest)
        file_path = "/tmp/heron.tar.gz"
        dest_file_path = "%s:%s" % (dest, file_path)

        remote_cmd = "rm -rf ~/.heron && mkdir ~/.heron " \
                     "&& tar -xzvf %s -C ~/.heron --strip-components 1" % (file_path)
        cmd = '%s && %s' \
              % (scp_cmd(package_file, dest_file_path, cl_args),
                 ssh_remote_execute(remote_cmd, dest, cl_args))
        Log.debug(cmd)
        pid = subprocess.Popen(cmd,
                               shell=True,
                               universal_newlines=True,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
        pids.append({"pid": pid, "dest": dest})

    errors = []
    for entry in pids:
        pid = entry["pid"]
        # communicate() drains stdout/stderr while waiting. Calling wait() first can
        # deadlock once the verbose tar output fills the pipe buffer.
        output = pid.communicate()
        return_code = pid.returncode
        Log.debug("return code: %s output: %s" % (return_code, output))
        if return_code != 0:
            errors.append("Failed to scp package to %s with error:\n%s" %
                          (entry["dest"], output[1]))

    if errors:
        for error in errors:
            Log.error(error)
        sys.exit(-1)

    Log.info("Done distributing packages")
Example #60
0
    def __init__(self, pplan_helper, in_stream, out_stream, looper):
        """Set up the bolt instance from the physical plan helper.

        All four arguments are forwarded unchanged to the parent initializer.

        :raises RuntimeError: if ``self.pplan_helper.is_spout`` is true
        """
        super(BoltInstance, self).__init__(pplan_helper, in_stream, out_stream,
                                           looper)

        # this class only handles bolts; a plan helper flagged as spout is rejected
        if self.pplan_helper.is_spout:
            raise RuntimeError("No bolt in physical plan")

        # bolt_config is auto-typed, not <str -> str> only
        context = self.pplan_helper.context
        self.bolt_metrics = BoltMetrics(self.pplan_helper)
        self.serializer = SerializerHelper.get_serializer(context)

        # acking related
        # acking counts as disabled when the cluster config does not carry the key
        self.acking_enabled = context.get_cluster_config().get(
            constants.TOPOLOGY_ENABLE_ACKING, False)
        Log.info("Enable ACK: %s" % str(self.acking_enabled))

        # load user's bolt class
        bolt_impl_class = super(BoltInstance,
                                self).load_py_instance(is_spout=False)
        # the user's bolt is instantiated with this instance as its delegate
        self.bolt_impl = bolt_impl_class(delegate=self)