Example #1
0
 def __init__(self, instance_id, function_id, function_version, function_details, max_buffered_tuples,
              expected_healthcheck_interval, user_code, pulsar_client, secrets_provider, cluster_name):
   """Capture the instance configuration and initialise empty runtime state.

   No consumer, producer or thread is created here; those attributes start
   out as ``None`` / empty dicts.
   """
   self.instance_config = InstanceConfig(instance_id, function_id, function_version, function_details, max_buffered_tuples)
   self.user_code = user_code
   # Queue sized by max_buffered_tuples.
   self.queue = queue.Queue(max_buffered_tuples)

   # A log-topic handler is only built when a non-empty log topic is configured.
   log_topic = function_details.logTopic
   if log_topic is not None and log_topic != "":
     self.log_topic_handler = log.LogTopicHandler(str(log_topic), pulsar_client)
   else:
     self.log_topic_handler = None

   self.pulsar_client = pulsar_client
   self.input_serdes = {}
   self.consumers = {}
   self.output_serde = None
   self.function_class = None
   self.function_purefunction = None
   self.producer = None
   self.execution_thread = None

   # Delivery semantics are read back from the stored instance config.
   details = self.instance_config.function_details
   guarantee = details.processingGuarantees
   self.atmost_once = guarantee == Function_pb2.ProcessingGuarantees.Value('ATMOST_ONCE')
   self.atleast_once = guarantee == Function_pb2.ProcessingGuarantees.Value('ATLEAST_ONCE')
   self.auto_ack = details.autoAck
   self.contextimpl = None
   self.last_health_check_ts = time.time()

   # A non-positive source timeout is stored as None.
   source_timeout_ms = function_details.source.timeoutMs
   self.timeout_ms = source_timeout_ms if source_timeout_ms > 0 else None
   self.expected_healthcheck_interval = expected_healthcheck_interval
   self.secrets_provider = secrets_provider

   tenant = function_details.tenant
   namespace = function_details.namespace
   name = function_details.name
   self.metrics_labels = [
     tenant,
     "%s/%s" % (tenant, namespace),
     name,
     instance_id,
     cluster_name,
     "%s/%s/%s" % (tenant, namespace, name),
   ]
   self.stats = Stats(self.metrics_labels)
Example #2
0
class PythonInstance(object):
  def __init__(self, instance_id, function_id, function_version, function_details, max_buffered_tuples,
               expected_healthcheck_interval, user_code, pulsar_client, secrets_provider, cluster_name):
    """Capture the instance configuration and initialise empty runtime state.

    No consumer, producer or thread is created here; those attributes start
    out as ``None`` / empty dicts.
    """
    self.instance_config = InstanceConfig(instance_id, function_id, function_version, function_details, max_buffered_tuples)
    self.user_code = user_code
    # Queue sized by max_buffered_tuples.
    self.queue = queue.Queue(max_buffered_tuples)

    # A log-topic handler is only built when a non-empty log topic is configured.
    log_topic = function_details.logTopic
    if log_topic is not None and log_topic != "":
      self.log_topic_handler = log.LogTopicHandler(str(log_topic), pulsar_client)
    else:
      self.log_topic_handler = None

    self.pulsar_client = pulsar_client
    self.input_serdes = {}
    self.consumers = {}
    self.output_serde = None
    self.function_class = None
    self.function_purefunction = None
    self.producer = None
    self.execution_thread = None

    # Delivery semantics are read back from the stored instance config.
    details = self.instance_config.function_details
    guarantee = details.processingGuarantees
    self.atmost_once = guarantee == Function_pb2.ProcessingGuarantees.Value('ATMOST_ONCE')
    self.atleast_once = guarantee == Function_pb2.ProcessingGuarantees.Value('ATLEAST_ONCE')
    self.auto_ack = details.autoAck
    self.contextimpl = None
    self.last_health_check_ts = time.time()

    # A non-positive source timeout is stored as None.
    source_timeout_ms = function_details.source.timeoutMs
    self.timeout_ms = source_timeout_ms if source_timeout_ms > 0 else None
    self.expected_healthcheck_interval = expected_healthcheck_interval
    self.secrets_provider = secrets_provider

    tenant = function_details.tenant
    namespace = function_details.namespace
    name = function_details.name
    self.metrics_labels = [
      tenant,
      "%s/%s" % (tenant, namespace),
      name,
      instance_id,
      cluster_name,
      "%s/%s/%s" % (tenant, namespace, name),
    ]
    self.stats = Stats(self.metrics_labels)

  def health_check(self):
    """Record the time of this health check and report success.

    Returns:
      An ``InstanceCommunication_pb2.HealthCheckResult`` with ``success`` set
      to True.
    """
    # This timestamp is what process_spawner_health_check_timer compares against.
    self.last_health_check_ts = time.time()
    result = InstanceCommunication_pb2.HealthCheckResult()
    result.success = True
    return result

  def process_spawner_health_check_timer(self):
    """Kill this process if health checks stopped arriving, otherwise re-arm the timer.

    The process is killed once more than three expected health-check
    intervals have passed since ``last_health_check_ts``.
    """
    silence = time.time() - self.last_health_check_ts
    allowed_silence = self.expected_healthcheck_interval * 3
    if silence > allowed_silence:
      Log.critical("Haven't received health check from spawner in a while. Stopping instance...")
      os.kill(os.getpid(), signal.SIGKILL)
      sys.exit(1)

    # Health checks are still arriving: check again one interval from now.
    next_check = Timer(self.expected_healthcheck_interval, self.process_spawner_health_check_timer)
    next_check.start()

  def run(self):
    """Subscribe to every input topic, load the user function and start processing.

    Consumers are created for each entry of ``source.topicsToSerDeClassName``
    and ``source.inputSpecs``. The user function is then imported, the context
    object is built, the execution thread is started and, when a positive
    health-check interval is configured, the spawner health-check timer is
    armed.

    Raises:
      NameError: if the user function class cannot be imported.
    """
    details = self.instance_config.function_details

    # Setup consumers and input deserializers
    mode = pulsar._pulsar.ConsumerType.Shared
    if details.source.subscriptionType == Function_pb2.SubscriptionType.Value("FAILOVER"):
      mode = pulsar._pulsar.ConsumerType.Failover

    subscription_name = str(details.tenant) + "/" + \
                        str(details.namespace) + "/" + \
                        str(details.name)
    for topic, serde in details.source.topicsToSerDeClassName.items():
      self._subscribe(topic, serde, subscription_name, mode)

    for topic, consumer_conf in details.source.inputSpecs.items():
      self._subscribe(topic, consumer_conf.serdeClassName, subscription_name, mode,
                      is_regex=consumer_conf.isRegexPattern)

    function_kclass = util.import_class(os.path.dirname(self.user_code), details.className)
    if function_kclass is None:
      Log.critical("Could not import User Function Module %s" % details.className)
      raise NameError("Could not import User Function Module %s" % details.className)
    try:
      self.function_class = function_kclass()
    except Exception:
      # The imported object could not be instantiated without arguments, so it
      # is kept as-is and invoked as a pure function. Only Exception is caught
      # so that KeyboardInterrupt / SystemExit are not swallowed here.
      self.function_purefunction = function_kclass

    self.contextimpl = contextimpl.ContextImpl(self.instance_config, Log, self.pulsar_client,
                                               self.user_code, self.consumers,
                                               self.secrets_provider, self.metrics_labels)
    # Now launch a thread that does execution
    self.execution_thread = threading.Thread(target=self.actual_execution)
    self.execution_thread.start()

    # start process spawner health check timer
    self.last_health_check_ts = time.time()
    if self.expected_healthcheck_interval > 0:
      Timer(self.expected_healthcheck_interval, self.process_spawner_health_check_timer).start()

  def _subscribe(self, topic, serde_class_name, subscription_name, consumer_type, is_regex=False):
    """Instantiate the SerDe for ``topic`` and register a consumer for it.

    Args:
      topic: Topic name, or a regex pattern string when ``is_regex`` is true.
      serde_class_name: SerDe class to import; empty means ``DEFAULT_SERIALIZER``.
      subscription_name: Subscription name passed to the client.
      consumer_type: Consumer type passed to the client.
      is_regex: When true, subscribe with a compiled pattern instead of a plain name.
    """
    serde_kclass = util.import_class(os.path.dirname(self.user_code),
                                     serde_class_name or DEFAULT_SERIALIZER)
    self.input_serdes[topic] = serde_kclass()
    Log.debug("Setting up consumer for topic %s with subname %s" % (topic, subscription_name))
    self.consumers[topic] = self.pulsar_client.subscribe(
      re.compile(str(topic)) if is_regex else str(topic),
      subscription_name,
      consumer_type=consumer_type,
      # The SerDe is bound into the listener so each queued message carries it.
      message_listener=partial(self.message_listener, self.input_serdes[topic]),
      unacked_messages_timeout_ms=int(self.timeout_ms) if self.timeout_ms else None
    )

  def actual_execution(self):
    """Consume queued messages and run the user function on each until told to quit.

    This is the target of the execution thread started by ``run``. Each
    iteration blocks on ``self.queue``, deserializes the payload with the
    SerDe attached to the queued item, invokes the user function and, on
    success, hands the result to ``process_result``. The loop ends when an
    ``InternalQuitMessage`` is dequeued.

    Exceptions raised by the user function are logged and counted as user
    exceptions. Any other exception raised during an iteration is logged and
    counted as a system exception. In both cases the loop keeps running.
    """
    Log.debug("Started Thread for executing the function")

    while True:
      try:
        # Block until an item is available.
        msg = self.queue.get(True)
        # join() enqueues an InternalQuitMessage to stop this loop.
        if isinstance(msg, InternalQuitMessage):
          break
        Log.debug("Got a message from topic %s" % msg.topic)
        # deserialize message
        input_object = msg.serde.deserialize(msg.message.data())
        # set current message in context
        self.contextimpl.set_current_message_context(msg.message, msg.topic)
        output_object = None
        self.saved_log_handler = None
        # While the user function runs, install the log-topic handler in place
        # of the current handlers (presumably remove_all_handlers() returns what
        # it removed; it is re-added below -- confirm against the log module).
        if self.log_topic_handler is not None:
          self.saved_log_handler = log.remove_all_handlers()
          log.add_handler(self.log_topic_handler)
        successfully_executed = False
        try:
          # get user function start time for statistic calculation
          self.stats.set_last_invocation(time.time())

          # start timer for process time
          self.stats.process_time_start()
          # A class instance receives the context; a pure function only gets the input.
          if self.function_class is not None:
            output_object = self.function_class.process(input_object, self.contextimpl)
          else:
            output_object = self.function_purefunction.process(input_object)
          successfully_executed = True

          # stop timer for process time
          # NOTE(review): not reached when the user function raises -- confirm
          # Stats tolerates a process_time_start() without a matching end.
          self.stats.process_time_end()
        except Exception as e:
          Log.exception("Exception while executing user method")
          self.stats.incr_total_user_exceptions(e)

        # Put back the handler(s) saved before the user function ran.
        if self.log_topic_handler is not None:
          log.remove_all_handlers()
          log.add_handler(self.saved_log_handler)
        # Only successful invocations are published/acked and counted as processed.
        if successfully_executed:
          self.process_result(output_object, msg)
          self.stats.incr_total_processed_successfully()

      except Exception as e:
        # Anything that failed outside the user function (deserialization,
        # context setup, result handling) is counted as a system exception.
        Log.error("Uncaught exception in Python instance: %s" % e);
        self.stats.incr_total_sys_exceptions(e)

  def done_producing(self, consumer, orig_message, result, sent_message):
    """Send-completion callback: acknowledge the input once its output was published.

    Args:
      consumer: Consumer that delivered ``orig_message``.
      orig_message: The input message whose output was just sent.
      result: Send result; only ``pulsar.Result.Ok`` leads to an ack.
      sent_message: The published message (unused).

    The ack is only issued under auto-ack with the at-least-once guarantee.
    """
    if result == pulsar.Result.Ok:
      should_ack = self.auto_ack and self.atleast_once
      if should_ack:
        consumer.acknowledge(orig_message)

  def process_result(self, output, msg):
    if output is not None and self.instance_config.function_details.sink.topic != None and \
            len(self.instance_config.function_details.sink.topic) > 0:
      if self.output_serde is None:
        self.setup_output_serde()
      if self.producer is None:
        self.setup_producer()

      # serialize function output
      output_bytes = self.output_serde.serialize(output)

      if output_bytes is not None:
        props = {"__pfn_input_topic__" : str(msg.topic), "__pfn_input_msg_id__" : base64ify(msg.message.message_id().serialize())}
        self.producer.send_async(output_bytes, partial(self.done_producing, msg.consumer, msg.message), properties=props)
    elif self.auto_ack and self.atleast_once:
      msg.consumer.acknowledge(msg.message)

  def setup_output_serde(self):
    """Instantiate the serializer used for function output.

    Uses the sink's configured SerDe class when one is named, otherwise
    ``DEFAULT_SERIALIZER``. The class is imported relative to the directory
    that holds the user code and stored in ``self.output_serde``.
    """
    serde_name = self.instance_config.function_details.sink.serDeClassName
    if serde_name is None or len(serde_name) == 0:
      serde_name = DEFAULT_SERIALIZER
    serde_kclass = util.import_class(os.path.dirname(self.user_code), serde_name)
    self.output_serde = serde_kclass()

  def setup_producer(self):
    """Create the producer for the sink topic, if a sink topic is configured.

    Does nothing when the sink topic is unset or empty; otherwise stores the
    new producer in ``self.producer``.
    """
    sink_topic = self.instance_config.function_details.sink.topic
    if sink_topic is None or len(sink_topic) == 0:
      return

    Log.debug("Setting up producer for topic %s" % sink_topic)

    producer_options = dict(
      block_if_queue_full=True,
      batching_enabled=True,
      batching_max_publish_delay_ms=1,
      # set send timeout to be infinity to prevent potential deadlock with consumer
      # that might happen when consumer is blocked due to unacked messages
      send_timeout_millis=0,
      max_pending_messages=100000,
    )
    self.producer = self.pulsar_client.create_producer(str(sink_topic), **producer_options)

  def message_listener(self, serde, consumer, message):
    """Consumer callback: count the message and enqueue it for the execution thread.

    Args:
      serde: SerDe instance bound to the topic this listener was registered for.
      consumer: Consumer that received ``message``.
      message: The received message.

    Under at-most-once with auto-ack the message is acknowledged right after
    it has been enqueued, i.e. before the user function processes it.
    """
    # increment number of received records from source
    self.stats.incr_total_received()
    source_topic = consumer.topic()
    queued = InternalMessage(message, source_topic, serde, consumer)
    # Blocking put: waits while the queue is full.
    self.queue.put(queued, True)
    ack_immediately = self.atmost_once and self.auto_ack
    if ack_immediately:
      consumer.acknowledge(message)

  def get_and_reset_metrics(self):
    # First get any user metrics
    metrics = self.get_metrics()
    self.reset_metrics()
    return metrics

  def reset_metrics(self):
    self.stats.reset()
    self.contextimpl.reset_metrics()

  def get_metrics(self):
    """Build a ``MetricsData`` message from the current stats and user metrics.

    Returns:
      An ``InstanceCommunication_pb2.MetricsData`` holding the total counters,
      the ``_1min`` counters, both average latencies, the last invocation
      value and every entry returned by ``contextimpl.get_metrics()``.
    """
    def as_integer(value):
      # Python 2 has a separate ``long`` type; on Python 3 plain ``int`` is used.
      if sys.version_info.major >= 3:
        return int(value)
      return long(value)

    stats = self.stats

    # Read every value from the stats object before the message is built.
    received = stats.get_total_received()
    processed = stats.get_total_processed_successfully()
    user_errors = stats.get_total_user_exceptions()
    system_errors = stats.get_total_sys_exceptions()
    latency_ms = stats.get_avg_process_latency()
    invoked_at = stats.get_last_invocation()

    received_1min = stats.get_total_received_1min()
    processed_1min = stats.get_total_processed_successfully_1min()
    user_errors_1min = stats.get_total_user_exceptions_1min()
    system_errors_1min = stats.get_total_sys_exceptions_1min()
    latency_ms_1min = stats.get_avg_process_latency_1min()

    report = InstanceCommunication_pb2.MetricsData()
    # total metrics
    report.receivedTotal = as_integer(received)
    report.processedSuccessfullyTotal = as_integer(processed)
    report.systemExceptionsTotal = as_integer(system_errors)
    report.userExceptionsTotal = as_integer(user_errors)
    report.avgProcessLatency = latency_ms
    report.lastInvocation = as_integer(invoked_at)
    # 1min metrics
    report.receivedTotal_1min = as_integer(received_1min)
    report.processedSuccessfullyTotal_1min = as_integer(processed_1min)
    report.systemExceptionsTotal_1min = as_integer(system_errors_1min)
    report.userExceptionsTotal_1min = as_integer(user_errors_1min)
    report.avgProcessLatency_1min = latency_ms_1min

    # get any user metrics
    for metric_name, metric_value in self.contextimpl.get_metrics().items():
      report.userMetrics[metric_name] = metric_value

    return report

  def add_system_metrics(self, metric_name, value, metrics):
    metrics.metrics[metric_name].count = value
    metrics.metrics[metric_name].sum = value
    metrics.metrics[metric_name].min = 0
    metrics.metrics[metric_name].max = value

  def get_function_status(self):
    """Build a ``FunctionStatus`` message describing this instance.

    Returns:
      An ``InstanceCommunication_pb2.FunctionStatus`` with ``running`` set to
      True, the instance id, the total counters, the average latency, the last
      invocation value and the recorded user and system exceptions.
    """
    def as_integer(value):
      # Python 2 has a separate ``long`` type; on Python 3 plain ``int`` is used.
      if sys.version_info.major >= 3:
        return int(value)
      return long(value)

    def copy_exceptions(recorded, destination):
      # Each recorded item is an (exception string, timestamp) pair.
      for text, occurred_at in recorded:
        entry = destination.add()
        entry.exceptionString = text
        entry.msSinceEpoch = occurred_at

    status = InstanceCommunication_pb2.FunctionStatus()
    status.running = True

    stats = self.stats
    received = stats.get_total_received()
    processed = stats.get_total_processed_successfully()
    user_errors = stats.get_total_user_exceptions()
    system_errors = stats.get_total_sys_exceptions()
    latency_ms = stats.get_avg_process_latency()
    invoked_at = stats.get_last_invocation()

    status.numReceived = as_integer(received)
    status.numSuccessfullyProcessed = as_integer(processed)
    status.numUserExceptions = as_integer(user_errors)
    status.instanceId = self.instance_config.instance_id
    copy_exceptions(stats.latest_user_exception, status.latestUserExceptions)
    status.numSystemExceptions = as_integer(system_errors)
    copy_exceptions(stats.latest_sys_exception, status.latestSystemExceptions)
    status.averageLatency = latency_ms
    status.lastInvocationTime = as_integer(invoked_at)
    return status

  def join(self):
    """Ask the execution thread to stop and wait for it to finish."""
    # The quit marker is what makes the actual_execution loop break.
    quit_signal = InternalQuitMessage(True)
    self.queue.put(quit_signal, True)
    worker = self.execution_thread
    worker.join()