Exemple #1
0
    def generate_command(self, command_header):
        """
        Build a full command from the cached configuration and the header.

        The cluster id, service name and role found in ``command_header`` are
        used to ask ``self.configuration_builder`` for a configuration. The
        header is then merged into a mutable copy of that configuration with
        ``Utils.update_nested``.

        :param command_header: mapping that must contain 'clusterId' and,
          unless the cluster id is '-1' or 'null', also 'serviceName' and
          'role'. 'requiredConfigTimestamp' and 'clusterHostInfo' are optional.
        :return: the merged command
        """
        cluster_id = str(command_header['clusterId'])

        if cluster_id != '-1' and cluster_id != 'null':
            service_name = command_header['serviceName']
            component_name = command_header['role']
        else:
            # no cluster is addressed: request the configuration without one
            cluster_id = None
            service_name = None
            component_name = None

        required_config_timestamp = command_header[
            'requiredConfigTimestamp'] if 'requiredConfigTimestamp' in command_header else None

        command_dict = self.configuration_builder.get_configuration(
            cluster_id, service_name, component_name,
            required_config_timestamp)

        header_has_topology = 'clusterHostInfo' in command_header

        # remove data populated from topology to avoid merge and just override.
        # The key is only deleted when the configuration really has it, so a
        # configuration built without topology does not raise KeyError.
        if header_has_topology and 'clusterHostInfo' in command_dict:
            del command_dict['clusterHostInfo']

        command = Utils.update_nested(Utils.get_mutable_copy(command_dict),
                                      command_header)

        # topology needs to be decompressed if and only if it originates from command header
        if header_has_topology and command_header['clusterHostInfo']:
            command['clusterHostInfo'] = self.decompress_cluster_host_info(
                command['clusterHostInfo'])

        return command
    def handle_heartbeat_reponse(self, response):
        """
        Check the id of a heartbeat response against the expected sequence.

        When the id is exactly ``self.responseId + 1`` it becomes the new
        ``self.responseId``; otherwise an error is logged and the agent is
        restarted through ``Utils.restartAgent``.

        :param response: mapping with an 'id' entry convertible to int
        """
        received_id = int(response['id'])

        if received_id == self.responseId + 1:
            self.responseId = received_id
            return

        # the sequence is broken
        logger.error("Error in responseId sequence - restarting")
        Utils.restartAgent(self.stop_event)
Exemple #3
0
  def handle_heartbeat_reponse(self, response):
    """
    Validate the heartbeat response id and honour a restart request.

    The id must be ``self.responseId + 1``; if so it is stored, otherwise an
    error is logged and the agent is restarted. Independently of that check,
    a 'restartAgent' entry equal to "true" (case-insensitive) also restarts
    the agent.

    :param response: mapping with an 'id' entry convertible to int and an
      optional string 'restartAgent' entry
    """
    received_id = int(response['id'])

    if received_id == self.responseId + 1:
      self.responseId = received_id
    else:
      logger.error("Error in responseId sequence - restarting")
      Utils.restartAgent(self.stop_event)

    restart_requested = 'restartAgent' in response and response['restartAgent'].lower() == "true"
    if restart_requested:
      logger.warn("Restarting the agent by the request from server")
      Utils.restartAgent(self.stop_event)
  def generate_command(self, command_header):
    """
    Build a full command from the cached configuration and the header.

    :param command_header: mapping that must contain 'clusterId' and, unless
      the cluster id is '-1' or 'null', also 'serviceName' and 'role';
      'requiredConfigTimestamp' is optional
    :return: result of merging the header into a mutable copy of the
      configuration with ``Utils.update_nested``
    """
    cluster_id = str(command_header['clusterId'])

    if cluster_id in ('-1', 'null'):
      # no cluster is addressed: request the configuration without one
      cluster_id = service_name = component_name = None
    else:
      service_name = command_header['serviceName']
      component_name = command_header['role']

    if 'requiredConfigTimestamp' in command_header:
      required_config_timestamp = command_header['requiredConfigTimestamp']
    else:
      required_config_timestamp = None

    base_configuration = self.configuration_builder.get_configuration(
      cluster_id, service_name, component_name, required_config_timestamp)
    mutable_configuration = Utils.get_mutable_copy(base_configuration)
    return Utils.update_nested(mutable_configuration, command_header)
    def discard_stale_reports(self, cluster_reports):
        """
        Drop reports that match an entry of ``self.reports_to_discard``.

        Two reports match when ``Utils.are_dicts_equal`` says so while
        skipping the 'status' key. Per the original note, such reports are
        stale because another process has already updated the status.

        :param cluster_reports: mapping of cluster id to a list of reports
        :return: ``cluster_reports`` itself when nothing is queued for
          discarding, otherwise a defaultdict holding only the kept reports
        """
        with self.reports_to_discard_lock:
            if not self.reports_to_discard:
                # nothing to discard
                return cluster_reports

            # work on a copy taken under the lock
            stale_reports = self.reports_to_discard[:]

        kept_reports = defaultdict(list)
        for cluster_id, reports in cluster_reports.iteritems():
            for report in reports:
                is_stale = any(
                    Utils.are_dicts_equal(report,
                                          stale_report,
                                          keys_to_skip=['status'])
                    for stale_report in stale_reports)

                if is_stale:
                    self.logger.info(
                        "Discarding outdated status {0} before sending".
                        format(report))
                else:
                    kept_reports[cluster_id].append(report)

        return kept_reports
Exemple #6
0
  def __load_definitions(self):
    """
    Build alert objects for every definition in ``self.alert_definitions_cache``.

    For each cached entry the optional 'clusterName', 'hostName' and
    'publicHostName' values (empty string when absent) are passed, together
    with a mutable copy of each item of 'alertDefinitions', to
    ``__json_to_callable``. Definitions for which that call returns None are
    skipped; the others get their helpers set and are collected.

    :return: list of the created alerts
    """
    loaded_alerts = []
    for cluster_id, command_json in self.alert_definitions_cache.iteritems():
      cluster_name = command_json['clusterName'] if 'clusterName' in command_json else ''
      host_name = command_json['hostName'] if 'hostName' in command_json else ''
      public_host_name = command_json['publicHostName'] if 'publicHostName' in command_json else ''
      cluster_hash = command_json['hash'] if 'hash' in command_json else None

      # only logged here; nothing is stored from the name/hash pair
      if cluster_name != '' and cluster_hash is not None:
        logger.info('[AlertScheduler] Caching cluster {0} with alert hash {1}'.format(cluster_name, cluster_hash))

      for definition in command_json['alertDefinitions']:
        alert = self.__json_to_callable(cluster_name, host_name, public_host_name, Utils.get_mutable_copy(definition))

        if alert is not None:
          alert.set_helpers(self._collector, self._cluster_configuration, self.configuration_builder)
          loaded_alerts.append(alert)

    return loaded_alerts
Exemple #7
0
    def run(self):
        """
        Send the host status report periodically until ``stop_event`` is set.

        While the agent is registered, a report is built on each pass and
        sent only if it differs from ``self.last_report`` (the
        'agentTimeStampAtReporting' key is ignored in the comparison). When
        the agent is not registered, ``self.last_report`` is reset to an
        empty dict. Each pass ends by waiting ``self.report_interval`` on the
        stop event.
        """
        while not self.stop_event.is_set():
            try:
                if self.initializer_module.is_registered:
                    report = self.get_report()

                    # is_registered is read again after the report was built
                    if self.initializer_module.is_registered:
                        unchanged = Utils.are_dicts_equal(
                            report,
                            self.last_report,
                            keys_to_skip=["agentTimeStampAtReporting"])
                        if not unchanged:
                            self.initializer_module.connection.send(
                                message=report,
                                destination=Constants.HOST_STATUS_REPORTS_ENDPOINT)
                            self.last_report = report

                # don't use else to avoid race condition
                if not self.initializer_module.is_registered:
                    self.last_report = {}
            except ConnectionIsAlreadyClosed:
                # server and agent disconnected during sending data. Not an issue
                pass
            except:
                # any other failure is logged and the loop keeps running
                logger.exception(
                    "Exception in HostStatusReporter. Re-running it")

            self.stop_event.wait(self.report_interval)

        logger.info("HostStatusReporter has successfully finished")
Exemple #8
0
    def run(self):
        """
        Send the host status report periodically until ``stop_event`` is set.

        While the agent is registered, a report is built on each pass and
        sent only if it differs from ``self.last_report`` (the
        'agentTimeStampAtReporting' key is ignored in the comparison). The
        report is handed to ``self.save_last_report`` by a callback registered
        in ``server_responses_listener.listener_functions_on_success`` under
        the correlation id returned by ``send``. Each pass ends by waiting
        ``self.report_interval`` on the stop event.
        """

        def save_on_success(sent_report):
            # Bind the report that was actually sent. A lambda written inline
            # in the loop would look up `report` only when invoked, and by
            # then a later pass may already have rebound it to a newer,
            # unsent report.
            return lambda headers, message: self.save_last_report(sent_report)

        while not self.stop_event.is_set():
            try:
                if self.initializer_module.is_registered:
                    report = self.get_report()

                    if self.initializer_module.is_registered and not Utils.are_dicts_equal(
                            report,
                            self.last_report,
                            keys_to_skip=["agentTimeStampAtReporting"]):
                        correlation_id = self.initializer_module.connection.send(
                            message=report,
                            destination=Constants.HOST_STATUS_REPORTS_ENDPOINT)
                        self.server_responses_listener.listener_functions_on_success[
                            correlation_id] = save_on_success(report)

            except ConnectionIsAlreadyClosed:  # server and agent disconnected during sending data. Not an issue
                pass
            except:
                # any other failure is logged and the loop keeps running
                logger.exception(
                    "Exception in HostStatusReporter. Re-running it")

            self.stop_event.wait(self.report_interval)

        logger.info("HostStatusReporter has successfully finished")
Exemple #9
0
 def is_json_equal():
     """
     Assert that the topology cache equals the expected JSON fixture.

     ``self`` is taken from the enclosing scope. The comparison is made on a
     mutable copy of ``self.initializer_module.topology_cache``.
     """
     # Debugging hint: dumping both sides with
     # json.dumps(..., indent=2, sort_keys=True) gives a readable diff.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         Utils.get_mutable_copy(self.initializer_module.topology_cache),
         self.get_dict_from_file("topology_cache_expected.json"))
Exemple #10
0
 def is_json_equal():
     """
     Assert that the alert definitions cache equals the expected JSON fixture.

     ``self`` is taken from the enclosing scope. The comparison is made on a
     mutable copy of ``self.initializer_module.alert_definitions_cache``.
     """
     # Debugging hint: dumping both sides with
     # json.dumps(..., indent=2, sort_keys=True) gives a readable diff.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(
         Utils.get_mutable_copy(
             self.initializer_module.alert_definitions_cache),
         self.get_dict_from_file("alert_definition_expected.json"))
  def rewrite_cluster_cache(self, cluster_id, cache):
    """
    Replace the in-memory cache entry of one cluster.

    The given cache is converted with ``Utils.make_immutable`` and stored
    under ``cluster_id`` while ``self._cache_lock`` is held.

    :param cluster_id: key under which the cache is stored
    :param cache: new cache content for that cluster
    :return: None
    """
    message = "Rewriting cache {0} for cluster {1}".format(self.__class__.__name__, cluster_id)
    logger.info(message)

    # Original author's note: the cache should hold exactly what the server
    # sent, because agent-side modifications cause an unnecessary cache sync
    # on every agent registration. The conversion happens before the lock is
    # taken.
    frozen_cache = Utils.make_immutable(cache)

    with self._cache_lock:
      self[cluster_id] = frozen_cache
Exemple #12
0
    def update_definitions(self, event_type):
        """
        Reschedule alert jobs after the alert definitions changed.

        The earlier version also built a list of mutable copies of every
        cached definition and never used it; that dead work has been removed.

        :param event_type: "CREATE" reschedules all jobs, creating new
          instances; any other value reschedules only the jobs that changed
        :return: None
        """
        if event_type == "CREATE":
            # reschedule all jobs, creating new instances
            self.reschedule_all()
        else:
            # reschedule only the jobs that have changed
            self.reschedule()
Exemple #13
0
    def build(self, response_id='-1'):
        """
        Assemble the registration message of this agent.

        :param response_id: value converted with ``int`` and sent as 'id'
        :return: dict with the keys 'id', 'timestamp', 'hostname',
          'currentPingPort', 'publicHostname', 'hardwareProfile', 'agentEnv',
          'agentVersion' and 'prefix'
        """
        # current time in milliseconds
        timestamp = int(time.time() * 1000)

        # HostInfo.register fills the dict that is passed in
        agent_env = {}
        HostInfo(self.config).register(agent_env, runExpensiveChecks=True)

        ping_port = self.config.get('agent', 'ping_port')

        registration = {}
        registration['id'] = int(response_id)
        registration['timestamp'] = timestamp
        registration['hostname'] = hostname.hostname(self.config)
        registration['currentPingPort'] = int(ping_port)
        registration['publicHostname'] = hostname.public_hostname(self.config)
        registration['hardwareProfile'] = self.hardware.get()
        registration['agentEnv'] = agent_env
        registration['agentVersion'] = Utils.read_agent_version(self.config)
        registration['prefix'] = self.config.get('agent', 'prefix')
        return registration
Exemple #14
0
    def report_status_to_sender(self, headers, message, ex=None):
        """
        Send a delivery confirmation for a received message to the server.

        Nothing is sent when ``headers`` has no ``Constants.MESSAGE_ID``
        entry. The confirmation carries that message id and status 'OK', or
        status 'ERROR' plus the traceback text when ``ex`` is given.

        @param headers: headers dictionary
        @param message: message payload dictionary (not read by this method)
        @param ex: optional exception object for errors
        """
        if Constants.MESSAGE_ID not in headers:
            return

        confirmation = {Constants.MESSAGE_ID: headers[Constants.MESSAGE_ID]}
        if ex:
            confirmation['status'] = 'ERROR'
            confirmation['reason'] = Utils.get_traceback_as_text(ex)
        else:
            confirmation['status'] = 'OK'

        try:
            connection = self.initializer_module.connection
        except ConnectionIsAlreadyClosed:
            # access early copy of connection before it is exposed globally
            connection = self.initializer_module.heartbeat_thread.connection

        try:
            connection.send(message=confirmation,
                            destination=Constants.AGENT_RESPONSES_TOPIC)
        except:
            # a failed confirmation is logged and not propagated
            logger.exception(
                "Could not send a confirmation '{0}' to server".format(
                    confirmation))
 def _get_mutable_copy(self):
   """
   Return ``Utils.get_mutable_copy(self)``, taken while holding ``self._cache_lock``.
   """
   with self._cache_lock:
     mutable_copy = Utils.get_mutable_copy(self)
   return mutable_copy
 def restart_agent(self):
   """
   Log that the server requested a restart, then restart the agent.
   """
   logger.warn("Restarting the agent by the request from server")
   stop_event = self.stop_event
   Utils.restartAgent(stop_event)
 def cache_update(self, update_dict, cache_hash):
   """
   Merge ``update_dict`` into a mutable copy of this cache and store the result.

   The merge is done by ``Utils.update_nested``; the merged dictionary and
   ``cache_hash`` are then passed to ``self.rewrite_cache``.
   """
   current_state = self._get_mutable_copy()
   merged_state = Utils.update_nested(current_state, update_dict)
   self.rewrite_cache(merged_state, cache_hash)