Example #1
    def _update_state(self, transformed_data: Dict) -> None:
        self.logger.debug("Updating state ...")

        if 'result' in transformed_data:
            meta_data = transformed_data['result']['meta_data']
            metrics = transformed_data['result']['data']
            repo_id = meta_data['repo_id']
            parent_id = meta_data['repo_parent_id']
            repo_name = meta_data['repo_name']
            repo = self.state[repo_id]

            # Set repo details just in case the configs have changed
            repo.set_parent_id(parent_id)
            repo.set_repo_name(repo_name)

            # Save the new metrics
            repo.set_last_monitored(meta_data['last_monitored'])
            repo.set_no_of_releases(metrics['no_of_releases'])
        elif 'error' in transformed_data:
            meta_data = transformed_data['error']['meta_data']
            repo_name = meta_data['repo_name']
            repo_id = meta_data['repo_id']
            parent_id = meta_data['repo_parent_id']
            repo = self.state[repo_id]

            # Set repo details just in case the configs have changed
            repo.set_parent_id(parent_id)
            repo.set_repo_name(repo_name)
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _update_state".format(self))

        self.logger.debug("State updated successfully")
Example #2
    def _process_transformed_data_for_saving(self,
                                             transformed_data: Dict) -> Dict:
        self.logger.debug("Performing further processing for storage ...")

        if 'result' in transformed_data:
            td_meta_data = transformed_data['result']['meta_data']
            td_metrics = transformed_data['result']['data']
            no_of_releases = td_metrics['no_of_releases']

            processed_data = {
                'result': {
                    'meta_data': copy.deepcopy(td_meta_data),
                    'data': {
                        'no_of_releases': no_of_releases
                    }
                }
            }
        elif 'error' in transformed_data:
            processed_data = copy.deepcopy(transformed_data)
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _process_transformed_data_for_saving".format(self))

        self.logger.debug("Processing successful")

        return processed_data
Example #3
    def _update_state(self, transformed_data: Dict) -> None:
        self.logger.debug("Updating state ...")

        if 'result' in transformed_data:
            meta_data = transformed_data['result']['meta_data']
            metrics = transformed_data['result']['data']
            system_id = meta_data['system_id']
            parent_id = meta_data['system_parent_id']
            system_name = meta_data['system_name']
            system = self.state[system_id]

            # Set system details just in case the configs have changed
            system.set_parent_id(parent_id)
            system.set_system_name(system_name)

            # Save the new metrics in process memory
            system.set_last_monitored(meta_data['last_monitored'])
            system.set_process_cpu_seconds_total(
                metrics['process_cpu_seconds_total'])
            system.set_process_memory_usage(metrics['process_memory_usage'])
            system.set_virtual_memory_usage(metrics['virtual_memory_usage'])
            system.set_open_file_descriptors(metrics['open_file_descriptors'])
            system.set_system_cpu_usage(metrics['system_cpu_usage'])
            system.set_system_ram_usage(metrics['system_ram_usage'])
            system.set_system_storage_usage(metrics['system_storage_usage'])
            system.set_network_receive_bytes_total(
                metrics['network_receive_bytes_total'])
            system.set_network_transmit_bytes_total(
                metrics['network_transmit_bytes_total'])
            system.set_disk_io_time_seconds_total(
                metrics['disk_io_time_seconds_total'])
            system.set_network_transmit_bytes_per_second(
                metrics['network_transmit_bytes_per_second'])
            system.set_network_receive_bytes_per_second(
                metrics['network_receive_bytes_per_second'])
            system.set_disk_io_time_seconds_in_interval(
                metrics['disk_io_time_seconds_in_interval'])
            system.set_as_up()
        elif 'error' in transformed_data:
            meta_data = transformed_data['error']['meta_data']
            error_code = transformed_data['error']['code']
            system_name = meta_data['system_name']
            system_id = meta_data['system_id']
            parent_id = meta_data['system_parent_id']
            downtime_exception = SystemIsDownException(system_name)
            system = self.state[system_id]

            # Set system details just in case the configs have changed
            system.set_parent_id(parent_id)
            system.set_system_name(system_name)

            if error_code == downtime_exception.code:
                went_down_at = transformed_data['error']['data']['went_down_at']
                system.set_as_down(went_down_at)
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _update_state".format(self))

        self.logger.debug("State updated successfully")
Example #4
    def _process_mongo_store(self, data: Dict) -> None:
        if 'result' in data:
            self._process_mongo_result_store(data['result'])
        elif 'error' in data:
            self._process_mongo_error_store(data['error'])
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _process_mongo_store".format(self))
Example #5
    def _process_redis_store(self, data: Dict) -> None:
        if 'result' in data:
            self._process_redis_result_store(data['result'])
        elif 'error' in data:
            # No need to store anything if the index key is `error`
            return
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _process_redis_store".format(self))
Example #6
    def _process_transformed_data_for_saving(self,
                                             transformed_data: Dict) -> Dict:
        self.logger.debug("Performing further processing for storage ...")

        if 'result' in transformed_data or 'error' in transformed_data:
            processed_data = copy.deepcopy(transformed_data)
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _process_transformed_data_for_saving".format(self))

        self.logger.debug("Processing successful")

        return processed_data
Example #7
    def _transform_data(self, data: Dict) -> Tuple[Dict, Dict, Dict]:
        self.logger.debug("Performing data transformation on %s ...", data)

        if 'result' in data:
            meta_data = data['result']['meta_data']
            repo_metrics = data['result']['data']

            transformed_data = {
                'result': {
                    'meta_data': copy.deepcopy(meta_data),
                    'data': {},
                }
            }
            td_meta_data = transformed_data['result']['meta_data']
            td_metrics = transformed_data['result']['data']

            # Transform the meta_data by deleting the monitor_name and changing
            # the time key to last_monitored key
            del td_meta_data['monitor_name']
            del td_meta_data['time']
            td_meta_data['last_monitored'] = meta_data['time']

            # Transform the data by adding the no_of_releases and releases
            # metrics.
            td_metrics['no_of_releases'] = len(repo_metrics)
            td_metrics['releases'] = copy.deepcopy(repo_metrics)
        elif 'error' in data:
            # In case of errors in the sent messages, only remove the
            # monitor_name from the meta_data
            transformed_data = copy.deepcopy(data)
            del transformed_data['error']['meta_data']['monitor_name']
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _transform_data".format(self))

        data_for_alerting = self._process_transformed_data_for_alerting(
            transformed_data)
        data_for_saving = self._process_transformed_data_for_saving(
            transformed_data)

        self.logger.debug("Data transformation successful")

        return transformed_data, data_for_alerting, data_for_saving
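
To make the transformation concrete, a hedged before/after sketch; the keys mirror the accesses above, the values are illustrative:

    raw = {
        'result': {
            'meta_data': {'monitor_name': 'github_monitor_1',  # deleted below
                          'time': 1609459200.0,                # renamed below
                          'repo_id': 'repo_4ea76d87'},
            # Raw GitHub data is keyed by release index, so its length is
            # the number of releases.
            'data': {'0': {'release_name': 'v2.0', 'tag_name': 'v2.0.0'},
                     '1': {'release_name': 'v1.9', 'tag_name': 'v1.9.0'}},
        }
    }
    # After the 'result' branch above, transformed_data is equivalent to:
    transformed = {
        'result': {
            'meta_data': {'last_monitored': 1609459200.0,
                          'repo_id': 'repo_4ea76d87'},
            'data': {'no_of_releases': 2,
                     'releases': raw['result']['data']},
        }
    }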
Example #8
    def _process_transformed_data_for_alerting(self,
                                               transformed_data: Dict) -> Dict:
        self.logger.debug("Performing further processing for alerting ...")

        if 'result' in transformed_data:
            td_meta_data = transformed_data['result']['meta_data']
            td_repo_id = td_meta_data['repo_id']
            repo = self.state[td_repo_id]
            td_metrics = transformed_data['result']['data']

            processed_data = {
                'result': {
                    'meta_data': copy.deepcopy(td_meta_data),
                    'data': {}
                }
            }

            # Reformat the data in such a way that both the previous and current
            # states are sent to the alerter. Exclude the releases list, as the
            # previous list can be inferred.
            processed_data_metrics = processed_data['result']['data']
            for metric, value in td_metrics.items():
                if metric != 'releases':
                    processed_data_metrics[metric] = {}
                    processed_data_metrics[metric]['current'] = value

            # Add the previous state
            processed_data_metrics['no_of_releases']['previous'] = \
                repo.no_of_releases

            # Finally add the list of releases
            processed_data_metrics['releases'] = \
                copy.deepcopy(td_metrics['releases'])
        elif 'error' in transformed_data:
            processed_data = copy.deepcopy(transformed_data)
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _process_transformed_data_for_alerting".format(self))

        self.logger.debug("Processing successful.")

        return processed_data
Example #9
    def _process_heartbeat(self, ch: BlockingChannel,
                           method: pika.spec.Basic.Deliver,
                           properties: pika.spec.BasicProperties, body: bytes) \
            -> None:
        heartbeat = json.loads(body)
        self.logger.debug("Received %s. Now processing this data.", heartbeat)

        try:
            if method.routing_key in ('heartbeat.worker',
                                      'heartbeat.manager'):
                component_name = heartbeat['component_name']

                key_heartbeat = Keys.get_component_heartbeat(component_name)
                transformed_heartbeat = json.dumps(heartbeat)
                self._save_to_redis_and_add_to_state_if_fail(
                    key_heartbeat, transformed_heartbeat)

                self._dump_unsavable_redis_data()

                self.logger.debug("Successfully processed %s", heartbeat)
            else:
                raise ReceivedUnexpectedDataException(
                    "{}: _process_heartbeat".format(self))
        except Exception as e:
            self.logger.error("Error when processing %s", heartbeat)
            self.logger.exception(e)

        self.rabbitmq.basic_ack(method.delivery_tag, False)

        self.logger.debug("Saving %s heartbeat to Redis", self)
        key_heartbeat = Keys.get_component_heartbeat(self.name)
        handler_heartbeat = {
            'component_name': self.name,
            'timestamp': datetime.now().timestamp()
        }
        transformed_handler_heartbeat = json.dumps(handler_heartbeat)
        ret = self.redis.set(key_heartbeat, transformed_handler_heartbeat)
        if ret is None:
            self.logger.error("Could not save %s=%s to Redis.", key_heartbeat,
                              transformed_handler_heartbeat)
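
A minimal sketch of a heartbeat this handler accepts; the routing key and the 'component_name' field come from the checks above, the remaining values are illustrative:

    import json

    heartbeat = {
        'component_name': 'system_store',  # illustrative component name
        'is_alive': True,
        'timestamp': 1609459200.0,
    }
    # Published to the handler with routing key 'heartbeat.worker'
    # (or 'heartbeat.manager'):
    body = json.dumps(heartbeat).encode()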
Example #10
    def _process_raw_data(self, ch: BlockingChannel,
                          method: pika.spec.Basic.Deliver,
                          properties: pika.spec.BasicProperties, body: bytes) \
            -> None:
        raw_data = json.loads(body)
        self.logger.debug("Received %s from monitors. Now processing this "
                          "data.", raw_data)

        processing_error = False
        transformed_data = {}
        data_for_alerting = {}
        data_for_saving = {}
        try:
            if 'result' in raw_data or 'error' in raw_data:
                response_index_key = 'result' if 'result' in raw_data \
                    else 'error'
                meta_data = raw_data[response_index_key]['meta_data']
                system_id = meta_data['system_id']
                system_parent_id = meta_data['system_parent_id']
                system_name = meta_data['system_name']

                if system_id not in self.state:
                    new_system = System(system_name, system_id,
                                        system_parent_id)
                    loaded_system = self.load_state(new_system)
                    self._state[system_id] = loaded_system

                transformed_data, data_for_alerting, data_for_saving = \
                    self._transform_data(raw_data)
            else:
                raise ReceivedUnexpectedDataException(
                    "{}: _process_raw_data".format(self))
        except Exception as e:
            self.logger.error("Error when processing %s", raw_data)
            self.logger.exception(e)
            processing_error = True

        # If the data is processed, it can be acknowledged.
        self.rabbitmq.basic_ack(method.delivery_tag, False)

        # We want to update the state after the data is acknowledged, since
        # otherwise, if acknowledgement fails, the state would be erroneous
        # when the data is processed again. Note that the state is only
        # updated if there were no processing errors.
        if not processing_error:
            try:
                self._update_state(transformed_data)
                self.logger.debug("Successfully processed %s", raw_data)
            except Exception as e:
                self.logger.error("Error when processing %s", raw_data)
                self.logger.exception(e)
                processing_error = True

        # Place the data on the publishing queue if there were no processing
        # errors. This is done after acknowledging the data, so that if
        # acknowledgement fails, the data is processed again and we do not have
        # duplication of data in the queue
        if not processing_error:
            self._place_latest_data_on_queue(
                transformed_data, data_for_alerting, data_for_saving)

        # Send any data waiting in the publisher queue, if any
        try:
            self._send_data()

            if not processing_error:
                heartbeat = {
                    'component_name': self.transformer_name,
                    'is_alive': True,
                    'timestamp': datetime.now().timestamp()
                }
                self._send_heartbeat(heartbeat)
        except MessageWasNotDeliveredException as e:
            # Log the exception and do not raise it, as the message still
            # resides in the publisher queue.
            self.logger.exception(e)
        except Exception:
            # Re-raise any other exception.
            raise
Example #11
    def _transform_data(self, data: Dict) -> Tuple[Dict, Dict, Dict]:
        self.logger.debug("Performing data transformation on %s ...", data)

        if 'result' in data:
            meta_data = data['result']['meta_data']
            system_metrics = data['result']['data']
            system_id = meta_data['system_id']
            system = self.state[system_id]

            # Compute the network receive/transmit bytes per second based on the
            # totals and the saved last monitoring round
            transmit_bytes_total = system_metrics[
                'network_transmit_bytes_total']
            receive_bytes_total = system_metrics['network_receive_bytes_total']
            network_transmit_bytes_per_second = None
            network_receive_bytes_per_second = None

            # If we have values to compare to (i.e. not the first ever
            # transformation) compute the bytes per second transmitted/received
            if system.last_monitored is not None:
                network_transmit_bytes_per_second = \
                    (transmit_bytes_total -
                     system.network_transmit_bytes_total) \
                    / (meta_data['time'] - system.last_monitored)
                network_receive_bytes_per_second = \
                    (receive_bytes_total - system.network_receive_bytes_total) \
                    / (meta_data['time'] - system.last_monitored)

            # Compute the time spent doing I/O since the last time we received
            # data for this system
            disk_io_time_seconds_total = system_metrics[
                'disk_io_time_seconds_total']
            disk_io_time_seconds_in_interval = None

            # If we have values to compare to (i.e. not the first ever
            # transformation) compute the time spent doing I/O since the last
            # monitoring round
            if system.last_monitored is not None:
                disk_io_time_seconds_in_interval = \
                    disk_io_time_seconds_total - \
                    system.disk_io_time_seconds_total

            transformed_data = copy.deepcopy(data)
            td_meta_data = transformed_data['result']['meta_data']
            td_metrics = transformed_data['result']['data']

            # Transform the meta_data by deleting the monitor_name and changing
            # the time key to last_monitored key
            del td_meta_data['monitor_name']
            del td_meta_data['time']
            td_meta_data['last_monitored'] = meta_data['time']

            # Transform the data by adding the new processed data.
            td_metrics['network_transmit_bytes_per_second'] = \
                network_transmit_bytes_per_second
            td_metrics['network_receive_bytes_per_second'] = \
                network_receive_bytes_per_second
            td_metrics['disk_io_time_seconds_in_interval'] = \
                disk_io_time_seconds_in_interval
            td_metrics['went_down_at'] = None
        elif 'error' in data:
            meta_data = data['error']['meta_data']
            error_code = data['error']['code']
            system_id = meta_data['system_id']
            system_name = meta_data['system_name']
            time_of_error = meta_data['time']
            system = self.state[system_id]
            downtime_exception = SystemIsDownException(system_name)

            # In case of errors in the sent messages, only remove the
            # monitor_name from the meta_data
            transformed_data = copy.deepcopy(data)
            del transformed_data['error']['meta_data']['monitor_name']

            # If we have a downtime error, set went_down_at to the time of error
            # if the system was up. Otherwise, leave went_down_at as stored in
            # the system state
            if error_code == downtime_exception.code:
                transformed_data['error']['data'] = {}
                td_metrics = transformed_data['error']['data']
                td_metrics['went_down_at'] = \
                    system.went_down_at if system.is_down else time_of_error
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _transform_data".format(self))

        data_for_alerting = self._process_transformed_data_for_alerting(
            transformed_data)
        data_for_saving = self._process_transformed_data_for_saving(
            transformed_data)

        self.logger.debug("Data transformation successful")

        return transformed_data, data_for_alerting, data_for_saving
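
A worked instance of the rate computation above, under assumed sample values:

    previous_total = 1_000_000     # network_transmit_bytes_total, last round
    current_total = 1_600_000      # network_transmit_bytes_total, this round
    last_monitored = 1609459200.0  # timestamp of the last round
    time_now = 1609459260.0        # timestamp of this round, 60s later

    bytes_per_second = (current_total - previous_total) \
        / (time_now - last_monitored)
    assert bytes_per_second == 10_000.0  # 600,000 bytes over 60 seconds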
Example #12
    def _process_transformed_data_for_alerting(self,
                                               transformed_data: Dict) -> Dict:
        self.logger.debug("Performing further processing for alerting ...")

        if 'result' in transformed_data:
            td_meta_data = transformed_data['result']['meta_data']
            td_system_id = td_meta_data['system_id']
            system = self.state[td_system_id]
            td_metrics = transformed_data['result']['data']

            processed_data = {
                'result': {
                    'meta_data': copy.deepcopy(td_meta_data),
                    'data': {}
                }
            }

            # Reformat the data in such a way that both the previous and current
            # states are sent to the alerter
            processed_data_metrics = processed_data['result']['data']
            for metric, value in td_metrics.items():
                processed_data_metrics[metric] = {}
                processed_data_metrics[metric]['current'] = value

            processed_data_metrics['process_cpu_seconds_total']['previous'] = \
                system.process_cpu_seconds_total
            processed_data_metrics['process_memory_usage']['previous'] = \
                system.process_memory_usage
            processed_data_metrics['virtual_memory_usage']['previous'] = \
                system.virtual_memory_usage
            processed_data_metrics['open_file_descriptors']['previous'] = \
                system.open_file_descriptors
            processed_data_metrics['system_cpu_usage']['previous'] = \
                system.system_cpu_usage
            processed_data_metrics['system_ram_usage']['previous'] = \
                system.system_ram_usage
            processed_data_metrics['system_storage_usage']['previous'] = \
                system.system_storage_usage
            processed_data_metrics['network_receive_bytes_total']['previous'] \
                = system.network_receive_bytes_total
            processed_data_metrics['network_transmit_bytes_total']['previous'] \
                = system.network_transmit_bytes_total
            processed_data_metrics['disk_io_time_seconds_total']['previous'] \
                = system.disk_io_time_seconds_total
            processed_data_metrics['network_transmit_bytes_per_second'][
                'previous'] = system.network_transmit_bytes_per_second
            processed_data_metrics['network_receive_bytes_per_second'][
                'previous'] = system.network_receive_bytes_per_second
            processed_data_metrics['disk_io_time_seconds_in_interval'][
                'previous'] = system.disk_io_time_seconds_in_interval
            processed_data_metrics['went_down_at'][
                'previous'] = system.went_down_at
        elif 'error' in transformed_data:
            td_meta_data = transformed_data['error']['meta_data']
            td_error_code = transformed_data['error']['code']
            td_system_id = td_meta_data['system_id']
            td_system_name = td_meta_data['system_name']
            system = self.state[td_system_id]
            downtime_exception = SystemIsDownException(td_system_name)

            processed_data = copy.deepcopy(transformed_data)

            if td_error_code == downtime_exception.code:
                td_metrics = transformed_data['error']['data']
                processed_data_metrics = processed_data['error']['data']

                for metric, value in td_metrics.items():
                    processed_data_metrics[metric] = {}
                    processed_data_metrics[metric]['current'] = value

                processed_data_metrics['went_down_at']['previous'] = \
                    system.went_down_at
        else:
            raise ReceivedUnexpectedDataException(
                "{}: _process_transformed_data_for_alerting".format(self))

        self.logger.debug("Processing successful.")

        return processed_data
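
The current/previous pairing above follows one pattern repeated per metric; a hedged standalone sketch (the helper name is hypothetical):

    from typing import Dict

    def pair_current_with_previous(current_metrics: Dict,
                                   previous_metrics: Dict) -> Dict:
        # Hypothetical helper: wrap every metric so the alerter receives
        # both states, e.g.
        # {'system_cpu_usage': {'current': 57.5, 'previous': 51.2}}.
        return {metric: {'current': value,
                         'previous': previous_metrics.get(metric)}
                for metric, value in current_metrics.items()}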
Example #13
    def _process_data(self,
                      ch: pika.adapters.blocking_connection.BlockingChannel,
                      method: pika.spec.Basic.Deliver,
                      properties: pika.spec.BasicProperties,
                      body: bytes) -> None:
        data_received = json.loads(body.decode())
        self.logger.debug("Received %s. Now processing this data.",
                          data_received)

        parsed_routing_key = method.routing_key.split('.')
        processing_error = False
        data_for_alerting = []
        try:
            if self.alerts_configs.parent_id in parsed_routing_key:
                if 'result' in data_received:
                    data = data_received['result']['data']
                    meta_data = data_received['result']['meta_data']
                    system_id = meta_data['system_id']
                    self._create_state_for_system(system_id)

                    self._process_results(data, meta_data, data_for_alerting)
                elif 'error' in data_received:
                    self._create_state_for_system(
                        data_received['error']['meta_data']['system_id'])
                    self._process_errors(data_received['error'],
                                         data_for_alerting)
                else:
                    raise ReceivedUnexpectedDataException(
                        "{}: _process_data".format(self))
            else:
                raise ReceivedUnexpectedDataException(
                    "{}: _process_data".format(self))
        except Exception as e:
            self.logger.error("Error when processing %s", data_received)
            self.logger.exception(e)
            processing_error = True

        # If the data is processed, it can be acknowledged.
        self.rabbitmq.basic_ack(method.delivery_tag, False)

        # Place the data on the publishing queue if there were no processing
        # errors. This is done after acknowledging the data, so that if
        # acknowledgement fails, the data is processed again and we do not have
        # duplication of data in the queue.
        if not processing_error:
            self._place_latest_data_on_queue(data_for_alerting)

        # Send any data waiting in the publisher queue, if any
        try:
            self._send_data()

            if not processing_error:
                heartbeat = {
                    'component_name': self.alerter_name,
                    'is_alive': True,
                    'timestamp': datetime.now().timestamp()
                }
                self._send_heartbeat(heartbeat)
        except MessageWasNotDeliveredException as e:
            # Log the exception and do not raise it; the message was already
            # acknowledged and removed from the rabbit queue, and the data
            # still resides in the publisher queue.
            self.logger.exception(e)
        except Exception:
            # Re-raise any other exception. The message was already
            # acknowledged and removed from the rabbit queue, and the data
            # now resides in the publisher queue, so nothing is lost.
            raise
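
A short sketch of the routing-key gate above; the key format is an assumption inferred from the split-and-membership test:

    # Assuming alerts are routed with keys of the form
    # '<prefix>.<parent_id>.<suffix>':
    routing_key = 'alert.chain_9b2f51a1.system'  # illustrative key
    parsed_routing_key = routing_key.split('.')
    # -> ['alert', 'chain_9b2f51a1', 'system']
    # The message is only processed if the alerter's configured parent_id
    # appears in the parsed key:
    assert 'chain_9b2f51a1' in parsed_routing_key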
Example #14
    def _process_data(self,
                      ch: pika.adapters.blocking_connection.BlockingChannel,
                      method: pika.spec.Basic.Deliver,
                      properties: pika.spec.BasicProperties,
                      body: bytes) -> None:
        data_received = json.loads(body.decode())
        self.logger.debug("Received %s. Now processing this data.",
                          data_received)

        processing_error = False
        data_for_alerting = []
        try:
            if 'result' in data_received:
                meta = data_received['result']['meta_data']
                data = data_received['result']['data']

                if self._cannot_access_github_page:
                    alert = GitHubPageNowAccessibleAlert(
                        meta['repo_name'], 'INFO', meta['last_monitored'],
                        meta['repo_parent_id'], meta['repo_id'])
                    data_for_alerting.append(alert.alert_data)
                    self.logger.debug("Successfully classified alert %s",
                                      alert.alert_data)
                    self._cannot_access_github_page = False

                current = data['no_of_releases']['current']
                previous = data['no_of_releases']['previous']
                if previous is not None and int(current) > int(previous):
                    # Raise one alert per newly detected release. Cast the
                    # counts to int here too, since they may arrive as strings.
                    for i in range(int(current) - int(previous)):
                        alert = NewGitHubReleaseAlert(
                            meta['repo_name'],
                            data['releases'][str(i)]['release_name'],
                            data['releases'][str(i)]['tag_name'], 'INFO',
                            meta['last_monitored'], meta['repo_parent_id'],
                            meta['repo_id'])
                        data_for_alerting.append(alert.alert_data)
                        self.logger.debug("Successfully classified alert %s",
                                          alert.alert_data)
            elif 'error' in data_received:
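                # An error code of 5006 evidently signals that the GitHub
                # page could not be accessed, hence the alert below.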
                if int(data_received['error']['code']) == 5006:
                    meta_data = data_received['error']['meta_data']
                    alert = CannotAccessGitHubPageAlert(
                        meta_data['repo_name'], 'ERROR', meta_data['time'],
                        meta_data['repo_parent_id'], meta_data['repo_id'])
                    data_for_alerting.append(alert.alert_data)
                    self.logger.debug("Successfully classified alert %s",
                                      alert.alert_data)
                    self._cannot_access_github_page = True
            else:
                raise ReceivedUnexpectedDataException("{}: _process_data"
                                                      "".format(self))
        except Exception as e:
            self.logger.error("Error when processing %s", data_received)
            self.logger.exception(e)
            processing_error = True

        self.rabbitmq.basic_ack(method.delivery_tag, False)

        # Place the data on the publishing queue if there were no processing
        # errors. This is done after acknowledging the data, so that if
        # acknowledgement fails, the data is processed again and we do not have
        # duplication of data in the queue
        if not processing_error:
            self._place_latest_data_on_queue(data_for_alerting)

        # Send any data waiting in the publisher queue, if any
        try:
            self._send_data()

            if not processing_error:
                heartbeat = {
                    'component_name': self.alerter_name,
                    'is_alive': True,
                    'timestamp': datetime.now().timestamp()
                }
                self._send_heartbeat(heartbeat)
        except MessageWasNotDeliveredException as e:
            # Log the exception and do not raise it, as the message still
            # resides in the publisher queue.
            self.logger.exception(e)
        except Exception:
            # Re-raise any other exception.
            raise
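
Finally, a hedged sketch of the 'result' data the release loop above consumes; the assumption, inferred from the loop indices, is that the newest releases occupy the lowest string indices:

    data = {
        'no_of_releases': {'current': 5, 'previous': 3},
        'releases': {
            '0': {'release_name': 'v2.1', 'tag_name': 'v2.1.0'},  # new
            '1': {'release_name': 'v2.0', 'tag_name': 'v2.0.0'},  # new
            '2': {'release_name': 'v1.9', 'tag_name': 'v1.9.0'},
        },
    }
    # current - previous == 2, so NewGitHubReleaseAlert is raised for
    # indices '0' and '1' only.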