def send_email(current_ip, result, email_title=u'上海ES集群告警'):
    """E-mail a check result for the given IP.

    Bug fix: the original referenced ``email_title`` without defining it
    (NameError on every call). It is now a keyword parameter defaulting to
    the alert title used throughout this file, so existing two-argument
    callers keep working.

    current_ip: IP address string included in the mail body.
    result: check result text appended after the IP.
    email_title: subject line of the alert mail.
    """
    reporter = Reporter()
    email_detail = 'ip: ' + current_ip + '\n\n' + result
    reporter.send_email(email_title, email_detail)
# ---- Beispiel #2 (example separator from the source aggregator) ----
class IndicesParser(object):
    """Turns raw ``_cat/indices`` text snapshots into ES bulk-index actions.

    Two snapshots taken some seconds apart are diffed (matched by uuid) to
    derive a per-index ``added_docs_count``.
    """

    # One unit expressed in bytes; drives both suffix parsing and target-unit
    # conversion in store_size_unit_transition(). All factors are powers of
    # two, so the conversion arithmetic is numerically exact.
    _UNIT_FACTORS = {'bytes': 1, 'kb': 1024, 'mb': 1024 ** 2, 'gb': 1024 ** 3}

    def __init__(self):
        self.reporter = Reporter()

    def parse_data(self, indices_data, latest_indices_data, es_config_info):
        """Build one bulk action per valid row of *latest_indices_data*.

        indices_data: earlier ``_cat/indices`` snapshot (list of text lines).
        latest_indices_data: later snapshot of the same endpoint.
        es_config_info: dict with keys 'esindex_prefix', 'data_type' and
            'store_size_unit'.
        Returns a list of dicts shaped for elasticsearch ``helpers.bulk``.
        """
        esindex_prefix = es_config_info['esindex_prefix']
        data_type = es_config_info['data_type']
        store_size_unit = es_config_info['store_size_unit']
        # NOTE(review): timestamp hard-codes a +08:00 offset — assumes the
        # host clock is CST.
        timestamp = time.strftime('%Y-%m-%dT%H:%M:%S+08:00')
        store_size = 'store_size_in_' + store_size_unit
        pri_store_size = 'pri_store_size_in_' + store_size_unit
        final_data_template = {
            '@timestamp': timestamp,
            'appname': 'monitor',
            'type': data_type,
            'indices_status': {
                'health': {},
                'status': {},
                'index_name': {},
                'uuid': {},
                'pri': {},
                'rep': {},
                'docs_count': {},
                'docs_deleted': {},
                store_size: {},
                pri_store_size: {},
                'added_docs_count': {}  # optional attr
            }
        }

        # uuid -> docs_count from the earlier snapshot.
        previous_docs = {}
        for index_data in indices_data:
            index_attr = index_data.strip().split()
            # A valid `_cat/indices` row has exactly 10 columns; this also
            # drops empty lines (len 0 != 10).
            if len(index_attr) != 10:
                continue
            previous_docs[index_attr[3]] = index_attr[6]

        # Loop invariants hoisted: the daily target index name and whether
        # the two snapshots have the same number of rows.
        esindex = "%s-%s" % (esindex_prefix, time.strftime('%Y.%m.%d'))
        same_cardinality = len(latest_indices_data) == len(indices_data)

        values = []
        for latest_index_data in latest_indices_data:
            latest_index_attr = latest_index_data.strip().split()
            if len(latest_index_attr) != 10:
                continue
            final_data = copy.deepcopy(final_data_template)
            status = final_data['indices_status']
            status['health'] = latest_index_attr[0]
            status['status'] = latest_index_attr[1]
            status['index_name'] = latest_index_attr[2]
            status['uuid'] = latest_index_attr[3]
            status['pri'] = latest_index_attr[4]
            status['rep'] = latest_index_attr[5]
            status['docs_count'] = int(latest_index_attr[6])
            status['docs_deleted'] = int(latest_index_attr[7])

            store_size_value = self.store_size_unit_transition(
                store_size_unit, latest_index_attr[8], latest_index_attr[2])
            pri_store_size_value = self.store_size_unit_transition(
                store_size_unit, latest_index_attr[9], latest_index_attr[2])
            # -1 marks a tb-scale index that is deliberately not recorded.
            if store_size_value == -1 or pri_store_size_value == -1:
                continue
            status[store_size] = store_size_value
            status[pri_store_size] = pri_store_size_value

            if not same_cardinality:
                # Index set changed between snapshots: a diff is meaningless.
                status['added_docs_count'] = 0
            elif status['uuid'] not in previous_docs:
                status['added_docs_count'] = 0
            else:
                docs_count = previous_docs[status['uuid']]
                status['added_docs_count'] = (
                    int(latest_index_attr[6]) - int(docs_count))

            values.append({
                "_index": esindex,
                "_type": data_type,
                "_source": final_data
            })
        return values

    # Unit conversion (only bytes..gb are handled; a single index holding
    # tb-scale data is treated as a design problem and alerted instead).
    def store_size_unit_transition(self, store_size_unit, store_size_value,
                                   index_name):
        """Convert an ES size string such as '1.5gb' into *store_size_unit*.

        store_size_unit: 'bytes', 'kb' or 'mb'; anything else falls back
            to gb (matches the original branch layout).
        store_size_value: size text with a b/kb/mb/gb suffix.
        index_name: used only in the tb-scale alert e-mail.
        Returns the converted float, or -1 after e-mailing an alert when the
        value has reached tb scale.
        """
        if 'tb' in store_size_value:
            email_title = u'上海ES集群告警'
            email_detail = u'以下索引占用磁盘空间已经达到tb级别,请调整相关设置,该索引现在开始将不进行数据统计:' + index_name
            self.reporter.send_email(email_title, email_detail)
            return -1

        # Parse the source value into bytes based on its unit suffix. This
        # replaces the former 4x4 duplicated branch matrix with one exact
        # factor-table conversion.
        for suffix in ('gb', 'mb', 'kb'):
            if store_size_value.endswith(suffix):
                size_in_bytes = (float(store_size_value[:-len(suffix)])
                                 * self._UNIT_FACTORS[suffix])
                break
        else:
            # Plain byte values end in a bare 'b', e.g. '733b'.
            size_in_bytes = float(store_size_value.rstrip('b'))

        # Scale into the requested target unit (unknown unit -> gb, as the
        # original trailing else-branch did).
        target_factor = self._UNIT_FACTORS.get(store_size_unit,
                                               self._UNIT_FACTORS['gb'])
        return size_in_bytes / target_factor
# ---- Beispiel #3 (example separator from the source aggregator) ----
class IndicesChecker(object):
    """Periodic task: sample `_cat/indices` twice per cycle, diff the two
    snapshots via IndicesParser, bulk-write the metric documents back into
    ES, and e-mail at most one alert per connectivity outage."""

    def __init__(self):
        conf = ConfigParser.ConfigParser()
        conf.read(CONFIG_PATH)
        self.es_url = conf.get("ES", "es_url")
        self.esindex_prefix = conf.get("ES", "esindex_prefix")
        self.data_type = conf.get("indices_module", "data_type")
        self.sampling_speed = int(conf.get("indices_module", "sampling_speed"))
        self.store_size_unit = conf.get("indices_module", "store_size_unit")

        self.indices_parser = IndicesParser()
        self.reporter = Reporter()

    def start_indices_task(self):
        """Run forever: one sampling cycle per iteration.

        pycurl.error (cluster unreachable) triggers a single alert e-mail;
        the flag is re-armed by the next successful cycle. Any other
        exception is logged and the loop continues.
        """
        if_send_email = True  # True -> no alert sent yet for current outage
        while True:
            logger.info('Start indices_checker')
            try:
                es = Elasticsearch(self.es_url)
                indices_data = self.get_indices_status(self.es_url)
                # Wait so the second snapshot (taken inside
                # make_indices_data) is sampling_speed seconds later.
                time.sleep(self.sampling_speed)
                values = self.make_indices_data(indices_data)
                self.send_data(es, values)
                if_send_email = True
                # Release the client's pooled sockets before the next cycle.
                for conn in es.transport.connection_pool.connections:
                    conn.pool.close()
                logger.info('Indices_checker is running normally')
            except pycurl.error:
                if if_send_email:
                    email_title = u'上海ES集群告警'
                    email_detail = u'以下节点或整个集群出现异常,请进行检查:' + self.es_url
                    self.reporter.send_email(email_title, email_detail)
                    if_send_email = False
                    logger.warn(
                        'Indices_checker fails to connect to es, we will send an alert e-mail'
                    )
                else:
                    logger.warn(
                        'Indices_checker fails to connect to es, we have already sent an alert e-mail'
                    )
            except Exception as e:
                logger.error(
                    'Some exception happened to indices_checker, details are as follows:'
                )
                logger.error(e)
            finally:
                logger.info('Finish indices_checker\n')

    def get_indices_status(self, es_url):
        """Fetch the raw `_cat/indices` text and return it as a list of lines.

        May raise pycurl.error, which the caller treats as a cluster
        connectivity alert.
        """
        buf = StringIO.StringIO()
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.URL, 'http://' + es_url + ':9200/_cat/indices')
            curl.setopt(pycurl.WRITEFUNCTION, buf.write)
            curl.perform()
            body = buf.getvalue()
        finally:
            # Bug fix: always release the handle, even when perform() raises;
            # the original leaked the Curl object on connection errors.
            curl.close()
        return body.split('\n')

    def make_indices_data(self, indices_data):
        """Take the second snapshot and delegate the diff to IndicesParser."""
        latest_indices_data = self.get_indices_status(self.es_url)
        es_config_info = {
            'esindex_prefix': self.esindex_prefix,
            'data_type': self.data_type,
            'store_size_unit': self.store_size_unit
        }
        return self.indices_parser.parse_data(indices_data,
                                              latest_indices_data,
                                              es_config_info)

    def send_data(self, es, values):
        """Bulk-index the prepared actions into ES."""
        helpers.bulk(es, values)
# ---- Beispiel #4 (example separator from the source aggregator) ----
class NodesChecker(object):
    """Periodic task: sample `/_nodes/stats` twice per cycle, alert on node
    membership changes and low node count, and bulk-write the parsed node
    metrics back into ES."""

    def __init__(self):
        conf = ConfigParser.ConfigParser()
        conf.read(CONFIG_PATH)
        self.es_url = conf.get("ES", "es_url")
        self.esindex_prefix = conf.get("ES", "esindex_prefix")
        self.data_type = conf.get("nodes_module", "data_type")
        self.sampling_speed = int(conf.get("nodes_module", "sampling_speed"))
        self.data_structure = conf.get("nodes_module", "data_structure")
        self.nodes_total_count = conf.get("nodes_module", "nodes_total_count")

        self.nodes_parser = NodesParser()
        self.es_template = EsTemplate()
        self.reporter = Reporter()

    def start_nodes_task(self):
        """Run forever: one sampling cycle per iteration.

        Sends a low-node-count alert at most once per hour (tracked via
        latest_check_time) and a connectivity alert at most once per outage
        (tracked via if_send_email). Other exceptions are logged and the
        loop continues.
        """
        self.es_template.make_nodes_template(self.es_url, self.data_structure)
        if_send_email = True  # True -> no alert sent yet for current outage
        latest_check_time = time.strftime('%Y-%m-%dT%H时')
        while True:
            logger.info('Start nodes_checker')
            try:
                es = Elasticsearch(self.es_url)
                nodes_count, nodes_data = self.get_nodes_status(self.es_url)
                # Alert if nodes are missing, throttled to one mail per hour.
                if nodes_count < int(self.nodes_total_count) and time.strftime('%Y-%m-%dT%H时') != latest_check_time:
                    email_title = u'上海ES集群告警'
                    email_detail = u'该集群共有' + str(self.nodes_total_count) + u'个节点,目前只有' + str(nodes_count) + u'个处于正常状态,请检查!'
                    self.reporter.send_email(email_title, email_detail)
                    latest_check_time = time.strftime('%Y-%m-%dT%H时')
                # Wait so the second snapshot (taken inside make_nodes_data)
                # is sampling_speed seconds later.
                time.sleep(self.sampling_speed)
                values = self.make_nodes_data(nodes_count, nodes_data)
                self.send_data(es, values)
                if_send_email = True
                # Release the client's pooled sockets before the next cycle.
                for conn in es.transport.connection_pool.connections:
                    conn.pool.close()
                logger.info('Nodes_checker is running normally')
            except pycurl.error:
                if if_send_email:
                    email_title = u'上海ES集群告警'
                    email_detail = u'以下节点或整个集群出现异常,请进行检查:' + self.es_url
                    self.reporter.send_email(email_title, email_detail)
                    if_send_email = False
                    logger.warn('Nodes_checker fails to connect to es, we will send an alert e-mail')
                else:
                    logger.warn('Nodes_checker fails to connect to es, we have already sent an alert e-mail')
            except Exception as e:
                logger.error('Some exception happened to nodes_checker, details are as follows:')
                logger.error(e)
            finally:
                logger.info('Finish nodes_checker\n')

    def get_nodes_status(self, es_url):
        """Fetch `/_nodes/stats` and return (node_count, nodes_dict).

        May raise pycurl.error, which the caller treats as a cluster
        connectivity alert.
        """
        buf = StringIO.StringIO()
        curl = pycurl.Curl()
        try:
            curl.setopt(pycurl.URL, 'http://' + es_url + ':9200/_nodes/stats/indices,os,process,jvm,fs,transport/search')
            curl.setopt(pycurl.WRITEFUNCTION, buf.write)
            curl.perform()
            body = buf.getvalue()
        finally:
            # Bug fix: always release the handle, even when perform() raises;
            # the original leaked the Curl object on connection errors.
            curl.close()
        data = json.loads(body)
        nodes_data = data['nodes']
        nodes_count = len(nodes_data.keys())
        return nodes_count, nodes_data

    def make_nodes_data(self, nodes_count, nodes_data):
        """Take the second snapshot and diff it against the first.

        Returns bulk actions when node membership is unchanged; otherwise
        e-mails a lost/added-node alert and returns [] to skip this cycle.
        """
        latest_nodes_count, latest_nodes_data = self.get_nodes_status(self.es_url)
        if latest_nodes_count != nodes_count:
            # Membership changed: compute both ip lists once and report the
            # difference (the original duplicated these comprehensions).
            node_ip_list = [nodes_data[node]['ip'].split(':')[0] for node in nodes_data.keys()]
            latest_node_ip_list = [latest_nodes_data[node]['ip'].split(':')[0] for node in latest_nodes_data.keys()]
            email_title = u'上海ES集群告警'
            if latest_nodes_count < nodes_count:
                lost_node_ip = [node_ip for node_ip in node_ip_list if node_ip not in latest_node_ip_list]
                email_detail = u'以下节点的ES从原集群中脱离了,请进行检查:' + str(lost_node_ip)
            else:
                added_node_ip = [latest_node_ip for latest_node_ip in latest_node_ip_list if latest_node_ip not in node_ip_list]
                email_detail = u'新加入了以下节点,请确认:' + str(added_node_ip)
            self.reporter.send_email(email_title, email_detail)
            return []
        es_config_info = {'esindex_prefix': self.esindex_prefix, 'data_type': self.data_type}
        if self.data_structure == 'flat':
            return self.nodes_parser.parse2flat_data(nodes_data, latest_nodes_data, es_config_info)
        return self.nodes_parser.parse2nested_data(nodes_data, latest_nodes_data, es_config_info)

    def send_data(self, es, values):
        """Bulk-index the prepared actions into ES."""
        helpers.bulk(es, values)