コード例 #1
0
class MigrationLogstore(object):
    """Thin wrapper binding an SLS client to one project/logstore target."""

    def __init__(
        self,
        endpoint,
        access_id,
        access_key,
        project_name,
        logstore_name,
        topic,
        source,
    ):
        # One client per target; it carries the credentials for all writes.
        self._log_client = LogClient(
            endpoint=endpoint,
            accessKeyId=access_id,
            accessKey=access_key,
        )
        self._project_name = project_name
        self._logstore_name = logstore_name
        self._topic = topic
        self._source = source

    @property
    def name(self):
        """Name of the wrapped logstore."""
        return self._logstore_name

    def put_logs(self, logitems):
        """Write *logitems* to the configured project/logstore."""
        request = PutLogsRequest(
            project=self._project_name,
            logstore=self._logstore_name,
            topic=self._topic,
            source=self._source,
            logitems=logitems,
        )
        self._log_client.put_logs(request)
コード例 #2
0
def send_log_to_aliyun(logstore, message):
    """Send one log entry to the Aliyun Log Service.

    :param logstore: name of the target logstore
    :param message:  log string to send
    :return:  None
    """
    # Build a throwaway client; credentials come from the settings module.
    client = LogClient(settings.END_POINT, settings.ACCESS_KEY_ID,
                       settings.ACCESS_KEY)
    item = LogItem()
    item.set_time(int(time.time()))
    item.set_contents([('message', message)])
    # Empty topic and source; single-item batch.
    request = PutLogsRequest(settings.PROJECT, logstore, '', '', [item])
    client.put_logs(request)
コード例 #3
0
ファイル: aliyunlog.py プロジェクト: cone387/hotspot
class AliyunLog():
    """Small convenience wrapper around the Aliyun SLS SDK.

    Writes single log entries and queries logs/topics from one configured
    project/logstore. Query helpers are best-effort: they print the error
    and return [] on any failure.
    """

    def __init__(self,
                 endpoint,
                 access_key_id,
                 access_key,
                 project=None,
                 logstore=None,
                 topic=None,
                 source=None):
        self.logstore = logstore
        self.project = project
        self.topic = topic
        self.source = source
        # NOTE(review): assert is stripped under `python -O`; kept as-is so
        # callers that expect AssertionError keep working.
        assert isinstance(self.topic, str), 'topic must be string'
        self.client = LogClient(endpoint, access_key_id, access_key)

    def add_log(self, item: str):
        """Write *item* (expected: a json.dumps()'d string) as one log entry.

        The payload is stored under a single 'content' field.
        """
        log_item = LogItem(int(time.time()), [('content', item)])
        log_req = PutLogsRequest(self.project,
                                 self.logstore,
                                 topic=self.topic,
                                 source=self.source,
                                 logitems=[log_item])
        self.client.put_logs(log_req)

    def get_topics(self, fromTime=None, toTime=None):
        """Return the distinct __topic__ values seen in the logstore.

        Returns [] (and prints the error) on any failure.
        """
        try:
            req = GetLogsRequest(
                self.project,
                self.logstore,
                fromTime=fromTime,
                toTime=toTime,
                topic=self.topic,
                query='*|select "__topic__" group by "__topic__"')
            res = self.client.get_logs(req)
            return [log.get_contents()['__topic__'] for log in res.get_logs()]
        except Exception as e:
            print("Get topic error: %s" % str(e))
            return []

    def get_logs(self, fromTime, toTime):
        """Return the contents of all logs between fromTime and toTime.

        Returns [] (and prints the error) on any failure.
        """
        # BUG FIX: the original method contained a second, cursor-based
        # implementation (list_shards/get_cursor/pull_logs) *after* the
        # unconditional return/except-return below. That code was
        # unreachable dead code (and carried a copy-pasted "Get topic
        # error" message), so it has been removed.
        try:
            req = GetLogsRequest(self.project,
                                 self.logstore,
                                 fromTime=fromTime,
                                 toTime=toTime,
                                 query='*')
            res = self.client.get_logs(req)
            return [log.get_contents() for log in res.get_logs()]
        except Exception as e:
            print("Get logs error: %s" % str(e))
            return []
コード例 #4
0
class MNNLogger(object):
    """Best-effort usage logger that ships events to Aliyun Log Service.

    Temporary STS credentials are fetched on demand from a proxy endpoint;
    endpoint/project/logstore names are stored base64-encoded in the
    source. Every failure is swallowed so logging never breaks the host
    application.
    """

    def __init__(self):
        # Identifiers are base64-obfuscated; decode them once at startup.
        self._url = base64.urlsafe_b64decode(
            b'aHR0cHM6Ly8xMDMyMjc3OTQ5NDA5MTkzLmNuLWhhbmd6aG91LmZjLmFsaXl1bmNzLmNvbS8yMDE2LTA4LTE1L3Byb3h5L21ubi1zZXJ2aWNlL3dvcmtzdGF0aW9uLXN0cy8='
        ).decode()
        self._endpoint = base64.urlsafe_b64decode(
            b'aHR0cHM6Ly9jbi1oYW5nemhvdS5sb2cuYWxpeXVuY3MuY29t').decode()
        self._log_project = base64.urlsafe_b64decode(
            b'bW5uLW1vbml0b3I=').decode()
        self._log_store = base64.urlsafe_b64decode(b'bW5uLXRvb2xz').decode()
        self._network_available = True
        self._activate()

    def _activate(self):
        """Fetch temporary STS credentials and (re)build the log client.

        On any failure the logger marks the network as unavailable, which
        turns subsequent put_log() calls into no-ops.
        """
        try:
            req = urllib.request.Request(self._url)
            res = urllib.request.urlopen(req)
            data = res.read()
            temp_credentials = json.loads(data)

            access_key_id = temp_credentials['Credentials']['AccessKeyId']
            access_key = temp_credentials['Credentials']['AccessKeySecret']
            security_token = temp_credentials['Credentials']['SecurityToken']
            self._expire_time = temp_credentials['Credentials']['Expiration']

            self._client = LogClient(self._endpoint, access_key_id, access_key,
                                     security_token)
        except Exception:
            # Deliberate best-effort; narrowed from bare `except:` so that
            # KeyboardInterrupt/SystemExit still propagate.
            self._network_available = False

    def _is_token_valid(self):
        """Return True while the STS token has more than 60s before expiry.

        NOTE(review): the expiry is parsed as a naive timestamp and shifted
        by a fixed +8h (CST) before comparing against local epoch time —
        this assumes a UTC+8 host; confirm before deploying elsewhere.
        """
        utc_date = datetime.datetime.strptime(self._expire_time,
                                              "%Y-%m-%dT%H:%M:%SZ")
        local_date = utc_date + datetime.timedelta(hours=8)
        now_time = int(time.time())
        return local_date.timestamp() - now_time >= 60

    def _get_machine_id(self, os_type):
        """Return a stable per-machine identifier for the given OS type.

        Falls back to a uuid1-derived suffix when no OS-level id is found.
        """
        machine_id = ""

        if os_type == "Linux":
            if os.path.exists("/etc/machine-id"):
                machine_id = os.popen(
                    "cat /etc/machine-id").readline().strip().lower()
        elif os_type == "Darwin":
            res = os.popen("ioreg -rd1 -c IOPlatformExpertDevice | grep UUID"
                           ).readline().strip().split('"')
            if len(res) > 1:
                machine_id = res[-2].lower()
        elif os_type == "Windows":
            # BUG FIX: the original stored the GUID in a throwaway `res`
            # variable and never assigned machine_id, so Windows always
            # fell through to the uuid fallback. The raw string also
            # removes the invalid `\S`/`\M`/`\C` escape sequences.
            machine_id = os.popen(
                r"reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography\ /v MachineGuid"
            ).read().strip().split(" ")[-1].lower()

        if machine_id == "":
            machine_id = uuid.uuid1().hex[20:]

        return machine_id

    def _collect_basic_logs(self):
        """Return the base fields attached to every log entry."""
        basic_logs = {}
        from ...version import __version__
        basic_logs["mnn_python_version"] = __version__

        os_type = platform.system()
        basic_logs["os"] = os_type
        basic_logs["machine_id"] = self._get_machine_id(os_type)

        return basic_logs

    def _collect_contents(self, log_dict, contents):
        """Append (key, value) string pairs from log_dict onto contents.

        dict values are JSON-encoded; everything else is str()'d.
        """
        for key, value in log_dict.items():
            key = str(key)
            if isinstance(value, dict):
                value = json.dumps(value)
            else:
                value = str(value)
            contents.append((key, value))

    def put_log(self, log_dict, topic):
        """Send log_dict (plus basic machine info) under *topic*.

        Returns True on success, False on any failure (best-effort).
        """
        if not self._network_available:
            print("network not available...")
            return False

        try:
            # Refresh the STS token when it is close to expiring.
            if not self._is_token_valid():
                self._activate()

            contents = []
            self._collect_contents(self._collect_basic_logs(), contents)
            self._collect_contents(log_dict, contents)

            log_item = LogItem()
            log_item.set_time(int(time.time()))
            log_item.set_contents(contents)

            req = PutLogsRequest(self._log_project, self._log_store, topic, '',
                                 [log_item])
            self._client.put_logs(req)
            return True
        except Exception:
            # Deliberate best-effort: telemetry must never break the caller.
            return False
コード例 #5
0
class LogtailHeartbeatMonitor:
    """Finds machines whose Logtail heartbeat has timed out and whose
    status logs are missing, then reports them.

    Ported from Python 2 to Python 3 (print function, dict views, floor
    division, `str(e)` instead of `e.message`); the trailing
    ``raise err_msg`` — which raised a plain string and therefore crashed
    with TypeError — now raises RuntimeError.
    """

    def __init__(self):
        # SLS project whose machine groups are all monitored.
        self.__project_name = '<your_sls_project_name>'
        # Endpoint of the region hosting that project.
        self.__endpoint = '<endpoint_of_your_sls_project_region>'  # cn-hangzhou.log.aliyuncs.com
        # Heartbeat timeout threshold (seconds); default 15 minutes.
        self.__hb_timeout_threshold = 15 * 60
        # Project that stores the Logtail service/status logs.
        self.__logtail_status_project_name = '<status_log_project_name>'  # log-service-<your_aliuid>-<region_name>
        # Status-log query window (seconds); default: last 10 minutes.
        self.__query_range = 10 * 60
        # Status logs arrive about once per minute; fewer than this many
        # in the window marks the machine abnormal.
        self.__status_log_count_threshold = 8  # at least 8 status logs during recent 10 minutes.
        # project/logstore used to report anomalies; an empty logstore
        # disables reporting to SLS.
        self.__report_project_name = self.__project_name  # same project by default
        self.__report_logstore = ''

        self.__client = LogClient(
            endpoint=self.__endpoint,
            accessKeyId='',  # access key to call SLS APIs.
            accessKey='')

    def inspect(self):
        """Run one inspection pass; exit(1) when abnormal machines exist."""
        abnormal_machines = self.__do_inspect()
        if abnormal_machines:
            print('abnormal machines are found: ')
            print(json.dumps(abnormal_machines, indent=True))
            self.__report({
                'type': 'abnormal_machines',
                'count': len(abnormal_machines),
                'machines': ','.join(abnormal_machines.keys())
            })
            sys.exit(1)

    def __do_inspect(self):
        """Return {ip: meta} for abnormal machines, or None when none."""
        machine_groups = self.__client.list_machine_group(
            self.__project_name, offset=0, size=-1).get_machine_group()
        if not machine_groups:
            print('no machine group in project %s' % self.__project_name)
            return
        print('machine groups (count %s): %s' % (len(machine_groups),
                                                 machine_groups))

        # Union of heartbeat-timeout machines across all groups.
        hb_timeout_machines = {}
        for m in machine_groups:
            machines = self.__inspect_machine_group(m)
            for ip, meta in machines.items():
                if ip not in hb_timeout_machines:
                    hb_timeout_machines[ip] = meta
        print('heartbeat timeout machines (count %s): %s' % (
            len(hb_timeout_machines), list(hb_timeout_machines)[0:10]))
        if not hb_timeout_machines:
            return

        # A timed-out machine with enough status logs is still healthy;
        # only those below the status-log threshold are abnormal.
        abnormal_machines = {}
        machine_status_count = self.__count_status_log(
            list(hb_timeout_machines))
        for machine_ip, machine_meta in hb_timeout_machines.items():
            log_count = machine_status_count.get(machine_ip, 0)
            if log_count < self.__status_log_count_threshold:
                machine_meta['status_log_count'] = log_count
                abnormal_machines[machine_ip] = machine_meta
            else:
                print('log count of machine %s: %s' % (machine_ip, log_count))
        return abnormal_machines

    def __report(self, report_data):
        """Write report_data to the report logstore, if one is configured.

        Args:
            report_data: dict[string]string.
        """
        if not self.__report_logstore:
            return
        log = LogItem()
        for key, data in report_data.items():
            log.push_back(key, '%s' % data)
        req = PutLogsRequest(project=self.__project_name,
                             logstore=self.__report_logstore,
                             logitems=[log])
        self.__client.put_logs(req)

    def __inspect_machine_group(self, group_name):
        """Return {ip: meta} of machines in *group_name* with stale heartbeats."""
        abnormal_machines = {}
        machines = self.__client.list_machines(self.__project_name,
                                               group_name).get_machines()
        cur_time = int(time.time())
        for machine_status in machines:
            if cur_time - machine_status.heartbeat_time >= self.__hb_timeout_threshold:
                abnormal_machines[machine_status.ip] = {
                    'group_name': group_name,
                    'last_heartbeat_time': machine_status.heartbeat_time
                }
        return abnormal_machines

    def __count_status_log(self, machines):
        """Return {ip: status_log_count} for the given machine IPs.

        Queries in batches of 25 to keep the query string bounded.
        """
        count_rst = {}
        batch_count = 25
        # `//` preserves the original Python 2 integer-division semantics.
        for batch_seq in range(0, len(machines) // batch_count + 1):
            batch_machines = machines[batch_count * batch_seq:batch_count *
                                      (batch_seq + 1)]
            ip_condition = ' or '.join(['ip:' + ip for ip in batch_machines])
            query = '__topic__: logtail_status and (%s) | select ip, count(*) as c group by ip' % ip_condition
            try:
                res = self.__do_get_log(
                    project=self.__logtail_status_project_name,
                    logstore='internal-diagnostic_log',
                    query=query,
                    from_time=int(time.time()) - self.__query_range,
                    to_time=int(time.time()))
                for log in res.get_logs():
                    ip, count = log.contents['ip'], log.contents['c']
                    count_rst[ip] = count
            except Exception as e:
                self.__report({
                    'type': 'get_log_error',
                    'query': query,
                    'err': str(e)  # e.message does not exist in Python 3
                })
        return count_rst

    def __do_get_log(self, project, logstore, query, from_time, to_time):
        """Call get_log with up to 10 retries.

        Raises:
            RuntimeError: with the accumulated per-attempt errors when all
                10 attempts fail or return incomplete results.
        """
        err_msg = ''
        for idx in range(0, 10):
            try:
                res = self.__client.get_log(project=project,
                                            logstore=logstore,
                                            query=query,
                                            from_time=from_time,
                                            to_time=to_time)
                if not res.is_completed():
                    err_msg += '[%s] incomplete' % idx
                    continue
                return res
            except Exception as e:
                err_msg += '[%s] get_log error: %s\n' % (idx, e)
            finally:
                time.sleep(1)
        # BUG FIX: the original `raise err_msg` raised a plain string,
        # which itself fails with TypeError at runtime.
        raise RuntimeError(err_msg)