class MigrationLogstore(object):
    """Write-side target for a log migration.

    Bundles a LogClient with the fixed project / logstore / topic / source
    that every migrated batch is written to.
    """

    def __init__(
            self,
            endpoint,
            access_id,
            access_key,
            project_name,
            logstore_name,
            topic,
            source,
    ):
        # One client per target endpoint; credentials pass straight through.
        self._log_client = LogClient(
            endpoint=endpoint,
            accessKeyId=access_id,
            accessKey=access_key,
        )
        self._project_name = project_name
        self._logstore_name = logstore_name
        self._topic = topic
        self._source = source

    @property
    def name(self):
        """Name of the destination logstore."""
        return self._logstore_name

    def put_logs(self, logitems):
        """Write *logitems* to the configured project/logstore."""
        request = PutLogsRequest(
            project=self._project_name,
            logstore=self._logstore_name,
            topic=self._topic,
            source=self._source,
            logitems=logitems,
        )
        self._log_client.put_logs(request)
def send_log_to_aliyun(logstore, message):
    """Ship one log record to the Aliyun Log Service (SLS).

    :param logstore: target logstore name inside ``settings.PROJECT``
    :param message: log payload string, stored under the ``message`` key
    :return: None
    """
    # Build the single record, stamped with the current time.
    item = LogItem()
    item.set_time(int(time.time()))
    item.set_contents([('message', message)])
    # Endpoint and credentials come from project settings;
    # topic and source are left blank.
    sls_client = LogClient(settings.END_POINT, settings.ACCESS_KEY_ID, settings.ACCESS_KEY)
    request = PutLogsRequest(settings.PROJECT, logstore, '', '', [item])
    sls_client.put_logs(request)
class AliyunLog():
    """Thin wrapper around the Aliyun SLS client for one project/logstore."""

    def __init__(self, endpoint, access_key_id, access_key, project=None, logstore=None, topic=None, source=None):
        self.logstore = logstore
        self.project = project
        self.topic = topic
        self.source = source
        # NOTE(review): the default topic=None trips this assert, so callers
        # effectively must always pass a string topic. Kept as-is because
        # callers may rely on AssertionError here.
        assert isinstance(self.topic, str), 'topic must be string'
        self.client = LogClient(endpoint, access_key_id, access_key)

    def add_log(self, item: str):
        """Send one record; *item* is expected to be a JSON-encoded string,
        stored under the ``content`` key."""
        log_item = LogItem(int(time.time()), [('content', item)])
        log_req = PutLogsRequest(self.project, self.logstore,
                                 topic=self.topic, source=self.source,
                                 logitems=[log_item])
        self.client.put_logs(log_req)

    def get_topics(self, fromTime=None, toTime=None):
        """Return the distinct ``__topic__`` values seen in [fromTime, toTime].

        Returns [] (after printing the error) on any failure.
        """
        try:
            req = GetLogsRequest(
                self.project,
                self.logstore,
                fromTime=fromTime,
                toTime=toTime,
                topic=self.topic,
                query='*|select "__topic__" group by "__topic__"')
            res = self.client.get_logs(req)
            return [log.get_contents()['__topic__'] for log in res.get_logs()]
        except Exception as e:
            print("Get topic error: %s" % str(e))
            return []

    def get_logs(self, fromTime, toTime):
        """Return the contents of all logs in [fromTime, toTime].

        Returns [] (after printing the error) on any failure.

        Fix: the original method carried a second, shard-cursor based
        implementation (list_shards/get_cursor/pull_logs) *after* the
        return statements above — it was unreachable and has been removed.
        """
        try:
            req = GetLogsRequest(self.project, self.logstore,
                                 fromTime=fromTime, toTime=toTime,
                                 query='*')
            res = self.client.get_logs(req)
            return [log.get_contents() for log in res.get_logs()]
        except Exception as e:
            print("Get logs error: %s" % str(e))
            return []
class MNNLogger(object):
    """Best-effort telemetry logger that ships MNN tool logs to Aliyun SLS.

    Endpoint / project / logstore names are stored base64-encoded;
    credentials are short-lived STS tokens fetched from a function-compute
    proxy URL. Every failure is swallowed so logging can never break the
    caller's workflow.
    """

    def __init__(self):
        # STS credential service URL (base64 to keep it out of casual greps).
        self._url = base64.urlsafe_b64decode(
            b'aHR0cHM6Ly8xMDMyMjc3OTQ5NDA5MTkzLmNuLWhhbmd6aG91LmZjLmFsaXl1bmNzLmNvbS8yMDE2LTA4LTE1L3Byb3h5L21ubi1zZXJ2aWNlL3dvcmtzdGF0aW9uLXN0cy8='
        ).decode()
        self._endpoint = base64.urlsafe_b64decode(
            b'aHR0cHM6Ly9jbi1oYW5nemhvdS5sb2cuYWxpeXVuY3MuY29t').decode()
        self._log_project = base64.urlsafe_b64decode(
            b'bW5uLW1vbml0b3I=').decode()
        self._log_store = base64.urlsafe_b64decode(b'bW5uLXRvb2xz').decode()
        self._network_available = True
        self._activate()

    def _activate(self):
        """Fetch temporary STS credentials and (re)build the SLS client.

        On any failure the logger marks the network unavailable and all
        subsequent put_log calls return False immediately.
        """
        try:
            req = urllib.request.Request(self._url)
            res = urllib.request.urlopen(req)
            data = res.read()
            temp_credentials = json.loads(data)
            access_key_id = temp_credentials['Credentials']['AccessKeyId']
            access_key = temp_credentials['Credentials']['AccessKeySecret']
            security_token = temp_credentials['Credentials']['SecurityToken']
            self._expire_time = temp_credentials['Credentials']['Expiration']
            self._client = LogClient(self._endpoint, access_key_id,
                                     access_key, security_token)
        except Exception:  # narrowed from bare except; still best-effort
            self._network_available = False

    def _is_token_valid(self):
        """Return True while the STS token has more than 60s of life left."""
        utc_date = datetime.datetime.strptime(self._expire_time,
                                              "%Y-%m-%dT%H:%M:%SZ")
        # Expiration is UTC; shift to UTC+8 before comparing against
        # the local clock (assumes the host runs in UTC+8 — TODO confirm).
        local_date = utc_date + datetime.timedelta(hours=8)
        now_time = int(time.time())
        if local_date.timestamp() - now_time < 60:
            return False
        else:
            return True

    def _get_machine_id(self, os_type):
        """Return a stable per-machine id for *os_type*, or a random
        uuid1-derived suffix when no platform id can be read."""
        machine_id = ""
        if os_type == "Linux":
            if os.path.exists("/etc/machine-id"):
                machine_id = os.popen(
                    "cat /etc/machine-id").readline().strip().lower()
        elif os_type == "Darwin":
            res = os.popen("ioreg -rd1 -c IOPlatformExpertDevice | grep UUID"
                           ).readline().strip().split('"')
            if len(res) > 1:
                machine_id = res[-2].lower()
        elif os_type == "Windows":
            # Bug fix: the original assigned the registry output to a local
            # `res` and never to machine_id, so Windows always fell through
            # to the random-uuid fallback. Raw string also avoids invalid
            # backslash-escape warnings (content unchanged).
            machine_id = os.popen(
                r"reg query HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Cryptography\ /v MachineGuid"
            ).read().strip().split(" ")[-1].lower()
        else:
            pass
        if machine_id == "":
            # uuid1().hex is 32 chars; keep the trailing 12 (node part).
            machine_id = uuid.uuid1().hex[20:]
        return machine_id

    def _collect_basic_logs(self):
        """Collect version / OS / machine-id fields attached to every log."""
        basic_logs = {}
        from ...version import __version__
        basic_logs["mnn_python_version"] = __version__
        os_type = platform.system()
        basic_logs["os"] = os_type
        basic_logs["machine_id"] = self._get_machine_id(os_type)
        return basic_logs

    def _collect_contents(self, log_dict, contents):
        """Append (key, value) string pairs from *log_dict* to *contents*;
        dict values are JSON-encoded, everything else str()-ed."""
        for key, value in log_dict.items():
            key = str(key)
            if isinstance(value, dict):
                value = json.dumps(value)
            else:
                value = str(value)
            contents.append((key, value))

    def put_log(self, log_dict, topic):
        """Send *log_dict* (plus basic machine info) under *topic*.

        Returns True on success, False on any failure (best-effort).
        """
        if not self._network_available:
            print("network not available...")
            return False
        try:
            if not self._is_token_valid():
                self._activate()  # refresh the STS token before sending
            contents = []
            self._collect_contents(self._collect_basic_logs(), contents)
            self._collect_contents(log_dict, contents)
            log_item = LogItem()
            log_item.set_time(int(time.time()))
            log_item.set_contents(contents)
            req = PutLogsRequest(self._log_project, self._log_store, topic,
                                 '', [
                                     log_item,
                                 ])
            res = self._client.put_logs(req)
            return True
        except Exception:  # narrowed from bare except; still best-effort
            return False
class LogtailHeartbeatMonitor: def __init__(self): # SLS 项目名,其中所有的机器组都会被监控。 self.__project_name = '<your_sls_project_name>' # SLS 项目所属区域的 endpoint。 self.__endpoint = '<endpoint_of_your_sls_project_region>' # cn-hangzhou.log.aliyuncs.com # 心跳超时阈值(秒),超过此阈值的机器可能存在异常,默认为 15 分钟,可根据需求调整。 self.__hb_timeout_threshold = 15 * 60 # 服务日志项目名:存放指定服务日志的 SLS 项目。 self.__logtail_status_project_name = '<status_log_project_name>' # log-service-<your_aliuid>-<region_name> # 状态日志查询的时间范围(秒),默认为最近 10 分钟。 self.__query_range = 10 * 60 # 状态日志数阈值:每分钟一条,10 分钟内少于此阈值判定为异常。 self.__status_log_count_threshold = 8 # at least 8 status logs during recent 10 minutes. # 用于上报异常信息的 project/logstore,为空表示不上报至 SLS。 self.__report_project_name = self.__project_name # same project by default self.__report_logstore = '' self.__client = LogClient( endpoint=self.__endpoint, accessKeyId='', # access key to call SLS APIs. accessKey='') def inspect(self): abnormal_machines = self.__do_inspect() if abnormal_machines: print 'abnormal machines are found: ' print json.dumps(abnormal_machines, indent=True) self.__report({ 'type': 'abnormal_machines', 'count': len(abnormal_machines), 'machines': ','.join(abnormal_machines.keys()) }) sys.exit(1) def __do_inspect(self): machine_groups = self.__client.list_machine_group( self.__project_name, offset=0, size=-1).get_machine_group() if not machine_groups: print 'no machine group in project %s' % self.__project_name return print 'machine groups (count %s): %s' % (len(machine_groups), machine_groups) hb_timeout_machines = {} for m in machine_groups: machines = self.__inspect_machine_group(m) for ip, meta in machines.items(): if ip not in hb_timeout_machines: hb_timeout_machines[ip] = meta print 'heartbeat timeout machines (count %s): %s' % ( len(hb_timeout_machines), hb_timeout_machines.keys()[0:10]) if not hb_timeout_machines: return abnormal_machines = {} machine_status_count = self.__count_status_log( hb_timeout_machines.keys()) for machine_ip, machine_meta in 
hb_timeout_machines.items(): log_count = machine_status_count.get(machine_ip, 0) if log_count < self.__status_log_count_threshold: machine_meta['status_log_count'] = log_count abnormal_machines[machine_ip] = machine_meta else: print 'log count of machine %s: %s' % (machine_ip, log_count) return abnormal_machines def __report(self, report_data): """ Args: report_data: dict[string]string. """ if not self.__report_logstore: return log = LogItem() for key, data in report_data.items(): log.push_back(key, '%s' % data) req = PutLogsRequest(project=self.__project_name, logstore=self.__report_logstore, logitems=[log]) self.__client.put_logs(req) def __inspect_machine_group(self, group_name): abnormal_machines = {} machines = self.__client.list_machines(self.__project_name, group_name).get_machines() cur_time = int(time.time()) for machine_status in machines: if cur_time - machine_status.heartbeat_time >= self.__hb_timeout_threshold: abnormal_machines[machine_status.ip] = { 'group_name': group_name, 'last_heartbeat_time': machine_status.heartbeat_time } return abnormal_machines def __count_status_log(self, machines): count_rst = {} batch_count = 25 for batch_seq in range(0, len(machines) / batch_count + 1): batch_machines = machines[batch_count * batch_seq:batch_count * (batch_seq + 1)] ip_condition = ' or '.join(['ip:' + ip for ip in batch_machines]) query = '__topic__: logtail_status and (%s) | select ip, count(*) as c group by ip' % ip_condition try: res = self.__do_get_log( project=self.__logtail_status_project_name, logstore='internal-diagnostic_log', query=query, from_time=int(time.time()) - self.__query_range, to_time=int(time.time())) for log in res.get_logs(): ip, count = log.contents['ip'], log.contents['c'] count_rst[ip] = count except Exception as e: self.__report({ 'type': 'get_log_error', 'query': query, 'err': e.message }) return count_rst def __do_get_log(self, project, logstore, query, from_time, to_time): err_msg = '' for idx in range(0, 10): try: res = 
self.__client.get_log(project=project, logstore=logstore, query=query, from_time=from_time, to_time=to_time) if not res.is_completed(): err_msg += '[%s] incomplete' % idx continue return res except Exception as e: err_msg += '[%s] get_log error: %s\n' % (idx, e) finally: time.sleep(1) raise err_msg