def loadcache():
    """Load pre-configured HDF caches into DataStore.

    Populates ``DataStore._datadict`` for every key in ``_preloadkeys`` and
    for the keys mapped from ``_preloadids``, then loads the shared preload
    frame into ``DataStore._cachedf`` and stamps ``_lastloadtime``.

    Raises:
        CeQuantCoreError: when an id in ``_preloadids`` has no key mapping.
    """
    for key in DataStore._preloadkeys:
        try:
            # NOTE(review): original wrote to DataStore.datadict here but to
            # DataStore._datadict in the loop below; unified on _datadict,
            # which matches the other private attributes (_cachedf, ...).
            DataStore._datadict[key] = pd.read_hdf(
                DATA_BASE_PATH + os.path.sep +
                DataStore._get_filename_by_key(key))
        except Exception:
            # best-effort: a missing/corrupt cache file is logged, not fatal
            logger.warning(
                'key %s file can not load, maybe something wrong' % key)
    for id in DataStore._preloadids:
        key = DataStore._id_key_map.get(id)
        if key is None:
            raise CeQuantCoreError('error config preloadids')
        elif key not in DataStore._preloadkeys:
            try:
                DataStore._datadict[key] = pd.read_hdf(
                    DATA_BASE_PATH + os.path.sep +
                    DataStore._get_filename_by_key(key))
            except Exception:
                logger.warning(
                    'key %s file can not load, maybe something wrong' % key)
    try:
        DataStore._cachedf = pd.read_hdf(
            DATA_BASE_PATH + os.path.sep + DataStore._preloadfile)
    except Exception:
        # fixed: original called the non-existent logger.warnning(), which
        # would raise AttributeError instead of logging the problem
        logger.warning('pre load file %s lost in %s' %
                       (DataStore._preloadfile, DATA_BASE_PATH))
    DataStore._lastloadtime = datetime.now().strftime('%Y%m%d-%H%M%S')
def prepare(self):
    """Check the client platform before handling the request.

    Parses the JSON request body, then inspects the User-Agent header to
    decide whether the client is a mobile device (``self.is_mobile``).
    Agents that cannot be parsed are assumed to be mobile.
    """
    # parse json request arguments (application/json)
    self.parse_json_request_body()
    self.agent_str = self.request.headers.get('User-Agent', '')
    self.agent = None
    self.is_mobile = False
    self.task_resp = None
    if self.agent_str:
        try:
            self.agent = user_agents.parse(self.agent_str)
            self.is_mobile = self.agent.is_mobile
        except UnicodeDecodeError:
            access_log.warning(
                'Unicode decode error, agent str: {}'.format(
                    self.agent_str))
            # assume user platform is mobile
            self.is_mobile = True
        except Exception:
            # fixed: bare except also swallowed SystemExit/KeyboardInterrupt
            access_log.warning(
                'Parse user-agent failed, unknown exception.', exc_info=True)
            self.is_mobile = True
def log_exception(self, typ, value, tb):
    """Log an uncaught exception raised while handling a request.

    HTTPErrors carrying a log message are additionally reported as a
    warning together with the request summary; the full traceback is
    logged as an error with the request object for context.

    Args:
        typ: exception class.
        value: exception instance.
        tb: traceback object.
    """
    if isinstance(value, HTTPError):
        if value.log_message:
            # lazy %-style logging: the message template plus its args
            format = '%d %s: ' + value.log_message
            args = ([value.status_code, self._request_summary()] +
                    list(value.args))
            access_log.warning(format, *args)
    # NOTE(review): reconstructed from collapsed source — the error line
    # appears to run for every exception, HTTPError or not; confirm.
    access_log.error('Exception: %s\n%r', self._request_summary(),
                     self.request, exc_info=(typ, value, tb))
def timeout_handler(stream, getstr):
    """Abort a timed-out request on *stream*.

    Writes a generic error message plus the EOF marker to the client and
    closes the stream; a no-op when the stream is already closed.

    Args:
        stream: tornado-style IOStream to shut down.
        getstr: request description used in the timeout log line.
    """
    if stream.closed():
        return
    try:
        stream.write(create_error_msg('unknown error').out())
        stream.write(SOCKET_EOF)
        stream.close()
        logger.warning('%s request timeout' % getstr)
    except Exception:
        # fixed: bare except; writes can fail if the peer vanished — still
        # make sure the stream is closed (exc_info records the cause)
        stream.close(exc_info=True)
def _make_proxy_request(self, request_data):
    """Build the outgoing ``HTTPRequest`` for a proxied agent call.

    Coroutine: may yield to fetch keystone auth headers when the request
    asks for them.

    Args:
        request_data: dict describing the request (url, method, data,
            post_type, timeout, validate_cert, max_http_redirects, role,
            keystone, ...).

    Raises:
        gen.Return: with the prepared request, or empty when keystone
            auth headers could not be obtained.
    """
    timeout = float(request_data.get("timeout", DEFAULT_TIMEOUT))
    # fixed: `bool(x or True)` was always True, so certificate validation
    # could never be disabled; default to True only when the key is absent
    validate_cert = bool(request_data.get("validate_cert", True))
    max_redirects = request_data.get("max_http_redirects") or 0
    follow_redirects = max_redirects > 0  # 0 means do not follow redirects
    url = request_data.get("url")
    params = request_data.get("data")
    post_type = request_data.get("post_type")
    if params and post_type is None:
        # no post type: fold the data into the query string
        url = "%s?%s" % (url, urlencode(params))
    logger.info("[%s]agent request url: %s", self.id, url)
    proxy_request = HTTPRequest(
        url,
        validate_cert=validate_cert,
        headers=self._get_proxy_request_headers(request_data),
        method=request_data.get("method", "GET"),
        allow_nonstandard_methods=True,
        connect_timeout=timeout,
        request_timeout=timeout,
        streaming_callback=self._streaming_callback,
        header_callback=self._header_callback,
        follow_redirects=follow_redirects,
        max_redirects=max_redirects,
        prepare_curl_callback=self.prepare_curl_callback,
    )
    role_name = request_data.get("role")
    if role_name:
        InterfaceRoleManager.set_curl_interface_role(
            proxy_request,
            role_name,
        )
    keystone_auth_info = request_data.get("keystone")
    if keystone_auth_info:
        # fixed: the %s placeholder had no argument, which made the logging
        # module report a formatting error instead of the message
        logger.warning(
            "[%s]agent request required keystone token",
            self.id,
        )
        auth_headers = yield self._get_keystone_auth_headers(
            keystone_auth_info,
            validate_cert=validate_cert,
        )
        if not auth_headers:
            raise gen.Return()
        proxy_request.headers.update(auth_headers)
    body = self._get_request_body(request_data)
    if body:
        proxy_request.body = body
    raise gen.Return(proxy_request)
def check_pn_privilege(pn, user):
    """Check whether *user* may access private network *pn*.

    Returns:
        (True, record) on success; (False, HTTPError) when access is
        denied — 427 when no privilege record exists, 431 when the
        record's ban bit (bit 30 of ``mask``) is set.
    """
    try:
        record = store.check_pn_privilege(pn, user)
    except Exception:
        # fixed: bare except; any store failure is treated as "no record"
        record = None
    if not record:
        access_log.warning(
            '{} can\'t access private network : {}'.format(user, pn))
        return False, HTTPError(427)
    mask = int(record.get('mask', 0))
    # bit 30 of the mask marks the user as banned from this network
    if mask >> 30 & 1:
        return False, HTTPError(431)
    return True, record
def _build_tree(self): for indx, token in enumerate(self.sentence): try: head_id = int(token.head_id) except Exception as ex: access_log.warning("提供了错误的数据格式,请检查第{}行数据...".format(indx)) continue if head_id == -1: self.root.right_child.append(token) elif indx > head_id: token.parent = self.sentence[head_id] self.sentence[head_id].right_child.append(token) else: token.parent = self.sentence[head_id] self.sentence[head_id].left_child.append(token)
def write_error(self, status, **kargs):
    """Send an error response.

    Logs the formatted traceback (tagged with this request's uuid) at a
    severity matching *status*, writes the optional ``data`` payload as
    JSON, and finishes the response.
    """
    self.set_headers()
    exc_info = kargs.pop('exc_info', None)
    if exc_info:
        trace = ''.join(traceback.format_exception(*exc_info)).rstrip('\n')
        message = '%s\n%s' % (self.uuid, trace)
        # severity follows the HTTP status class
        if status >= 500:
            access_log.error(message)
        elif status >= 400:
            access_log.warning(message)
        elif status >= 200:
            access_log.info(message)
    payload = kargs.pop('data', None)
    if payload:
        self.write_json(payload)
    self.finish()
def __exit__(self, exc_type, exc_val, exc_tb):
    """Log the outcome of the connection, disconnect, and suppress errors.

    NOTE(review): the `==` comparisons match the exact exception class
    only (subclasses fall through) — kept as-is to preserve behavior.
    """
    if exc_type == toro.Timeout:
        access_log.debug("[uid: %s] connection timeout" % self.client_uid)
    elif exc_type == StreamClosedError:
        access_log.warning("[uid: %s] stream closed unexpectedly" % self.client_uid)
    elif exc_type == ConnectError:
        self.stream.close()
        # fixed: exc_val.message does not exist on Python 3 exceptions and
        # raised AttributeError here; format the exception itself instead
        access_log.info("[uid: %s] connection refused: %s" % (self.client_uid, exc_val))
    elif exc_type == Exception:
        access_log.exception("[uid: %s] error handling stream" % self.client_uid, exc_info=True)
    if exc_val is not None:
        if self.client is not None:
            self.client.disconnect()
    return True  # suppress the raised exception
def prepare(self):
    """Check the client platform before handling the request.

    Parses the JSON request body, then inspects the User-Agent header to
    decide whether the client is a mobile device (``self.is_mobile``).
    Agents that cannot be parsed are assumed to be mobile.
    """
    # parse json request arguments (application/json)
    self.parse_json_request_body()
    self.agent_str = self.request.headers.get('User-Agent', '')
    self.agent = None
    self.is_mobile = False
    self.task_resp = None
    if self.agent_str:
        try:
            self.agent = user_agents.parse(self.agent_str)
            self.is_mobile = self.agent.is_mobile
        except UnicodeDecodeError:
            access_log.warning(
                'Unicode decode error, agent str: {}'.format(self.agent_str))
            # assume user platform is mobile
            self.is_mobile = True
        except Exception:
            # fixed: bare except also swallowed SystemExit/KeyboardInterrupt
            access_log.warning(
                'Parse user-agent failed, unknown exception.', exc_info=True)
            self.is_mobile = True
def __exit__(self, exc_type, exc_val, exc_tb):
    """Log the outcome of the connection, disconnect, and suppress errors.

    NOTE(review): the `==` comparisons match the exact exception class
    only (subclasses fall through) — kept as-is to preserve behavior.
    """
    if exc_type == toro.Timeout:
        access_log.debug("[uid: %s] connection timeout" % self.client_uid)
    elif exc_type == StreamClosedError:
        access_log.warning('[uid: %s] stream closed unexpectedly' % self.client_uid)
    elif exc_type == ConnectError:
        self.stream.close()
        # fixed: exc_val.message does not exist on Python 3 exceptions and
        # raised AttributeError here; format the exception itself instead
        access_log.info('[uid: %s] connection refused: %s' % (self.client_uid, exc_val))
    elif exc_type == Exception:
        access_log.exception('[uid: %s] error handling stream' % self.client_uid, exc_info=True)
    if exc_val is not None:
        if self.client is not None:
            self.client.disconnect()
    return True  # suppress the raised exception
def read_text(cls, file_path): """根据输入文本文档数据生成依存树""" # sentences列表主要放置校正索引后的实体识别结果 sentences = [] # tree_list列表放置sentences列表中实体构建出来的依存树 tree_list = [] # sentences_value_list内每个元素为1个列表,每个句子的实体识别结果分别存储其内 sentences_value_list = [] with open(file_path, encoding='utf-8') as f: # 定义初始偏移量 start, end = 0, 0 # 定义tokens列表 tokens = [] # sentence_value_list,用于回收实体内容,用于拼装整段文本 sentence_value_list = [] for line_no, line in enumerate(f): line = line.strip() if not line: # 遇到下一句 start, end = 0, 0 # 下一句话,从头初始化word偏移量 if sentence_value_list: sentences_value_list.append( sentence_value_list) # 每个句子的实体识别结果分别存储 sentence_value_list = [] if tokens: sentence, sent_tree = cls.read_tokens(tokens) sentences.append(sentence) tree_list.append(sent_tree) tokens = [] continue line_list = line.split('\t') if len(line_list) < 4: access_log.warning("第{}行输入数据格式不正确,请检查...".format(line_no)) continue # 格式校正 word, token_id, tag, head_id = line_list[0:4] word, tag = str(word).strip(), str(tag).strip() sentence_value_list.append(word) if isinstance(token_id, float): token_id = int(token_id) if isinstance(head_id, float): head_id = int(head_id) token_id, head_id = str(token_id).strip(), str(head_id).strip() # 计算每个实体的偏移量 end = start + len(word) offset = (start, end) start = end # 去除未识别实体(unk) if tag == 'unk': continue # 实例化为token对象并添加到tokens列表中 tokens.append(Token(word, token_id, tag, head_id, offset)) # 最后一句话的处理 if sentence_value_list: sentences_value_list.append(sentence_value_list) if tokens: sentence, sent_tree = cls.read_tokens(tokens) sentences.append(sentence) tree_list.append(sent_tree) return sentences, tree_list, sentences_value_list
def post(self):
    """Query flow records within a time window.

    This endpoint faces user input, so every value is validated before it
    reaches the SQL statement: IPs via ``inet_aton``, times via
    ``to_time_stamp``, and the flag/protocol fields via ``int()``.
    ``start_time``/``end_time`` are required; ``ip_src``, ``ip_dst``,
    ``protocol``, ``is_loop``, ``is_drop``, ``is_probe`` are optional
    (flags accept only 0/1 or absent).
    """
    start_time = self.get_argument('start_time', None)
    end_time = self.get_argument('end_time', None)
    ip_src = self.get_argument('ip_src', '0.0.0.0')
    ip_dst = self.get_argument('ip_dst', '0.0.0.0')
    protocol = self.get_argument('protocol', -1)
    is_loop = self.get_argument('is_loop', -1)
    is_drop = self.get_argument('is_drop', -1)
    is_probe = self.get_argument('is_probe', -1)
    import socket
    try:
        # IP validation: inet_aton rejects anything not a dotted quad,
        # which also keeps the string-formatted SQL below injection-free
        socket.inet_aton(ip_src)
        socket.inet_aton(ip_dst)
    except OSError:
        access_log.error('Get wrong ip {}, {}'.format(ip_src, ip_dst))
        self.write_json(None, status_code=400, msg='参数错误')
        return
    except Exception as e:
        # fixed: the original logged here but fell through with an
        # unvalidated ip straight into the SQL — reject instead
        access_log.error('Get error {}'.format(e))
        self.write_json(None, status_code=400, msg='参数错误')
        return
    try:
        # time validation
        start_time = to_time_stamp(start_time)
        end_time = to_time_stamp(end_time)
    except (ValueError, TypeError):
        access_log.error('Get err time {}, {}'.format(
            start_time, end_time))
        self.write_json(None, status_code=400, msg='参数错误')
        return
    except Exception as e:
        # fixed: fell through with unconverted times and crashed later
        access_log.error('Get error {}'.format(e))
        self.write_json(None, status_code=400, msg='参数错误')
        return
    if start_time > end_time:
        # the window must be ordered
        access_log.warning('Get err time {}, {}'.format(
            start_time, end_time))
        self.write_json(None, status_code=400, msg='参数错误')
        return
    try:
        # convert the optional flags; int() also blocks injection here
        is_probe = int(is_probe)
        is_drop = int(is_drop)
        is_loop = int(is_loop)
        protocol = int(protocol)
    except (ValueError, TypeError):
        access_log.error('Get Err {}, {}, {}, {}'
                         .format(is_probe, is_drop, is_loop, protocol))
        self.write_json(None, status_code=400, msg='参数错误')
        return
    last_sql = QUERY_SQL
    day1 = int(time_to_day(start_time))
    day2 = int(time_to_day(end_time))
    # fdate is the indexed column, so the date-range predicate goes first
    last_sql += ' AND fdate BETWEEN {} AND {}'.format(day1, day2)
    # remaining predicates are added only when not at their defaults; all
    # values were validated above, so the formatting cannot inject SQL
    if ip_src != '0.0.0.0':
        # fixed: the IP is a string value and must be quoted in SQL
        last_sql += " AND s_ip = '{}'".format(ip_src)
    if ip_dst != '0.0.0.0':
        last_sql += " AND d_ip = '{}'".format(ip_dst)
    if is_loop != -1:
        last_sql += ' AND is_loop = {}'.format(is_loop)
    if is_drop != -1:
        last_sql += ' AND is_drop = {}'.format(is_drop)
    if is_probe != -1:
        last_sql += ' AND is_probe = {}'.format(is_probe)
    # NOTE(review): `protocol` is validated but never used in the query —
    # confirm whether a protocol predicate is missing here
    access_log.debug(last_sql)
    rlts = []
    cur = None
    try:
        cur = self.db.cursor()
        cur.execute(last_sql)
        rlts = cur.fetchall()
    except Exception as e:
        access_log.error('Get error {}'.format(e))
    finally:
        # fixed: cur was unbound here when cursor() itself raised
        if cur is not None:
            cur.close()
    ret_rlts = []
    day1_ms = (start_time % (24 * 3600)) * 1000
    day2_ms = (end_time % (24 * 3600)) * 1000
    for r in rlts:
        # drop boundary-day rows outside the requested intra-day range
        if r['fdate'] == day1 and r['generate_time'] < day1_ms:
            continue
        if r['fdate'] == day2 and r['generate_time'] > day2_ms:
            continue
        ret_rlts.append(r)
    # fixed: the filtered list was computed but the raw rows were returned
    self.write_json(ret_rlts)
def _anti_json(self, dicts, spec=[]): for key, val in dicts.items(): if hasattr(self, key) and key in spec: setattr(self, key, val) else: access_log.warning(u'%s没有属性%s' % (self.__class__.__name__, key))
def get_desc_by_id(id):
    """Return the description mapped to *id*, or None when unknown.

    A missing id is logged as a warning before returning None.
    """
    desc = DataStore._id_desc_map.get(id)
    if desc is not None:
        return desc
    logger.warning('can not find desc of %s', id)
    return None