def _check_getvariable_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``getvariable`` expression and return the type of its value.

    :param exp: expression object; ``trigger_event``, ``trigger_fields`` and
        ``variable`` are read from it
    :param trigger_event: trigger event that ``exp.trigger_event`` must equal
    :param error_prefix: text prepended to every error message
    :return: the second element of the variable's ``value`` schema entry when
        it is truthy, otherwise the first element
    :raise RuntimeError: when the event, trigger fields or variable
        configuration is invalid
    """
    own_trigger_event = exp.trigger_event
    if not isinstance(own_trigger_event, list) or len(own_trigger_event) != 2:
        raise RuntimeError(utf8(error_prefix) + '事件配置不正确')
    check_event_exist(own_trigger_event[1], error_prefix)
    if own_trigger_event != trigger_event:
        raise RuntimeError(utf8(error_prefix) + '触发事件不一致')

    trigger_fields = exp.trigger_fields
    if not trigger_fields:
        raise RuntimeError(utf8(error_prefix) + '触发字段为空')
    for field in trigger_fields:
        check_field_exist(own_trigger_event[1], field, error_prefix)

    variable = exp.variable
    if not isinstance(variable, list) or len(variable) != 2:
        raise RuntimeError(utf8(error_prefix) + '变量配置不正确')
    check_variable_exist(variable[1], error_prefix)

    # Forward the prefix so schema lookup errors carry the same context as
    # every other error raised from this check.
    variable_schema = get_variable_schema(variable[1], error_prefix)

    # The data is mostly int/double in practice, but the declared type wins.
    value_type = variable_schema['value']
    # The subtype (second element) takes precedence; fall back to the type.
    return value_type[1] or value_type[0]
def gen_filter_from_location_exp(location_exp, trigger_event):
    """
    Build a string filter from a ``getlocation`` expression.

    :param location_exp: expression object; ``source_event_field``, ``op``,
        ``location_value`` and ``location_type`` are read from it
    :param trigger_event: two-item sequence that the first two dotted parts
        of ``source_event_field`` must match
    :return: the result of ``_gen_string_filter``
    :raise RuntimeError: when the source field path or the operator is invalid
    """
    condition_by_op = {
        '=': 'locationequals',
        '!=': '!locationequals',
        'belong': 'locationcontainsby',
        '!belong': '!locationcontainsby',
    }

    path = location_exp.source_event_field.split('.')
    path_matches_trigger = (
        len(path) == 3
        and path[0] == trigger_event[0]
        and path[1] == trigger_event[1]
    )
    if not path_matches_trigger:
        raise RuntimeError('地理位置参数不正确')

    if location_exp.op not in condition_by_op:
        raise RuntimeError('操作不正确')
    condition_type = condition_by_op[location_exp.op]

    # A list of locations is flattened into one comma separated string.
    raw_value = location_exp.location_value
    if isinstance(raw_value, list):
        raw_value = ','.join(raw_value)

    return _gen_string_filter(
        path[1],
        path[2],
        condition_type,
        utf8(raw_value),
        utf8(location_exp.location_type),
    )
def _gen_string_filter(source, string_field, op, op_value, param=''):
    """
    Build a ``Filter`` for a condition on a string field.

    ``in`` / ``!in`` split ``op_value`` on commas (at most 10 items) and
    combine one ``==`` / ``!=`` filter per item with ``or`` / ``and``.
    Every other operator produces a single ``simple`` filter, after the
    ``regex`` / ``contain`` operator names are rewritten.

    :raise RuntimeError: when the operator is empty or unsupported, or when
        more than 10 items are given to ``in`` / ``!in``
    """
    if not op or op not in _string_condition_ops:
        raise RuntimeError('string类型不支持({})操作'.format(utf8(op)))

    # operator -> (per-item operator, combining filter type)
    membership_ops = {'in': ('==', 'or'), '!in': ('!=', 'and')}
    if op in membership_ops:
        item_op, combinator = membership_ops[op]
        items = op_value.split(',')
        if len(items) > 10:
            raise RuntimeError('属于最多支持10个属性')
        children = [
            _gen_string_filter(source, string_field, item_op, item, param).get_dict()
            for item in items
        ]
        return Filter('', '', '', '', '', '', combinator, '', children)

    # Operator names that are rewritten before the filter is built.
    renamed_ops = {
        'regex': 'match',
        '!regex': '!match',
        'contain': 'contains',
        '!contain': '!contains',
    }
    op = renamed_ops.get(op, op)
    return Filter(source, string_field, '', '', op, utf8(op_value), 'simple', param, None)
def parser_notice(*args):
    """
    Turn one positional row into a notice dict.

    The values are paired with ``QUERY_NOTICE_PARAMS`` in order and passed
    through ``utf8``; ``count`` is then set to the raw last argument.
    """
    notice = dict(
        (name, utf8(value)) for name, value in zip(QUERY_NOTICE_PARAMS, args)
    )
    # The last argument is stored as-is, not utf8-converted.
    notice['count'] = args[-1]
    return notice
def check_and_return_int(str_value, error_prefix=''):
    """
    Convert a value to ``int`` or fail with a readable error.

    :param str_value: value to convert
    :param error_prefix: text prepended to the error message
    :return: ``int(str_value)``
    :raise RuntimeError: when the value cannot be converted to an integer
    """
    try:
        return int(str_value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are translated; a bare ``except`` would
        # also trap KeyboardInterrupt and SystemExit.
        raise RuntimeError(
            utf8(error_prefix) + '(%s)不是合理的数字' % utf8(str_value))
def parse(self, data):
    """
    Parse text in properties format into a dict.

    Blank lines and lines starting with ``#`` are skipped, and so are
    entries whose key is empty. Any exception, including a line that has
    no ``=``, is logged and makes the whole call return an empty dict.

    :param data: text in properties format
    :return: dict of stripped keys to stripped values
    """
    try:
        parsed = {}
        for raw_line in data.splitlines():
            line = utf8(raw_line).strip()
            if not line or line.startswith("#"):
                continue
            # Unpacking fails on purpose when the line contains no "=".
            key, value = [part.strip() for part in line.split("=", 1)]
            if key:
                parsed[key] = value
        return parsed
    except Exception as err:
        LOGGER.error(
            "config %s: fail to parse properties config, the error is %s",
            self.name, err)
        return {}
def get_python_json_friendly(data):
    """
    Replace literal ``\\xNN`` escape sequences in ``data`` with the byte NN.

    Falsy input returns ``""``; input without ``\\x`` is returned unchanged.
    Otherwise the text is scanned character by character, and a backslash
    followed by ``x``/``X`` and two characters that parse as hex becomes one
    byte. Anything that fails to parse is copied through literally.

    :return: ``str()`` of the assembled bytearray
    """
    if not data:
        return ""
    if "\\x" not in data:
        return data

    data = utf8(data)
    total = len(data)
    decoded = bytearray()
    pos = 0
    while pos < total:
        current = data[pos]
        starts_escape = (
            current == "\\"
            and pos <= total - 4
            and data[pos + 1].lower() == "x"
        )
        if starts_escape:
            try:
                decoded.append(int(data[pos + 2:pos + 4], 16))
            except Exception:
                # Not a usable hex pair: fall through and copy the backslash.
                pass
            else:
                pos += 4
                continue
        decoded.append(current)
        pos += 1
    return str(decoded)
def check_and_return_pattern(str_value, error_prefix=''):
    """
    Compile a value as a regular expression or fail with a readable error.

    :param str_value: pattern text
    :param error_prefix: text prepended to the error message
    :return: the compiled pattern object
    :raise RuntimeError: when the value cannot be compiled
    """
    try:
        return re.compile(str_value)
    except (re.error, TypeError, ValueError, OverflowError, RuntimeError):
        # Covers bad syntax (re.error), non-string input (TypeError) and the
        # size/recursion failures the regex compiler can raise. A bare
        # ``except`` would also trap KeyboardInterrupt and SystemExit.
        raise RuntimeError(
            utf8(error_prefix) + '(%s)不是合理的正则表达式' % utf8(str_value))
def _check_exp_and_return_type(exp, trigger_event, error_prefix):
    """
    Dispatch an expression to the checker registered for its kind.

    The checker is selected by ``(exp.type, exp.subtype)`` and called with
    the same three arguments; its return value is passed through.

    :raise RuntimeError: when no checker exists for the expression kind
    """
    checkers = {
        ('event', ''): _check_event_expression_and_return_type,
        ('constant', ''): _check_constant_expression_and_return_type,
        ('func', 'getvariable'): _check_getvariable_expression_and_return_type,
        ('func', 'count'): _check_count_expression_and_return_type,
        ('func', 'setblacklist'): _check_setblacklist_expression_and_return_type,
        ('func', 'time'): _check_time_expression_and_return_type,
        ('func', 'sleep'): _check_sleep_expression_and_return_type,
        ('func', 'spl'): _check_spl_expression_and_return_type,
        ('func', 'getlocation'): _check_location_expression_and_return_type,
    }

    checker = checkers.get((exp.type, exp.subtype))
    if checker:
        return checker(exp, trigger_event, error_prefix)

    raise RuntimeError(
        utf8(error_prefix)
        + '表达式(%s:%s)不支持' % (utf8(exp.type), utf8(exp.subtype)))
def _gen_number_filter(source, number_field, op, op_value, is_long=False):
    """
    Build a ``Filter`` for a condition on a numeric field.

    * ``between``: ``op_value`` must be ``"low,high"``; produces an ``and``
      filter of ``>= low`` and ``<= high``.
    * ``in`` / ``!in``: ``op_value`` is split on commas (at most 10 items)
      and one ``==`` / ``!=`` filter per item is combined with ``or`` /
      ``and``.
    * anything else: ``op_value`` is converted with ``int`` (``is_long``) or
      ``float`` and a single ``simple`` filter is produced.

    :raise RuntimeError: on an unsupported operator, a wrong number of
        arguments, or a value that is not a number
    """
    if not op or op not in _number_condition_ops:
        raise RuntimeError('数字类型不支持({})操作'.format(utf8(op)))

    if op == 'between':
        parts = op_value.split(',')
        if len(parts) != 2:
            raise RuntimeError('介于需要两个参数')
        left, right = parts
        bounds = [
            Filter(source, number_field, '', '', bound_op, utf8(bound),
                   'simple', '', None).get_dict()
            for bound_op, bound in (('>=', left), ('<=', right))
        ]
        return Filter('', '', '', '', '', '', 'and', '', bounds)

    # operator -> (per-item operator, combining filter type)
    membership_ops = {'in': ('==', 'or'), '!in': ('!=', 'and')}
    if op in membership_ops:
        item_op, combinator = membership_ops[op]
        parts = op_value.split(',')
        if len(parts) > 10:
            raise RuntimeError('属于最多支持10个属性')
        children = [
            _gen_number_filter(source, number_field, item_op, part, is_long).get_dict()
            for part in parts
        ]
        return Filter('', '', '', '', '', '', combinator, '', children)

    # Simple condition: the operand must parse as a number.
    try:
        op_value = int(op_value) if is_long else float(op_value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are reported as "not a number"; a bare
        # ``except`` would also trap KeyboardInterrupt and SystemExit.
        raise RuntimeError('({})不是数字'.format(utf8(op_value)))
    return Filter(source, number_field, '', '', op, utf8(op_value), 'simple', '', None)
def gen_function(method, field):
    """
    Build the aggregation ``Function`` for ``count`` / ``distinct_count``.

    :param method: aggregation name; must be ``count`` or ``distinct_count``
    :param field: passed to ``Function`` as its third argument
    :raise RuntimeError: when ``method`` is empty or not supported
    """
    supported_methods = {'count', 'distinct_count'}
    if method and method in supported_methods:
        return Function(method, '', field, '', '', '')
    raise RuntimeError('不支持操作({})'.format(utf8(method)))
def get_variable_schema(variable_name, error_prefix=''):
    """
    Return the field mapping registered for a variable.

    :param variable_name: name of the variable in the ``nebula`` app
    :param error_prefix: text prepended to the error message
    :return: the registry's fields mapping for the variable
    :raise RuntimeError: when the variable or its field mapping is missing
    """
    if not get_variable_from_registry('nebula', variable_name):
        raise RuntimeError(
            utf8(error_prefix) + '变量(%s)定义配置不存在' % utf8(variable_name))

    fields_mapping = get_variable_fields_mapping_from_registry('nebula', variable_name)
    if fields_mapping:
        return fields_mapping

    raise RuntimeError(
        utf8(error_prefix) + '变量(%s)定义配置不存在' % utf8(variable_name))
def add_metrics(self, db, metrics_name, tags, value, expire_seconds=None, timestamp=None):
    """
    Write a single point with one float ``value`` field to ``db``.

    :param db: database to switch to and write into
    :param metrics_name: used as the point's ``measurement``
    :param tags: optional mapping; keys and values are utf8-converted
    :param value: converted with ``float`` and stored as field ``value``
    :param expire_seconds: not used by this implementation
    :param timestamp: not used by this implementation
    """
    # NOTE(review): ``timestamp`` is accepted but never written to the point;
    # confirm whether that is intentional.
    self.idb.switch_database(db)

    encoded_tags = {}
    if tags:
        encoded_tags = dict(
            (utf8(name), utf8(tag_value)) for name, tag_value in tags.iteritems()
        )

    point = {
        "measurement": metrics_name,
        "tags": encoded_tags,
        "fields": {"value": float(value)},
    }
    self.idb.write_points([point], database=db, time_precision="ms")
def gen_ordinary_filter(source_name, field, op, op_value):
    """
    Build a condition filter from an operator and its operand.

    The field type is looked up with ``get_variable_field_type``: ``string``
    goes to ``_gen_string_filter``; ``long`` and ``double`` go to
    ``_gen_number_filter``.

    :raise RuntimeError: when the field is empty, its type cannot be
        resolved, or the type is not supported
    """
    if not field:
        raise RuntimeError('条件的属性为空')

    field_type = get_variable_field_type(source_name, field)
    if not field_type:
        raise RuntimeError(
            '无法从事件{}中取得字段{}的定义'.format(utf8(source_name), utf8(field)))

    if field_type == 'string':
        return _gen_string_filter(source_name, field, op, op_value)
    if field_type in ('long', 'double'):
        return _gen_number_filter(
            source_name, field, op, op_value, is_long=(field_type == 'long'))

    raise RuntimeError('不支持类型为({})的条件'.format(utf8(field_type)))
def check_condition_support(left_type, operation_exp, right_value, error_prefix=''):
    """
    Check that an operator is legal for the left type and validate the right value.

    :param left_type: data type of the left side
    :param operation_exp: operator
    :param right_value: right-hand value
    :param error_prefix: text prepended to every error message
    :raise RuntimeError: when the type is unknown, the operator is not
        supported for the type, or a ``between`` operand does not have two
        comma separated parts
    """
    if left_type in ('long', 'int', 'double', 'float'):
        supported_ops = expressions_supported_on_number
    elif left_type in ('str', 'string'):
        supported_ops = expressions_supported_on_string
    else:
        raise RuntimeError(
            utf8(error_prefix) + '不支持类型为(%s)的操作' % utf8(left_type))

    if operation_exp not in supported_ops:
        # An unsupported operator may have no display name. Fall back to the
        # raw operator so a KeyError does not mask the real error.
        try:
            operation_name = expression_to_name_of_operations[operation_exp]
        except KeyError:
            operation_name = operation_exp
        raise RuntimeError(
            utf8(error_prefix)
            + '类型(%s)不支持操作(%s)' % (utf8(left_type), utf8(operation_name)))

    check_right_exp(left_type, operation_exp, right_value, error_prefix)

    if 'between' in operation_exp:
        # "between" needs exactly two comma separated arguments.
        if len(right_value.split(',')) != 2:
            raise RuntimeError(utf8(error_prefix) + '介于需要逗号分隔的两个数字')
def check_term(term, trigger_event, error_prefix):
    """
    Validate one strategy term.

    :param term: term object; ``remark``, ``scope``, ``left``, ``right`` and
        ``op`` are read from it
    :param trigger_event: trigger event passed to the expression checkers
    :param error_prefix: text prepended to every error message
    :raise RuntimeError: when the remark is missing (except for
        ``setblacklist`` terms), the scope is invalid, a required expression
        is missing, or an expression or condition check fails
    """
    left = term.left

    # Guard against a missing left expression here: dereferencing
    # ``left.subtype`` first would raise AttributeError before the explicit
    # "left expression is empty" check below could run.
    if term.remark is None and (left is None or left.subtype != 'setblacklist'):
        raise RuntimeError(utf8(error_prefix) + '条款描述为空')

    if term.scope not in {'realtime', 'profile'}:
        raise RuntimeError(utf8(error_prefix) + '条款的适用类型错误')

    if left is None:
        raise RuntimeError(utf8(error_prefix) + '条款左表达式为空')

    return_type = _check_exp_and_return_type(
        left, trigger_event, utf8(error_prefix) + '左表达式>>')

    # These function expressions take no right-hand expression.
    if left.subtype in {'setblacklist', 'time', 'getlocation', 'sleep', 'spl'}:
        return

    right = term.right
    if right is None:
        raise RuntimeError(utf8(error_prefix) + '条款右表达式为空')
    _check_exp_and_return_type(
        right, trigger_event, utf8(error_prefix) + '右表达式>>')

    # Event and non-event left expressions were checked identically in the
    # original if/else, so a single call covers both.
    check_condition_support(return_type, term.op, right.value, error_prefix)
def add_metrics(self, db, metrics_name, tags, value, expire_seconds, timestamp=None):
    """
    Write a single row in name/columns/points form to ``db``.

    Tags (utf8-converted), ``value`` and, when truthy, ``timestamp`` (as
    ``time``) all become columns of one row.

    :param expire_seconds: not used by this implementation
    """
    self.idb.switch_database(db)

    row = {}
    if tags:
        row = dict(
            (utf8(name), utf8(tag_value)) for name, tag_value in tags.iteritems()
        )
    row["value"] = value
    if timestamp:
        row["time"] = timestamp

    series = {
        "name": metrics_name,
        "columns": row.keys(),
        "points": [row.values()],
    }
    self.idb.write_points([series], database=db, time_precision="ms")
def _check_constant_expression_and_return_type(exp, trigger_event, error_prefix): """ 检验event expression的右值,应该是字符串常量. :param exp: :param trigger_event: :param error_prefix: :return: """ if exp.value is None: raise RuntimeError(utf8(error_prefix) + '常量不能为NULL值') # todo value type return 'string'
def gen_ip_trigger_variable_from_strategy(strategy, trigger_variable_name, is_delay=False):
    """
    Create and register the ip-dimension trigger variable of a strategy.

    Filters are generated from all ``event`` terms first and from all
    ``getlocation`` terms second. In delay mode the variable's filter is a
    single ``delay_strategy == strategy.name`` condition; otherwise it is
    the ``and`` of the generated filters (or ``{}`` when there are none).

    :return: the registered ``VariableModel``, or ``None`` for a falsy
        ``strategy``
    """
    if not strategy:
        return

    trigger_event = get_trigger_event(strategy, is_delay)

    collected = []
    for term in strategy.terms:
        if term.left.type != 'event':
            continue
        event_filter = gen_filter_from_event_exp(term.left, term.op, term.right)
        if event_filter:
            collected.append(event_filter)

    # Location conditions have lower priority, so they are appended last.
    for term in strategy.terms:
        expression = term.left
        if expression.type == 'func' and expression.subtype == 'getlocation':
            location_filter = gen_filter_from_location_exp(expression, trigger_event)
            if location_filter:
                collected.append(location_filter)

    if is_delay:
        total_filter = gen_ordinary_filter(
            trigger_event[1], 'delay_strategy', '==', strategy.name).get_dict()
    elif collected:
        total_filter = Filter(
            '', '', '', '', '', '', 'and', '',
            [item.get_dict() for item in collected]).get_dict()
    else:
        total_filter = {}

    remark = 'ip trigger for strategy {}'.format(utf8(strategy.name))
    sources = [{'app': trigger_event[0], 'name': trigger_event[1]}]
    variable = VariableModel(
        'realtime', 'nebula', trigger_variable_name, remark, remark, '',
        'enable', 'filter', '', '', '', sources, total_filter, {}, {}, ['c_ip'])
    add_variable_to_registry(variable)
    return variable
def gen_notice_statistics():
    """
    Aggregate the notices of one hour into per-tag rows for the risk list report.

    The window starts at ``settings.Working_TS`` (converted to milliseconds)
    and lasts one hour. Every notice whose strategy is listed in
    ``utils.Strategies_Weigh`` produces one row per tag of that strategy.
    Existing rows for the window are deleted before the new rows are
    inserted.

    This is a generator that yields the objects returned by the
    ``tornado_mysql`` calls. NOTE(review): it presumably runs under a
    coroutine decorator that is not visible in this block -- confirm.

    Failures are logged and not re-raised; the cursor and the connection are
    closed in every case.
    """
    logger.info('开始统计风险名单')
    start_time = int(settings.Working_TS) * 1000
    end_time = start_time + 60 * 60 * 1000
    strategies_weigh = utils.Strategies_Weigh

    conn = None
    cursor = None
    try:
        # Open the database connection.
        conn = yield tornado_mysql.connect(**config)
        cursor = conn.cursor()
        query_params = ','.join(QUERY_NOTICE_PARAMS)
        insert_values = []

        # The interpolated values are column names from QUERY_NOTICE_PARAMS
        # and integers computed above.
        yield cursor.execute(
            NOTICE_QUERY_STRING % (query_params, start_time, end_time, query_params))
        for row in cursor:
            # Turn the row into a notice dict, then look up the tags of the
            # strategy it hit.
            notice = parser_notice(*row)
            notice['timestamp'] = start_time
            notice['last_modified'] = millis_now()
            tags = strategies_weigh.get(
                notice['strategy_name'], {}).get('tags', [])
            # One output row per tag of the strategy.
            for tag in tags:
                notice['tag'] = utf8(tag)
                insert_values.append(
                    [notice[p] for p in INSERT_NOTICE_PARAMS])

        # Delete the window's rows first so a re-run does not duplicate data.
        yield cursor.execute(NOTICE_DELETE_STRING % start_time)
        yield cursor.executemany(NOTICE_INSERT_STRING, insert_values)

        # Wait for the commit to finish: without the yield the connection
        # could be closed while the COMMIT is still in flight.
        yield conn.commit()
        logger.info('风险名单统计完成')
    except Exception as e:
        logger.error(e)
        logger.error('风险名单统计失败')
    finally:
        # Release the cursor and the connection on failure as well.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as e:
                logger.error(e)
def _check_setblacklist_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``setblacklist`` expression.

    Checks, in order: the scene name, the check type (a known type or any
    text containing both ``(`` and ``)``), the decision, a positive ttl, and
    that ``exp.check_value`` is a field of the trigger event.

    :return: always ``''``
    :raise RuntimeError: on the first failed check
    """
    scenes = {'VISITOR', 'ORDER', 'ACCOUNT', 'TRANSACTION', 'MARKETING', 'OTHER'}
    if exp.name not in scenes:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的场景' % utf8(exp.name))

    known_check_types = {'IP', 'USER', 'DeviceID', 'OrderID'}
    if exp.check_type not in known_check_types:
        has_both_parens = (
            exp.check_type.find('(') != -1 and exp.check_type.find(')') != -1
        )
        if not has_both_parens:
            raise RuntimeError(
                utf8(error_prefix) + '(%s)不是正确的值类型' % utf8(exp.check_type))

    decisions = {'accept', 'review', 'reject'}
    if exp.decision not in decisions:
        raise RuntimeError(
            utf8(error_prefix) + '(%s)不是正确的黑名单处置方式' % utf8(exp.decision))

    if not (exp.ttl > 0):
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的ttl值' % utf8(exp.ttl))

    # check_value names the blacklist field; it must exist on the trigger event.
    check_field_exist(trigger_event[1], exp.check_value, error_prefix)
    return ''
def get(self):
    """
    Get the geo information of one ip

    @API
    summary: ip geo information
    notes: Get the geo information of one ip
    tags:
      - platform
    parameters:
      - name: ip
        in: query
        required: false
        type: string
        description: ip address
      - name: mobile
        in: query
        required: false
        type: string
        description: mobile phone number
    produces:
      - application/json
    """
    # NOTE(review): the @API section of the docstring looks like a
    # machine-readable spec; its wording is kept as in the original --
    # confirm the layout matches what the doc tooling expects.
    import logging

    from threathunter_common.geo.geoutil import get_ip_location
    from threathunter_common.util import utf8
    from threathunter_common.geo.phonelocator import get_geo
    import ipaddr

    ip = self.get_argument('ip', "")
    mobile = self.get_argument('mobile', "")

    # Default answer, used when neither argument is given or the lookup fails.
    result = "未知"
    try:
        if ip:
            ip_addr = ipaddr.IPAddress(ip)
            if ip_addr.is_loopback or ip_addr.is_private:
                result = "内网地址"
            else:
                result = get_ip_location(ip)
        elif mobile:
            result = get_geo(mobile)
    except Exception:
        # Still best effort: the default answer is returned, but the failure
        # is no longer invisible. The arguments are not logged on purpose.
        logging.getLogger(__name__).debug("geo lookup failed", exc_info=True)

    self.finish(json.dumps({"address": utf8(result)}))
def gen_dimension_trigger_variable_from_strategy(strategy, trigger_variable_name, dimension, is_delay=False):
    """
    Create and register the trigger variable of one dimension for a strategy.

    Only ``realtime`` terms whose left expression is ``getvariable`` or
    ``count`` are considered; a term counts when the dimension derived from
    its trigger fields equals ``dimension``. When no term counts, nothing is
    registered.

    :return: the registered ``VariableModel``, or ``None`` when no term
        uses the dimension
    """
    trigger_event = get_trigger_event(strategy, is_delay)

    # The trigger fields of getvariable/count must be part of the trigger
    # event so they can be picked up when the variable is collected.
    matching_terms = 0
    for term in strategy.terms:
        if term.scope != 'realtime':
            continue
        expression = term.left
        if expression.subtype not in {'getvariable', 'count'}:
            continue
        if get_dimension_from_trigger_keys(expression.trigger_fields) == dimension:
            matching_terms += 1

    if not matching_terms:
        return None

    dimension_field = get_field_from_dimension(dimension)
    filter_dict = gen_ordinary_filter(
        trigger_event[1], dimension_field, '!=', '').get_dict()

    remark = '{} trigger for strategy {}'.format(dimension, utf8(strategy.name))
    sources = [{'app': trigger_event[0], 'name': trigger_event[1]}]
    variable = VariableModel(
        'realtime', 'nebula', trigger_variable_name, remark, remark, '',
        'enable', 'filter', '', '', '', sources, filter_dict, {}, {},
        [dimension_field])
    add_variable_to_registry(variable)
    return variable
def _check_event_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate an event expression and return the type of the field it uses.

    :param exp: expression object; ``event`` and ``field`` are read from it
    :param trigger_event: unused
    :param error_prefix: text prepended to every error message
    :return: the result of ``get_event_field_type`` for the field
    :raise RuntimeError: when ``exp.event`` is not a two-item list, or when
        any of the existence checks fails
    """
    event, field = exp.event, exp.field
    if not (isinstance(event, list) and len(event) == 2):
        raise RuntimeError(utf8(error_prefix) + '事件配置不正确')

    event_name = event[1]
    check_event_exist(event_name, error_prefix)
    check_variable_exist(event_name, error_prefix)
    check_field_exist(event_name, field, error_prefix)
    return get_event_field_type(event_name, field, error_prefix)
def extract_body_from_text(text):
    """
    Replace literal ``\\NNN`` octal escape sequences in ``text`` with the byte NNN.

    Falsy input returns ``""``. A backslash followed by three digits is
    replaced by one byte when the digits form an octal number in 0..255.
    Everything else, including digit groups such as ``\\089`` or ``\\777``
    that are not a valid octal byte, is copied through literally.

    :return: ``str()`` of the assembled bytearray
    """
    if not text:
        return ""

    text = utf8(text)
    result = bytearray()
    length = len(text)
    cursor = 0
    while cursor < length:
        ch = text[cursor]
        if ch == "\\" and cursor <= length - 4:
            octvalue = text[cursor + 1:cursor + 4]
            if octvalue.isdigit():
                try:
                    result.append(int(octvalue, 8))
                except ValueError:
                    # isdigit() also accepts 8 and 9, and values above 0o377
                    # do not fit in a byte: keep the text literally instead
                    # of aborting the whole conversion.
                    pass
                else:
                    cursor += 4
                    continue
        result.append(ch)
        cursor += 1
    return str(result)
def get_variable_field_type(variable_name, field_name, error_prefix=''):
    """
    Return the basic type of one field of a variable.

    :param variable_name: variable name
    :param field_name: field name
    :param error_prefix: text prepended to every error message
    :return: the first element of the field's schema entry
    :raise RuntimeError: when the variable or field is unknown, or when the
        entry's second element (the subtype) is set
    """
    # Forward the prefix so schema lookup errors carry the caller's context.
    schema = get_variable_schema(variable_name, error_prefix)
    result_type = schema.get(field_name)
    if not result_type:
        raise RuntimeError(
            utf8(error_prefix)
            + '变量(%s)不包含字段(%s)' % (utf8(variable_name), utf8(field_name)))

    # Strategies currently only deal with basic types.
    if result_type[1]:
        raise RuntimeError(
            utf8(error_prefix)
            + '暂不支持%s(%s)这种复杂数据类型' % (utf8(field_name), utf8(result_type)))
    return result_type[0]
def get_event_field_type(event_name, field_name, error_prefix=''):
    """
    Return the basic type of one field of an event.

    :param event_name: event name
    :param field_name: field name
    :param error_prefix: text prepended to every error message
    :return: the first element of the field's schema entry
    :raise RuntimeError: when the event or field is unknown, or when the
        entry's second element (the subtype) is set
    """
    # Forward the prefix so schema lookup errors carry the caller's context.
    schema = get_event_schema(event_name, error_prefix)
    result_type = schema.get(field_name)
    if not result_type:
        raise RuntimeError(
            utf8(error_prefix)
            + '事件(%s)不包含字段(%s)' % (utf8(event_name), utf8(field_name)))

    # Strategies currently only deal with basic types.
    if result_type[1]:
        raise RuntimeError(
            utf8(error_prefix)
            + '暂不支持%s(%s)这种复杂数据类型' % (utf8(field_name), utf8(result_type)))
    return result_type[0]
def get_event_schema(event_name, error_prefix=''):
    """
    Return the field mapping registered for an event.

    The event is looked up as a variable of the ``nebula`` app and must be
    of type ``event`` or ``filter``.

    :param event_name: event name
    :param error_prefix: text prepended to every error message
    :return: the registry's fields mapping for the event
    :raise RuntimeError: when the definition is missing, has another type,
        or has no field mapping
    """
    # Events used in strategies are backed by event/filter variables.
    definition = get_variable_from_registry('nebula', event_name)
    if not definition:
        raise RuntimeError(
            utf8(error_prefix) + '事件(%s)定义配置不存在' % utf8(event_name))

    if definition.type not in {'event', 'filter'}:
        raise RuntimeError(
            utf8(error_prefix) + '事件(%s)定义配置不正确' % utf8(event_name))

    fields_mapping = get_variable_fields_mapping_from_registry('nebula', event_name)
    if fields_mapping:
        return fields_mapping

    raise RuntimeError(
        utf8(error_prefix) + '事件(%s)定义配置不存在' % utf8(event_name))
def _check_count_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``count`` expression.

    Checks, in order: the source event, that the trigger event matches,
    single trigger field and single group-by field, each condition, the
    interval, the algorithm (``distinct`` is rewritten to
    ``distinct_count`` on ``exp``), the trigger event and its fields, the
    group-by fields, the operand fields, and that group-by and trigger
    fields have matching types.

    :return: always ``'double'``
    :raise RuntimeError: on the first failed check
    """
    def fail(message):
        raise RuntimeError(utf8(error_prefix) + message)

    # Source event.
    source_event = exp.source_event
    if not (isinstance(source_event, list) and len(source_event) == 2):
        fail('事件配置不正确')
    source_name = source_event[1]
    check_event_exist(source_name, error_prefix)

    if exp.trigger_event != trigger_event:
        fail('触发事件不一致')
    if not exp.trigger_fields:
        fail('触发字段为空')

    # TODO: limitation, only a single field is supported for now.
    if len(exp.trigger_fields) > 1:
        fail('目前只支持单字段触发')
    if not exp.groupby or len(exp.groupby) > 1:
        fail('目前只支持单字段触发')

    # Conditions.
    for cond in exp.condition:
        cond_field, cond_op, cond_value = cond['left'], cond['op'], cond['right']
        if cond_op == '=':
            # Special "equals variable" condition: not checked here.
            continue
        check_field_exist(source_name, cond_field, error_prefix)
        cond_type = get_event_field_type(source_name, cond_field, error_prefix)
        check_condition_support(cond_type, cond_op, cond_value, error_prefix)

    # Interval.
    if not (exp.interval > 0):
        fail('(%s)不是正确的时间窗口值' % utf8(exp.interval))

    # Algorithm ("distinct" is normalised in place).
    if exp.algorithm == 'distinct':
        exp.algorithm = 'distinct_count'
    if exp.algorithm not in {'count', 'distinct_count', 'interval'}:
        fail('不支持算法(%s)' % utf8(exp.algorithm))

    # Trigger event and trigger fields.
    own_trigger_event = exp.trigger_event
    own_trigger_fields = exp.trigger_fields
    if not (isinstance(own_trigger_event, list) and len(own_trigger_event) == 2):
        fail('事件配置不正确')
    trigger_name = own_trigger_event[1]
    check_event_exist(trigger_name, error_prefix)
    for field in own_trigger_fields:
        check_field_exist(trigger_name, field, error_prefix)

    # Group by.
    for field in exp.groupby:
        check_field_exist(source_name, field, error_prefix)

    # Operand.
    if not exp.operand:
        fail('算法(%s)缺少统计对象' % utf8(exp.algorithm))
    for field in exp.operand:
        check_field_exist(source_name, field, error_prefix)

    # Group-by and trigger fields must pair up with equal data types.
    dimensions_match = len(exp.groupby) == len(exp.trigger_fields) and all(
        get_event_field_type(source_name, groupby_field, error_prefix)
        == get_event_field_type(trigger_name, trigger_field, error_prefix)
        for groupby_field, trigger_field in zip(exp.groupby, exp.trigger_fields)
    )
    if not dimensions_match:
        fail('触发维度和统计维度不一致')

    # The actual data is int/double.
    return 'double'
def _check_location_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``getlocation`` expression.

    :param exp: expression object; ``source_event_field``, ``op``,
        ``location_type`` and ``location_value`` are read from it
    :param trigger_event: trigger event; its second item must equal the
        event named in ``source_event_field``
    :param error_prefix: text prepended to every error message
    :return: always ``''``
    :raise RuntimeError: on the first failed check
    """
    # Expected form: three dot separated parts; the last two are the event
    # and the field.
    path = exp.source_event_field.split('.')
    if len(path) != 3:
        raise RuntimeError(utf8(error_prefix) + '不正确的地理位置参数')
    _, event, field = path

    check_event_exist(event, error_prefix)
    check_field_exist(event, field, error_prefix)
    # Forward the prefix so type lookup errors carry the caller's context.
    if get_event_field_type(event, field, error_prefix) not in {'string', 'str'}:
        raise RuntimeError(
            utf8(error_prefix) + 'getlocation的来源字段(%s)不是字符串类型' % utf8(field))

    if event != trigger_event[1]:
        raise RuntimeError(
            utf8(error_prefix)
            + 'getlocation的来源事件(%s)与触发事件(%s)不一致'
            % (utf8(event), utf8(trigger_event[1])))

    if exp.op not in {'belong', '!belong', '=', '!='}:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的操作符' % utf8(exp.op))

    if exp.location_type not in {'city', 'province'}:
        raise RuntimeError(
            utf8(error_prefix) + '(%s)不是正确的类型' % utf8(exp.location_type))

    # any() states the intent directly and does not depend on filter()
    # returning a list.
    if not exp.location_value or not any(
            value is not None for value in exp.location_value):
        raise RuntimeError(utf8(error_prefix) + 'getlocation缺乏参数配置')

    if exp.op in {'=', '!='} and len(exp.location_value) > 1:
        raise RuntimeError(utf8(error_prefix) + '等于操作只支持一个变量')

    # ASCII values are checked as field names of the trigger event.
    # NOTE(review): the first non-ASCII value stops the loop, so later
    # values are not checked -- confirm this is intended.
    for value in exp.location_value:
        if not is_ascii(value):
            break
        check_field_exist(trigger_event[1], value, error_prefix)
    return ''