Python utf8 Examples, threathunter_common.util.utf8 Python Examples

Example #1

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_getvariable_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``getvariable`` expression and return the type of its value.

    :param exp: expression exposing ``trigger_event``, ``trigger_fields`` and ``variable``
    :param trigger_event: trigger event the expression's own trigger event must equal
    :param error_prefix: text prepended to every error message
    :return: subtype of the variable's ``value`` field when truthy, otherwise its type
    :raise RuntimeError: when the event, trigger fields or variable are misconfigured
    """
    exp_trigger_event = exp.trigger_event
    if not isinstance(exp_trigger_event, list) or len(exp_trigger_event) != 2:
        raise RuntimeError(utf8(error_prefix) + '事件配置不正确')

    check_event_exist(exp_trigger_event[1], error_prefix)

    if exp_trigger_event != trigger_event:
        raise RuntimeError(utf8(error_prefix) + '触发事件不一致')

    trigger_fields = exp.trigger_fields
    if not trigger_fields:
        raise RuntimeError(utf8(error_prefix) + '触发字段为空')

    for field in trigger_fields:
        check_field_exist(exp_trigger_event[1], field, error_prefix)

    variable = exp.variable
    if not isinstance(variable, list) or len(variable) != 2:
        raise RuntimeError(utf8(error_prefix) + '变量配置不正确')

    check_variable_exist(variable[1], error_prefix)

    # Forward the prefix so schema lookup failures keep the caller's context.
    variable_schema = get_variable_schema(variable[1], error_prefix)
    # Real data is mostly int/double, but rely on the declared type anyway.
    # NOTE(review): a schema without a 'value' entry raises KeyError here.
    value_type = variable_schema['value']
    # The subtype wins (it maps to types such as profile); otherwise use the plain type.
    return value_type[1] or value_type[0]

Example #2

0

Show file

def gen_filter_from_location_exp(location_exp, trigger_event):
    """
    Build a filter from a getlocation expression.

    :param location_exp: expression exposing ``source_event_field``, ``op``,
        ``location_value`` and ``location_type``
    :param trigger_event: two-item sequence that must equal the first two
        dot-separated parts of ``source_event_field``
    :return: whatever ``_gen_string_filter`` returns for the mapped condition
    :raise RuntimeError: on a malformed source field or an unknown operator
    """
    parts = location_exp.source_event_field.split('.')
    if len(parts) != 3 or parts[0] != trigger_event[0] or parts[1] != trigger_event[1]:
        raise RuntimeError('地理位置参数不正确')

    # Operator -> condition type; compared with == one by one, in order.
    op_table = (
        ('=', 'locationequals'),
        ('!=', '!locationequals'),
        ('belong', 'locationcontainsby'),
        ('!belong', '!locationcontainsby'),
    )
    requested_op = location_exp.op
    for known_op, mapped_type in op_table:
        if requested_op == known_op:
            condition_type = mapped_type
            break
    else:
        raise RuntimeError('操作不正确')

    raw_value = location_exp.location_value
    # A list of locations is flattened into one comma-separated value.
    joined_value = ','.join(raw_value) if isinstance(raw_value, list) else raw_value
    encoded_value = utf8(joined_value)
    location_type = utf8(location_exp.location_type)

    return _gen_string_filter(parts[1], parts[2], condition_type, encoded_value, location_type)

Example #3

0

Show file

def _gen_string_filter(source, string_field, op, op_value, param=''):
    """
    Build the filter for a condition on a string field.

    ``in`` / ``!in`` split ``op_value`` on commas (at most 10 parts) and
    combine one ``==`` / ``!=`` filter per part with ``or`` / ``and``.
    Any other operator yields a single simple filter, after renaming
    regex/contain operators to match/contains.

    :raise RuntimeError: unsupported operator, or more than 10 parts
    """
    if not op or op not in _string_condition_ops:
        raise RuntimeError('string类型不支持({})操作'.format(utf8(op)))

    if op in ('in', '!in'):
        # Membership expands to a compound filter of per-item comparisons.
        item_op, combinator = ('==', 'or') if op == 'in' else ('!=', 'and')
        items = op_value.split(',')
        if len(items) > 10:
            raise RuntimeError('属于最多支持10个属性')

        sub_filters = []
        for item in items:
            sub_filters.append(
                _gen_string_filter(source, string_field, item_op, item, param).get_dict())
        return Filter('', '', '', '', '', '', combinator, '', sub_filters)

    # Configuration names that differ from the filter's own operator names.
    renamed_ops = {
        'regex': 'match',
        '!regex': '!match',
        'contain': 'contains',
        '!contain': '!contains',
    }
    filter_op = renamed_ops.get(op, op)

    return Filter(source, string_field, '', '', filter_op, utf8(op_value), 'simple', param, None)

Example #4

0

Show file

File: notice_persist.py Project: threathunterX/python_lib

def parser_notice(*args):
    """
    Pair positional values with ``QUERY_NOTICE_PARAMS`` to build a notice dict.

    Every paired value goes through ``utf8``; the last positional value is
    then stored unchanged under ``'count'``.
    """
    notice = dict(
        (name, utf8(value)) for name, value in zip(QUERY_NOTICE_PARAMS, args))
    notice['count'] = args[-1]
    return notice

Example #5

0

Show file

File: common_check.py Project: threathunterX/python_lib

def check_and_return_int(str_value, error_prefix=''):
    """
    Convert a value to an integer, failing with a readable error.

    :param str_value: value to convert
    :param error_prefix: text prepended to the error message
    :return: ``int(str_value)``
    :raise RuntimeError: when the value cannot be converted to an integer
    """

    try:
        return int(str_value)
    # Only conversion failures are translated; KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by the bare except.
    except (TypeError, ValueError, OverflowError):
        raise RuntimeError(
            utf8(error_prefix) + '(%s)不是合理的数字' % utf8(str_value))

Example #6

0

Show file

File: properties_parser.py Project: threathunterX/python_lib

    def parse(self, data):
        """
        Parse properties-style text into a dict.

        Each line is passed through ``utf8`` and stripped. Blank lines and
        lines starting with ``#`` are skipped. The rest is split on the first
        ``=`` into key and value, both stripped; entries with an empty key
        are dropped.

        :param data: text in properties format
        :return: dict of parsed entries, or ``{}`` after logging when any
            exception occurs (for example a line without ``=``)
        """
        try:
            parsed = {}
            for raw_line in data.splitlines():
                line = utf8(raw_line).strip()
                if not line or line.startswith("#"):
                    continue

                key, value = [part.strip() for part in line.split("=", 1)]
                if key:
                    parsed[key] = value
            return parsed
        except Exception as err:
            LOGGER.error(
                "config %s: fail to parse properties config, the error is %s",
                self.name, err)
            return {}

Example #7

0

Show file

def get_python_json_friendly(data):
    """
    Replace literal ``\\xHH`` escape sequences in ``data`` with the byte they name.

    :param data: text that may contain backslash-x escapes
    :return: ``""`` for falsy input; ``data`` unchanged when it contains no
        ``\\x``; otherwise ``str()`` of the rebuilt bytearray
    """
    if not data:
        return ""
    if "\\x" not in data:
        return data

    # NOTE(review): appending characters to a bytearray and calling str() on
    # it look like Python 2 idioms -- confirm before running on Python 3.
    data = utf8(data)
    result = bytearray()
    length = len(data)
    cursor = 0
    while cursor < length:
        ch = data[cursor]
        # An escape needs four characters: backslash, x/X and two hex digits.
        if ch == "\\" and cursor <= length - 4 and data[cursor +
                                                        1].lower() == "x":
            hexvalue = data[cursor + 2:cursor + 4]
            try:
                result.append(int(hexvalue, 16))
                cursor += 4
                continue
            except ValueError:
                # Not a valid byte escape: keep the backslash as a literal.
                pass

        result.append(ch)
        cursor += 1
    return str(result)

Example #8

0

Show file

File: common_check.py Project: threathunterX/python_lib

def check_and_return_pattern(str_value, error_prefix=''):
    """
    Compile a value into a regular expression, failing with a readable error.

    :param str_value: pattern text
    :param error_prefix: text prepended to the error message
    :return: the compiled pattern
    :raise RuntimeError: when the value cannot be compiled
    """

    try:
        return re.compile(str_value)
    # Exception instead of a bare except, so KeyboardInterrupt and
    # SystemExit are not turned into a validation error.
    except Exception:
        raise RuntimeError(
            utf8(error_prefix) + '(%s)不是合理的正则表达式' % utf8(str_value))

Example #9

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_exp_and_return_type(exp, trigger_event, error_prefix):
    """
    Route an expression to the checker registered for its (type, subtype).

    :return: the value returned by the selected checker
    :raise RuntimeError: when no checker exists for the pair
    """
    checkers = {
        ('event', ''): _check_event_expression_and_return_type,
        ('constant', ''): _check_constant_expression_and_return_type,
        ('func', 'getvariable'): _check_getvariable_expression_and_return_type,
        ('func', 'count'): _check_count_expression_and_return_type,
        ('func', 'setblacklist'): _check_setblacklist_expression_and_return_type,
        ('func', 'time'): _check_time_expression_and_return_type,
        ('func', 'sleep'): _check_sleep_expression_and_return_type,
        ('func', 'spl'): _check_spl_expression_and_return_type,
        ('func', 'getlocation'): _check_location_expression_and_return_type
    }

    checker = checkers.get((exp.type, exp.subtype))
    if checker:
        return checker(exp, trigger_event, error_prefix)

    raise RuntimeError(utf8(error_prefix) + '表达式(%s:%s)不支持' % (utf8(exp.type), utf8(exp.subtype)))

Example #10

0

Show file

def _gen_number_filter(source, number_field, op, op_value, is_long=False):
    """
    Build the filter for a condition on a numeric field.

    ``between`` expects two comma-separated bounds and becomes ``>=`` and
    ``<=`` joined with ``and``. ``in`` / ``!in`` accept at most 10
    comma-separated values and become ``==`` / ``!=`` filters joined with
    ``or`` / ``and``. Any other operator yields one simple filter whose
    operand is converted with ``int`` (``is_long``) or ``float``.

    :raise RuntimeError: unsupported operator, wrong number of operands, or
        an operand that is not a number
    """

    if not op or op not in _number_condition_ops:
        raise RuntimeError('数字类型不支持({})操作'.format(utf8(op)))

    if op == 'between':
        parts = op_value.split(',')
        if len(parts) != 2:
            raise RuntimeError('介于需要两个参数')
        left, right = parts
        left_condition = Filter(source, number_field, '', '', '>=', utf8(left), 'simple', '', None).get_dict()
        right_condition = Filter(source, number_field, '', '', '<=', utf8(right), 'simple', '', None).get_dict()
        return Filter('', '', '', '', '', '', 'and', '', [left_condition, right_condition])
    elif op == 'in':
        parts = op_value.split(',')
        if len(parts) > 10:
            raise RuntimeError('属于最多支持10个属性')

        condition = [_gen_number_filter(source, number_field, '==', part, is_long).get_dict() for part in parts]
        return Filter('', '', '', '', '', '', 'or', '', condition)
    elif op == '!in':
        parts = op_value.split(',')
        if len(parts) > 10:
            raise RuntimeError('属于最多支持10个属性')

        condition = [_gen_number_filter(source, number_field, '!=', part, is_long).get_dict() for part in parts]
        return Filter('', '', '', '', '', '', 'and', '', condition)

    # Simple condition: the operand must convert to a number.
    try:
        op_value = int(op_value) if is_long else float(op_value)
    # Only conversion failures become validation errors; the former bare
    # except also trapped KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, OverflowError):
        raise RuntimeError('({})不是数字'.format(utf8(op_value)))

    return Filter(source, number_field, '', '', op, utf8(op_value), 'simple', '', None)

Example #11

0

Show file

def gen_function(method, field):
    """
    Create the count / distinct-count aggregation operator from configuration.

    :param method: ``'count'`` or ``'distinct_count'``
    :param field: forwarded as the third argument of ``Function``
    :return: the constructed ``Function``
    :raise RuntimeError: when ``method`` is empty or not supported
    """
    supported_methods = {'count', 'distinct_count'}
    if method and method in supported_methods:
        return Function(method, '', field, '', '', '')

    raise RuntimeError('不支持操作({})'.format(utf8(method)))

Example #12

0

Show file

def get_variable_schema(variable_name, error_prefix=''):
    """
    Return the schema of a variable as a dict (field name -> field attributes).

    :param variable_name: name of the variable, looked up under ``'nebula'``
    :param error_prefix: text prepended to the error message
    :return: the fields mapping found in the registry
    :raise RuntimeError: when the variable or its fields mapping is missing
    """

    def _not_defined():
        # Both failure modes report the same message.
        return RuntimeError(
            utf8(error_prefix) + '变量(%s)定义配置不存在' % utf8(variable_name))

    if not get_variable_from_registry('nebula', variable_name):
        raise _not_defined()

    fields_mapping = get_variable_fields_mapping_from_registry('nebula', variable_name)
    if not fields_mapping:
        raise _not_defined()

    return fields_mapping

Example #13

0

Show file

    def add_metrics(self,
                    db,
                    metrics_name,
                    tags,
                    value,
                    expire_seconds=None,
                    timestamp=None):
        """
        Write one point for ``metrics_name`` into database ``db``.

        Tag keys and values go through ``utf8``; ``value`` is stored as a
        float under the ``"value"`` field. ``expire_seconds`` and
        ``timestamp`` are accepted but not used here.
        """
        self.idb.switch_database(db)

        if tags:
            encoded_tags = dict(
                (utf8(name), utf8(content)) for name, content in tags.iteritems())
        else:
            encoded_tags = dict()

        point = {
            "measurement": metrics_name,
            "tags": encoded_tags,
            "fields": {"value": float(value)}
        }
        self.idb.write_points([point], database=db, time_precision="ms")

Example #14

0

Show file

def gen_ordinary_filter(source_name, field, op, op_value):
    """
    Build an ordinary condition from an operator and its operand.

    The field's type is looked up on ``source_name`` and decides whether a
    string filter or a number filter (long or double) is generated.

    :raise RuntimeError: empty field, unknown field definition, or a field
        type other than string / long / double
    """
    if not field:
        raise RuntimeError('条件的属性为空')

    field_type = get_variable_field_type(source_name, field)
    if not field_type:
        raise RuntimeError('无法从事件{}中取得字段{}的定义'.format(utf8(source_name), utf8(field)))

    if field_type == 'string':
        return _gen_string_filter(source_name, field, op, op_value)

    # Numeric types differ only in the is_long flag.
    is_long_by_type = {'long': True, 'double': False}
    if field_type in is_long_by_type:
        return _gen_number_filter(source_name, field, op, op_value,
                                  is_long=is_long_by_type[field_type])

    raise RuntimeError('不支持类型为({})的条件'.format(utf8(field_type)))

Example #15

0

Show file

File: common_check.py Project: threathunterX/python_lib

def check_condition_support(left_type,
                            operation_exp,
                            right_value,
                            error_prefix=''):
    """
    Check that an operator is allowed for the left type, then check the right value.

    :param left_type: data type of the left value
    :param operation_exp: operator
    :param right_value: right value
    :param error_prefix: text prepended to every error message
    :return: None
    :raise RuntimeError: unsupported type, operator not supported for the
        type, or a ``between`` operand that is not two comma-separated parts
    """

    if left_type in ('long', 'int', 'double', 'float'):
        support = operation_exp in expressions_supported_on_number
    elif left_type in ('str', 'string'):
        support = operation_exp in expressions_supported_on_string
    else:
        raise RuntimeError(
            utf8(error_prefix) + '不支持类型为(%s)的操作' % utf8(left_type))

    if not support:
        # An operator without a display name must still produce the
        # RuntimeError below rather than a lookup failure, so fall back to
        # the raw operator text.
        try:
            operation_name = expression_to_name_of_operations[operation_exp]
        except LookupError:
            operation_name = operation_exp
        raise RuntimeError(utf8(error_prefix) + '类型(%s)不支持操作(%s)'
                           % (utf8(left_type), utf8(operation_name)))

    check_right_exp(left_type, operation_exp, right_value, error_prefix)
    if 'between' in operation_exp:
        # should be two arguments
        if len(right_value.split(',')) != 2:
            raise RuntimeError(utf8(error_prefix) + '介于需要逗号分隔的两个数字')

Example #16

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def check_term(term, trigger_event, error_prefix):
    """
    Check that a term is well formed.

    :param term: term exposing ``left``, ``right``, ``op``, ``remark`` and ``scope``
    :param trigger_event: trigger event handed to the expression checkers
    :param error_prefix: text prepended to every error message
    :return: None
    :raise RuntimeError: missing left/right expression, missing remark,
        invalid scope, or any failure reported by the expression checkers
    """

    # Validate the left expression before anything dereferences it;
    # otherwise a missing left side surfaces as an AttributeError.
    left = term.left
    if left is None:
        raise RuntimeError(utf8(error_prefix) + '条款左表达式为空')

    if term.remark is None and left.subtype != 'setblacklist':
        raise RuntimeError(utf8(error_prefix) + '条款描述为空')

    if term.scope not in {'realtime', 'profile'}:
        raise RuntimeError(utf8(error_prefix) + '条款的适用类型错误')

    return_type = _check_exp_and_return_type(left, trigger_event, utf8(error_prefix) + '左表达式>>')

    if left.subtype in {'setblacklist', 'time', 'getlocation', 'sleep', 'spl'}:  # @todo no right set
        # These expressions have no right-hand side to check.
        return

    right = term.right
    if right is None:
        raise RuntimeError(utf8(error_prefix) + '条款右表达式为空')
    _check_exp_and_return_type(right, trigger_event, utf8(error_prefix) + '右表达式>>')

    # Event expressions and all other kinds (which default to the value
    # field) are checked the same way.
    check_condition_support(return_type, term.op, right.value, error_prefix)

Example #17

0

Show file

    def add_metrics(self,
                    db,
                    metrics_name,
                    tags,
                    value,
                    expire_seconds,
                    timestamp=None):
        """
        Write one point for ``metrics_name`` into database ``db`` in
        name / columns / points form.

        Tags (keys and values through ``utf8``), then ``"value"`` and, when
        ``timestamp`` is truthy, ``"time"`` become the columns of a single
        row. ``expire_seconds`` is accepted but not used here.
        """
        self.idb.switch_database(db)

        if tags:
            row = dict(
                (utf8(name), utf8(content)) for name, content in tags.iteritems())
        else:
            row = dict()
        row["value"] = value
        if timestamp:
            row["time"] = timestamp

        columns = row.keys()
        values = row.values()
        point = {
            "name": metrics_name,
            "columns": columns,
            "points": [values]
        }
        self.idb.write_points([point], database=db, time_precision="ms")

Example #18

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_constant_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    检验event expression的右值，应该是字符串常量.

    :param exp:
    :param trigger_event:
    :param error_prefix:
    :return:
    """

    if exp.value is None:
        raise RuntimeError(utf8(error_prefix) + '常量不能为NULL值')
    # todo value type
    return 'string'

Example #19

0

Show file

def gen_ip_trigger_variable_from_strategy(strategy,
                                          trigger_variable_name,
                                          is_delay=False):
    """
    Create and register the ip-dimension trigger variable of a strategy (new mode).

    Filters are generated from the event terms first and the getlocation
    terms second. With ``is_delay`` the variable's filter is instead a single
    ``delay_strategy == strategy.name`` condition.

    :return: the registered ``VariableModel``, or ``None`` for a falsy strategy
    """
    if not strategy:
        return

    trigger_event = get_trigger_event(strategy, is_delay)

    # First pass: conditions coming from event expressions.
    event_filters = [
        gen_filter_from_event_exp(term.left, term.op, term.right)
        for term in strategy.terms
        if term.left.type == 'event'
    ]
    conditions = [item for item in event_filters if item]

    # Second pass: location conditions have lower priority, so they come last.
    for term in strategy.terms:
        expression = term.left
        if expression.type != 'func' or expression.subtype != 'getlocation':
            continue
        location_filter = gen_filter_from_location_exp(expression, trigger_event)
        if location_filter:
            conditions.append(location_filter)

    if is_delay:
        delay_filter = gen_ordinary_filter(trigger_event[1], 'delay_strategy',
                                           '==', strategy.name)
        total_filter = delay_filter.get_dict()
    elif conditions:
        condition_dicts = [item.get_dict() for item in conditions]
        total_filter = Filter('', '', '', '', '', '', 'and', '',
                              condition_dicts).get_dict()
    else:
        total_filter = {}

    remark = 'ip trigger for strategy {}'.format(utf8(strategy.name))
    source = [{
        'app': trigger_event[0],
        'name': trigger_event[1]
    }]
    variable = VariableModel('realtime', 'nebula', trigger_variable_name,
                             remark, remark, '', 'enable', 'filter', '', '',
                             '', source, total_filter, {}, {}, ['c_ip'])
    add_variable_to_registry(variable)

    return variable

Example #20

0

Show file

File: notice_persist.py Project: threathunterX/python_lib

def gen_notice_statistics():
    """
    Query historical notices, expand them by hit tag and store the rows that
    feed the risk-list report.

    The window starts at ``settings.Working_TS * 1000`` and spans
    ``60 * 60 * 1000`` units. The body yields on database calls, so it is
    presumably run as a Tornado coroutine (decorator not visible here).
    Failures are logged, not raised; cursor and connection are closed on
    both the success and the failure path.
    """
    logger.info('开始统计风险名单')
    start_time = int(settings.Working_TS) * 1000
    end_time = start_time + 60 * 60 * 1000
    strategies_weigh = utils.Strategies_Weigh

    conn = None
    cursor = None
    try:
        # Open the database connection.
        conn = yield tornado_mysql.connect(**config)
        cursor = conn.cursor()
        query_params = ','.join(QUERY_NOTICE_PARAMS)
        insert_values = []
        yield cursor.execute(
            NOTICE_QUERY_STRING %
            (query_params, start_time, end_time, query_params))

        for row in cursor:
            # Turn each result row into a notice dict, then look up the tags
            # of the strategy it hit.
            notice = parser_notice(*row)
            notice['timestamp'] = start_time
            notice['last_modified'] = millis_now()

            if notice['strategy_name'] in strategies_weigh:
                tags = strategies_weigh.get(notice['strategy_name'],
                                            {}).get('tags', [])

                # One row per hit tag, combined with the aggregated notice.
                for tag in tags:
                    notice['tag'] = utf8(tag)
                    insert_values.append(
                        [notice[p] for p in INSERT_NOTICE_PARAMS])

        # Delete this window's rows first so a re-run does not duplicate them.
        yield cursor.execute(NOTICE_DELETE_STRING % start_time)
        yield cursor.executemany(NOTICE_INSERT_STRING, insert_values)

        # Commit, otherwise the new rows are not saved.
        # NOTE(review): commit() is not yielded; if the driver implements it
        # as a coroutine it may need `yield` -- confirm against tornado_mysql.
        conn.commit()
        logger.info('风险名单统计完成')
    except Exception as e:
        logger.error(e)
        logger.error('风险名单统计失败')
    finally:
        # Release the cursor, then the connection, even after a failed query.
        for resource in (cursor, conn):
            if resource is None:
                continue
            try:
                resource.close()
            except Exception as e:
                logger.error(e)

Example #21

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_setblacklist_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``setblacklist`` expression.

    Checks, in order: scene name, check type (a known type, or text
    containing both ``(`` and ``)``), decision, positive ttl, and that the
    blacklisted field exists on the trigger event.

    :return: always ``''``
    :raise RuntimeError: on the first failing check
    """
    scenes = {'VISITOR', 'ORDER', 'ACCOUNT', 'TRANSACTION', 'MARKETING', 'OTHER'}
    if exp.name not in scenes:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的场景' % utf8(exp.name))

    check_type = exp.check_type
    if check_type not in {'IP', 'USER', 'DeviceID', 'OrderID'}:
        # Anything else must at least contain both parentheses.
        has_both_parens = check_type.find('(') != -1 and check_type.find(')') != -1
        if not has_both_parens:
            raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的值类型' % utf8(exp.check_type))

    decisions = {'accept', 'review', 'reject'}
    if exp.decision not in decisions:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的黑名单处置方式' % utf8(exp.decision))

    if not exp.ttl > 0:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的ttl值' % utf8(exp.ttl))

    # check_value names the field that gets blacklisted.
    check_field_exist(trigger_event[1], exp.check_value, error_prefix)
    return ''

Example #22

0

Show file

File: general.py Project: threathunterX/nebula_web

    def get(self):
        """
        Get the geo information of one ip

        @API
        summary: ip geo information
        notes: Get the geo information of one ip
        tags:
          - platform
        parameters:
          -
            name: ip
            in: query
            required: false
            type: string
            description: ip address
          -
            name: mobile
            in: query
            required: false
            type: string
            description: mobile phone number
        produces:
          - application/json
        """
        from threathunter_common.geo.geoutil import get_ip_location
        from threathunter_common.util import utf8
        from threathunter_common.geo.phonelocator import get_geo
        import ipaddr

        ip_arg = self.get_argument('ip', "")
        mobile_arg = self.get_argument('mobile', "")

        # Default answer; it survives any lookup failure below.
        address = "未知"
        try:
            if ip_arg:
                parsed_ip = ipaddr.IPAddress(ip_arg)
                is_internal = parsed_ip.is_loopback or parsed_ip.is_private
                address = "内网地址" if is_internal else get_ip_location(ip_arg)
            elif mobile_arg:
                address = get_geo(mobile_arg)
        except Exception:
            # Best effort: a failed lookup keeps the current address.
            pass

        payload = {"address": utf8(address)}
        self.finish(json.dumps(payload))

Example #23

0

Show file

def gen_dimension_trigger_variable_from_strategy(strategy,
                                                 trigger_variable_name,
                                                 dimension,
                                                 is_delay=False):
    """
    Create and register the trigger variable for a uid/did dimension (new mode).

    A variable is produced only when at least one realtime term whose left
    side is a getvariable/count expression triggers on ``dimension``. Its
    filter requires the dimension's field to be non-empty.

    :return: the registered ``VariableModel``, or ``None`` when no term matches
    """
    trigger_event = get_trigger_event(strategy, is_delay)

    # Trigger fields of getvariable and count must be part of the trigger
    # event so that collect variables can read them.
    matching_terms = 0
    for term in strategy.terms:
        expression = term.left

        # Only realtime terms matter here.
        if term.scope != 'realtime':
            continue
        # Only expressions that are tied to a dimension.
        if expression.subtype not in {'getvariable', 'count'}:
            continue
        if get_dimension_from_trigger_keys(expression.trigger_fields) != dimension:
            continue
        matching_terms += 1

    if matching_terms == 0:
        return None

    dimension_field = get_field_from_dimension(dimension)
    not_empty_filter = gen_ordinary_filter(trigger_event[1], dimension_field, '!=', '')
    filter_dict = not_empty_filter.get_dict()

    remark = '{} trigger for strategy {}'.format(dimension,
                                                 utf8(strategy.name))
    source = [{
        'app': trigger_event[0],
        'name': trigger_event[1]
    }]
    variable = VariableModel('realtime', 'nebula', trigger_variable_name,
                             remark, remark, '', 'enable', 'filter', '', '',
                             '', source, filter_dict, {}, {}, [dimension_field])
    add_variable_to_registry(variable)
    return variable

Example #24

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_event_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Check that the event of an event expression exists and return the data
    type of the field it uses.

    :param exp: expression exposing ``event`` (two-item list) and ``field``
    :param trigger_event: unused
    :param error_prefix: text prepended to every error message
    :return: the field type reported by ``get_event_field_type``
    :raise RuntimeError: when ``event`` is not a two-item list, or a check fails
    """
    event, field = exp.event, exp.field

    well_formed = isinstance(event, list) and len(event) == 2
    if not well_formed:
        raise RuntimeError(utf8(error_prefix) + '事件配置不正确')

    event_name = event[1]
    check_event_exist(event_name, error_prefix)
    check_variable_exist(event_name, error_prefix)
    check_field_exist(event_name, field, error_prefix)
    return get_event_field_type(event_name, field, error_prefix)

Example #25

0

Show file

def extract_body_from_text(text):
    """
    Replace three-digit octal escapes (backslash + three digits) in ``text``
    with the byte they name.

    A sequence that is not a valid octal byte (digits 8/9, or a value above
    255) is left in place as literal characters instead of raising.

    :param text: text that may contain octal escapes
    :return: ``""`` for falsy input, otherwise ``str()`` of the rebuilt bytearray
    """
    if not text:
        return ""
    # NOTE(review): appending characters to a bytearray and calling str() on
    # it look like Python 2 idioms -- confirm before running on Python 3.
    text = utf8(text)
    result = bytearray()
    length = len(text)
    cursor = 0
    while cursor < length:
        ch = text[cursor]
        if ch == "\\" and cursor <= length - 4:
            octvalue = text[cursor+1:cursor+4]
            if octvalue.isdigit():
                # isdigit() also accepts 8 and 9, which are not octal digits.
                try:
                    value = int(octvalue, 8)
                except ValueError:
                    value = None
                # Escapes above \377 do not fit in one byte.
                if value is not None and value <= 0xFF:
                    result.append(value)
                    cursor += 4
                    continue

        result.append(ch)
        cursor += 1

    return str(result)

Example #26

0

Show file

def get_variable_field_type(variable_name, field_name, error_prefix=''):
    """
    Return the type of one field of a variable.

    :param variable_name: name of the variable
    :param field_name: name of the field
    :param error_prefix: text prepended to every error message
    :return: first item of the field's type entry
    :raise RuntimeError: when the variable or field is unknown, or the
        field's type entry has a truthy second item (complex type)
    """

    # Forward the prefix so schema lookup failures keep the caller's context.
    schema = get_variable_schema(variable_name, error_prefix)
    result_type = schema.get(field_name)
    if not result_type:
        raise RuntimeError(
            utf8(error_prefix) + '变量(%s)不包含字段(%s)' %
            (utf8(variable_name), utf8(field_name)))

    # Strategies currently use basic types only.
    if result_type[1]:
        raise RuntimeError(
            utf8(error_prefix) + '暂不支持%s(%s)这种复杂数据类型' %
            (utf8(field_name), utf8(result_type)))
    return result_type[0]

Example #27

0

Show file

def get_event_field_type(event_name, field_name, error_prefix=''):
    """
    Return the type of one field of an event.

    :param event_name: name of the event
    :param field_name: name of the field
    :param error_prefix: text prepended to every error message
    :return: first item of the field's type entry
    :raise RuntimeError: when the event or field is unknown, or the field's
        type entry has a truthy second item (complex type)
    """

    # Forward the prefix so schema lookup failures keep the caller's context.
    schema = get_event_schema(event_name, error_prefix)
    result_type = schema.get(field_name)
    if not result_type:
        raise RuntimeError(
            utf8(error_prefix) + '事件(%s)不包含字段(%s)' %
            (utf8(event_name), utf8(field_name)))

    # Strategies currently use basic types only.
    if result_type[1]:
        raise RuntimeError(
            utf8(error_prefix) + '暂不支持%s(%s)这种复杂数据类型' %
            (utf8(field_name), utf8(result_type)))
    return result_type[0]

Example #28

0

Show file

def get_event_schema(event_name, error_prefix=''):
    """
    Return the schema of an event as a dict (field name -> field attributes).

    :param event_name: name of the event, looked up under ``'nebula'``
    :param error_prefix: text prepended to every error message
    :return: the fields mapping found in the registry
    :raise RuntimeError: when the definition is missing, is not of type
        ``event`` / ``filter``, or has no fields mapping
    """

    def _fail(message_template):
        # Every failure shares the prefix and the event name placeholder.
        return RuntimeError(utf8(error_prefix) + message_template % utf8(event_name))

    # Events used by strategies actually come from event/filter variables.
    definition = get_variable_from_registry('nebula', event_name)
    if not definition:
        raise _fail('事件(%s)定义配置不存在')
    if definition.type not in {'event', 'filter'}:
        raise _fail('事件(%s)定义配置不正确')

    fields_mapping = get_variable_fields_mapping_from_registry('nebula', event_name)
    if not fields_mapping:
        raise _fail('事件(%s)定义配置不存在')

    return fields_mapping

Example #29

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_count_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``count`` expression.

    Checks run in this order: source event, trigger event equality, trigger
    fields, group-by, conditions, interval, algorithm, trigger event shape
    and fields, group-by fields, operand fields, and finally that group-by
    and trigger fields agree in number and data type. An algorithm named
    ``'distinct'`` is rewritten on ``exp`` to ``'distinct_count'``.

    :return: always ``'double'``
    :raise RuntimeError: on the first failing check
    """
    prefix_error = lambda text: RuntimeError(utf8(error_prefix) + text)

    # Source event.
    source_event = exp.source_event
    if not (isinstance(source_event, list) and len(source_event) == 2):
        raise prefix_error('事件配置不正确')
    source_name = source_event[1]
    check_event_exist(source_name, error_prefix)

    if exp.trigger_event != trigger_event:
        raise prefix_error('触发事件不一致')

    if not exp.trigger_fields:
        raise prefix_error('触发字段为空')

    # todo: limitation, only single field is support now
    if len(exp.trigger_fields) > 1:
        raise prefix_error('目前只支持单字段触发')
    if not exp.groupby or len(exp.groupby) > 1:
        raise prefix_error('目前只支持单字段触发')

    # todo: condition
    for cond in exp.condition:
        cond_left, cond_op, cond_right = cond['left'], cond['op'], cond['right']

        if cond_op == '=':
            # Special case: "equals a variable" is not checked here.
            continue

        check_field_exist(source_name, cond_left, error_prefix)
        cond_left_type = get_event_field_type(source_name, cond_left, error_prefix)
        check_condition_support(cond_left_type, cond_op, cond_right, error_prefix)

    # Interval (time window).
    if not exp.interval > 0:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的时间窗口值' % utf8(exp.interval))

    # Algorithm: normalise the short name before validating.
    if exp.algorithm == 'distinct':
        exp.algorithm = 'distinct_count'
    if exp.algorithm not in {'count', 'distinct_count', 'interval'}:
        raise RuntimeError(utf8(error_prefix) + '不支持算法(%s)' % utf8(exp.algorithm))

    # Trigger event / fields.
    own_trigger_event = exp.trigger_event
    own_trigger_fields = exp.trigger_fields

    if not (isinstance(own_trigger_event, list) and len(own_trigger_event) == 2):
        raise prefix_error('事件配置不正确')
    trigger_name = own_trigger_event[1]

    check_event_exist(trigger_name, error_prefix)
    for field in own_trigger_fields:
        check_field_exist(trigger_name, field, error_prefix)

    # Group by.
    for field in exp.groupby:
        check_field_exist(source_name, field, error_prefix)

    # Operand.
    if not exp.operand:
        raise RuntimeError(utf8(error_prefix) + '算法(%s)缺少统计对象' % utf8(exp.algorithm))

    for field in exp.operand:
        check_field_exist(source_name, field, error_prefix)

    # Group-by and trigger fields must pair up with equal data types; the
    # scan stops at the first pair that differs.
    dimensions_differ = len(exp.groupby) != len(exp.trigger_fields) or any(
        get_event_field_type(source_name, groupby_field, error_prefix) !=
        get_event_field_type(trigger_name, trigger_field, error_prefix)
        for groupby_field, trigger_field in zip(exp.groupby, exp.trigger_fields))
    if dimensions_differ:
        raise prefix_error('触发维度和统计维度不一致')

    # Real data is int or double.
    return 'double'

Example #30

0

Show file

File: term_checking.py Project: threathunterX/python_lib

def _check_location_expression_and_return_type(exp, trigger_event, error_prefix):
    """
    Validate a ``getlocation`` expression.

    :param exp: expression exposing ``source_event_field`` (three
        dot-separated parts), ``op``, ``location_type`` and ``location_value``
    :param trigger_event: trigger event; its second item must equal the source event
    :param error_prefix: text prepended to every error message
    :return: always ``''``
    :raise RuntimeError: on the first failing check
    """
    # split() always returns a list, so only the number of parts matters.
    source_event_field = exp.source_event_field.split('.')
    if len(source_event_field) != 3:
        raise RuntimeError(utf8(error_prefix) + '不正确的地理位置参数')

    _, event, field = source_event_field
    check_event_exist(event, error_prefix)
    check_field_exist(event, field, error_prefix)

    # Pass the prefix, as the other callers of get_event_field_type do.
    if get_event_field_type(event, field, error_prefix) not in {'string', 'str'}:
        raise RuntimeError(utf8(error_prefix) + 'getlocation的来源字段(%s)不是字符串类型' % utf8(field))

    if event != trigger_event[1]:
        raise RuntimeError(utf8(error_prefix) +
                           'getlocation的来源事件(%s)与触发事件(%s)不一致' % (utf8(event), utf8(trigger_event[1])))

    if exp.op not in {'belong', '!belong', '=', '!='}:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的操作符' % utf8(exp.op))

    if exp.location_type not in {'city', 'province'}:
        raise RuntimeError(utf8(error_prefix) + '(%s)不是正确的类型' % utf8(exp.location_type))

    # any() instead of the truthiness of filter(...): that only worked while
    # filter returned a list; a lazy filter object is always truthy.
    if not exp.location_value or not any(item is not None for item in exp.location_value):
        raise RuntimeError(utf8(error_prefix) + 'getlocation缺乏参数配置')

    if exp.op in {'=', '!='} and len(exp.location_value) > 1:
        raise RuntimeError(utf8(error_prefix) + '等于操作只支持一个变量')

    # ASCII values are treated as field names and must exist on the trigger
    # event; the scan stops at the first non-ASCII (e.g. Chinese) value.
    for field in exp.location_value:
        if not is_ascii(field):
            break
        check_field_exist(trigger_event[1], field, error_prefix)
    return ''