Esempio n. 1
0
def test_cache_delete(cache: Cache):
    """Check that cache.delete() drops a key and tolerates a repeated delete."""
    key = "key"
    value = "value"
    cache.set(key, value)
    assert cache.has(key)

    # First pass removes the entry; second pass deletes an already-absent key.
    for _ in range(2):
        cache.delete(key)
        assert not cache.has(key)
class tcp_http_pcap():
    def __init__(self, pcap_collection_data, max_queue_size, work_queue,
                 interface, custom_tag, return_deep_info, http_filter_json,
                 cache_size, session_size, bpf_filter, timeout, debug):
        """
        Store the capture settings, open the sniffer and build caches/regexes.

        :param pcap_collection_data: container checked with ``'TCP' in`` /
            ``'HTTP' in`` by ``run`` to decide which protocols are collected
        :param max_queue_size: maximum length of the asset queue
        :param work_queue: queue that captured asset messages are appended to
        :param interface: name of the network interface to capture on
        :param custom_tag: data tag used to tell collection engines apart
        :param return_deep_info: whether to process extra information
            (raw request, response headers and body)
        :param http_filter_json: HTTP filter configuration (by status code
            and content type)
        :param cache_size: number of already-processed records to remember;
            entries live 120 seconds so duplicates are not collected again
        :param session_size: number of cached HTTP/TCP sessions; entries
            live 30 seconds
        :param bpf_filter: low-level (BPF) packet filter expression
        :param timeout: forwarded to the sniffer as ``timeout_ms``
        :param debug: debug switch
        """
        # Plain configuration values.
        self.interface = interface
        self.custom_tag = custom_tag
        self.return_deep_info = return_deep_info
        self.http_filter_json = http_filter_json
        self.session_size = session_size
        self.cache_size = cache_size
        self.bpf_filter = bpf_filter
        self.timeout = timeout
        self.debug = debug
        self.work_queue = work_queue
        self.max_queue_size = max_queue_size
        self.pcap_collection_data = pcap_collection_data
        self.total_msg_num = 0

        # Live capture handle restricted by the BPF expression.
        self.sniffer = pcap.pcap(self.interface,
                                 snaplen=65535,
                                 promisc=True,
                                 timeout_ms=self.timeout,
                                 immediate=False)
        self.sniffer.setfilter(self.bpf_filter)

        # Per-connection handshake/request state.
        self.tcp_stream_cache = Cache(maxsize=self.session_size,
                                      ttl=30,
                                      timer=time.time,
                                      default=None)

        # De-duplication caches exist only when a cache size is configured.
        if self.cache_size:
            dedup_options = dict(maxsize=self.cache_size,
                                 ttl=120,
                                 timer=time.time,
                                 default=None)
            self.tcp_cache = LRUCache(**dedup_options)
            self.http_cache = LRUCache(**dedup_options)

        # Regular expressions used to analyse HTTP data.
        request_pattern = r'^([A-Z]+) +([^ \r\n]+) +HTTP/\d+(?:\.\d+)?[^\r\n]*(.*?)$'
        response_pattern = r'^HTTP/(\d+(?:\.\d+)?) (\d+)[^\r\n]*(.*?)$'
        meta_charset_pattern = rb'<meta[^>]+?charset=[\'"]?([a-z\d\-]+)[\'"]?'
        self.decode_request_regex = re.compile(request_pattern, re.S)
        self.decode_response_regex = re.compile(response_pattern, re.S)
        self.decode_body_regex = re.compile(meta_charset_pattern, re.I)

    def run(self):
        """
        Entry point: iterate over captured packets and emit TCP/HTTP records.

        State is tracked in ``self.tcp_stream_cache`` under two key families:
        ``S_<ack>`` (written when a SYN-ACK is seen, value ``seq + 1``) and
        ``C_<ack>`` (the payload of the first data packet following the
        handshake). A later data packet whose ``seq`` matches a ``C_`` key is
        treated as the reply to that cached request. Records are handed to
        ``self.send_msg``. The sniffer is closed once iteration ends.
        """
        for ts, pkt in self.sniffer:
            # (disabled) packet-rate progress output:
            # self.total_msg_num += 1
            # if self.total_msg_num%1000 == 0:
            # 	print("Packet analysis rate: %s"%(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())+" - "+str(self.total_msg_num)))
            packet = self.pkt_decode(pkt)
            if not packet:
                continue

            # print('{}:{}->{}:{}: Seq:{}, Ack:{}, Flag: {}, Len: {}'.format(packet.src, packet.sport, packet.dst, packet.dport, packet.ack, packet.seq, packet.flags, len(packet.data)))
            cache_key = '{}:{}'.format(packet.src, packet.sport)
            # SYN & ACK
            if packet.flags == 0x12:
                # Skip endpoints that were already reported recently.
                if self.cache_size and self.tcp_cache.get(cache_key):
                    continue

                # Remember the sequence number expected for the first data
                # packet from this sender, keyed by the acknowledged number.
                self.tcp_stream_cache.set('S_{}'.format(packet.ack),
                                          packet.seq + 1)

            # ACK || PSH-ACK
            elif packet.flags in [0x10, 0x18, 0x19]:
                # Packets without payload are not processed
                if len(packet.data) == 0:
                    continue

                # First request packet carrying data: cache it for now
                # Seq == SYN-ACK Ack
                pre_cs_seq = self.tcp_stream_cache.get('S_{}'.format(
                    packet.seq))
                if pre_cs_seq:
                    c_s_key = 'C_{}'.format(packet.ack)

                    self.tcp_stream_cache.set(c_s_key, packet.data)
                    self.tcp_stream_cache.delete('S_{}'.format(packet.seq))
                    continue

                # 1. Capture traffic where the server speaks first, e.g. MySQL
                # Seq == SYN-ACK Seq + 1
                if 'TCP' in self.pcap_collection_data:
                    pre_sc_seq = self.tcp_stream_cache.get('S_{}'.format(
                        packet.ack))
                    if pre_sc_seq == packet.seq:
                        self.tcp_stream_cache.delete('S_{}'.format(packet.ack))

                        # Short-term TCP de-duplication
                        if self.cache_size:
                            self.tcp_cache.set(cache_key, True)

                        data = {
                            'pro': 'TCP',
                            'tag': self.custom_tag,
                            'ip': packet.src,
                            'port': packet.sport,
                            'data': packet.data.hex()
                        }
                        self.send_msg(data)
                        continue

                # 2. Capture traffic where the server only answers a request,
                #    e.g. HTTP
                # Seq == PSH ACK(C->S) Ack
                send_data = self.tcp_stream_cache.get('C_{}'.format(
                    packet.seq))
                # Check whether request data was cached for this reply
                if send_data:
                    # Drop the cache entry now that it has been consumed
                    self.tcp_stream_cache.delete('C_{}'.format(packet.seq))

                    # HTTP collection: the reply must start with "HTTP/"
                    if 'HTTP' in self.pcap_collection_data and packet.data[:
                                                                           5] == b'HTTP/':
                        request_dict = self.decode_request(
                            send_data, packet.src, str(packet.sport))
                        if not request_dict:
                            continue

                        http_cache_key = '{}:{}'.format(
                            request_dict['method'], request_dict['uri'])
                        if self.cache_size and self.http_cache.get(
                                http_cache_key):
                            continue

                        response_dict = self.decode_response(packet.data)
                        if response_dict:
                            # Short-term HTTP de-duplication
                            if self.cache_size:
                                self.http_cache.set(http_cache_key, True)

                            response_code = response_dict['status']
                            content_type = response_dict['type']

                            # Filter by response status code and content type
                            if self.http_filter_json:
                                filter_code = self.http_filter(
                                    'response_code',
                                    response_code) if response_code else False
                                filter_type = self.http_filter(
                                    'content_type',
                                    content_type) if content_type else False
                                if filter_code or filter_type:
                                    continue

                            data = {
                                'pro': 'HTTP',
                                'tag': self.custom_tag,
                                'ip': packet.src,
                                'port': packet.sport,
                                'method': request_dict['method'],
                                'code': response_code,
                                'type': content_type,
                                'server': response_dict['server'],
                                'header': response_dict['headers'],
                                'url': request_dict['uri'],
                                'body': response_dict['body']
                            }

                            self.send_msg(data)
                            continue

                    # TCP collection (reply does not look like HTTP, or HTTP
                    # collection is not enabled)
                    elif 'TCP' in self.pcap_collection_data:
                        # Short-term TCP de-duplication
                        if self.cache_size:
                            self.tcp_cache.set(cache_key, True)

                        # 2.2 Non-HTTP traffic
                        data = {
                            'pro': 'TCP',
                            'tag': self.custom_tag,
                            'ip': packet.src,
                            'port': packet.sport,
                            'data': packet.data.hex()
                        }
                        self.send_msg(data)

        self.sniffer.close()

    def http_filter(self, key, value):
        """
		检查字符串中是否包含特定的规则
		:param key: 规则键名,response_code(状态码)或 content_type(内容类型)
		:param value: 要检查的字符串
		:return: True - 包含, False - 不包含
		"""
        if key in self.http_filter_json:
            for rule in self.http_filter_json[key]:
                if rule in value:
                    return True
        return False

    def pkt_decode(self, pkt):
        """
        Decode a raw Ethernet frame and return its TCP segment if relevant.

        A segment is returned only when it is carried over IPv4 or IPv6 and
        is either a SYN-ACK (flags ``0x12``) or an ACK/PSH-ACK/FIN-PSH-ACK
        (``0x10``/``0x18``/``0x19``) with a non-empty payload. The returned
        segment gets ``src`` and ``dst`` attributes holding textual addresses.

        :param pkt: raw frame bytes as yielded by the sniffer
        :return: the annotated TCP segment, or None when the frame is not of
            interest or cannot be decoded
        """
        data_flags = (0x10, 0x18, 0x19)
        try:
            frame = dpkt.ethernet.Ethernet(pkt)
            if isinstance(frame.data, dpkt.ip.IP):
                is_ipv4 = True
            elif isinstance(frame.data, dpkt.ip6.IP6):
                is_ipv4 = False
            else:
                return None

            if not isinstance(frame.data.data, dpkt.tcp.TCP):
                return None

            segment = frame.data.data
            is_syn_ack = segment.flags == 0x12
            if not (is_syn_ack or
                    (segment.flags in data_flags and len(segment.data) > 0)):
                return None

            if is_ipv4:
                segment.src = self.ip_addr(frame.data.src)
                segment.dst = self.ip_addr(frame.data.dst)
            else:
                # IPv6 addresses are first rendered as one hex string.
                src_hex = ''.join(['%02X' % x for x in frame.data.src])
                segment.src = self.ip6_addr(src_hex)
                dst_hex = ''.join(['%02X' % x for x in frame.data.dst])
                segment.dst = self.ip6_addr(dst_hex)
            return segment
        except KeyboardInterrupt:
            print('\nExit.')
            os.kill(os.getpid(), signal.SIGKILL)
        except Exception:
            # Undecodable frames are ignored on purpose.
            # print(str(e))
            # print(("".join(['%02X ' % b for b in pkt])))
            pass
        return None

    def ip_addr(self, ip):
        return '%d.%d.%d.%d' % tuple(ip)

    def ip6_addr(self, ip6):
        ip6_addr = ''
        ip6_list = re.findall(r'.{4}', ip6)
        for i in range(len(ip6_list)):
            ip6_addr += ':%s' % (ip6_list[i].lstrip('0')
                                 if ip6_list[i].lstrip('0') else '0')
        return ip6_addr.lstrip(':')

    def decode_request(self, data, sip, sport):
        pos = data.find(b'\r\n\r\n')
        body = data[pos + 4:] if pos > 0 else b''
        data_str = str(data[:pos] if pos > 0 else data, 'utf-8', 'ignore')
        m = self.decode_request_regex.match(data_str)
        if m:
            if m.group(2)[:1] != '/':
                return None

            headers = m.group(3).strip() if m.group(3) else ''
            header_dict = self.parse_headers(headers)
            host_domain = ''
            # host domain
            if 'host' in header_dict and re.search('[a-zA-Z]',
                                                   header_dict['host']):
                host_domain = header_dict['host']
            # host ip
            else:
                host_domain = sip + ':' + sport if sport != '80' else sip
            url = 'http://{}{}'.format(
                host_domain, m.group(2)) if host_domain else m.group(2)

            return {
                'method': m.group(1) if m.group(1) else '',
                'uri': url,
                'headers': headers,
                'body': str(body, 'utf-8', 'ignore')
            }

        return {
            'method':
            '',
            'uri':
            'http://{}:{}/'.format(sip if ':' not in sip else '[' + sip + ']',
                                   sport),
            'headers':
            '',
            'body':
            ''
        }

    def decode_response(self, data):
        pos = data.find(b'\r\n\r\n')
        body = data[pos + 4:] if pos > 0 else b''
        header_str = str(data[:pos] if pos > 0 else data, 'utf-8', 'ignore')
        m = self.decode_response_regex.match(header_str)
        if m:
            headers = m.group(3).strip() if m.group(3) else ''
            headers_dict = self.parse_headers(headers)
            if self.return_deep_info and 'transfer-encoding' in headers_dict and headers_dict[
                    'transfer-encoding'] == 'chunked':
                body = self.decode_chunked(body)

            if self.return_deep_info and 'content-encoding' in headers_dict:
                if headers_dict['content-encoding'] == 'gzip':
                    body = self.decode_gzip(body)
                elif headers_dict['content-encoding'] == 'br':
                    body = self.decode_brotli(body)

            content_type = '' if 'content-type' not in headers_dict else headers_dict[
                'content-type']
            server = '' if 'server' not in headers_dict else headers_dict[
                'server']
            return {
                'version': m.group(1) if m.group(1) else '',
                'status': m.group(2) if m.group(2) else '',
                'headers': headers,
                'type': content_type,
                'server': server,
                'body': self.decode_body(body, content_type)
            }

        return None

    def decode_gzip(self, data):
        '''
		还原 HTTP 响应中采用 gzip 压缩的数据
		标识:
		Content-Encoding: gzip
		'''
        try:
            buf = io.BytesIO(data)
            gf = gzip.GzipFile(fileobj=buf)
            content = gf.read()
            gf.close()

            return content
        except:
            return data

    def decode_brotli(self, data):
        '''
		还原 HTTP 响应中采用 brotli 压缩的数据
		标识:
		Content-Encoding: br
		'''
        try:
            return brotli.decompress(data)
        except:
            return data

    def decode_chunked(self, data):
        '''
		还原 HTTP 响应中被 Chunked 的数据
		示例:
		Transfer-Encoding: chunked

		1b
		{"ret":0, "messge":"error"}
		'''
        line_end = data.find(b'\r\n')
        if line_end > 0:
            data_len = -1
            try:
                data_len = int(data[:line_end], 16)
                if data_len == 0:
                    return b''

                if data_len > 0:
                    new_data = data[line_end + 2:line_end + 2 + data_len]
                    return new_data + self.decode_chunked(
                        data[line_end + 2 + data_len + 2:])
            except:
                return data

        return data

    def decode_body(self, data, content_type):
        charset_white_list = [
            'big5', 'big5-hkscs', 'cesu-8', 'euc-jp', 'euc-kr', 'gb18030',
            'gb2312', 'gbk', 'ibm-thai', 'ibm00858', 'ibm01140', 'ibm01141',
            'ibm01142', 'ibm01143', 'ibm01144', 'ibm01145', 'ibm01146',
            'ibm01147', 'ibm01148', 'ibm01149', 'ibm037', 'ibm1026', 'ibm1047',
            'ibm273', 'ibm277', 'ibm278', 'ibm280', 'ibm284', 'ibm285',
            'ibm290', 'ibm297', 'ibm420', 'ibm424', 'ibm437', 'ibm500',
            'ibm775', 'ibm850', 'ibm852', 'ibm855', 'ibm857', 'ibm860',
            'ibm861', 'ibm862', 'ibm863', 'ibm864', 'ibm865', 'ibm866',
            'ibm868', 'ibm869', 'ibm870', 'ibm871', 'ibm918',
            'iso-10646-ucs-2', 'iso-2022-cn', 'iso-2022-jp', 'iso-2022-jp-2',
            'iso-2022-kr', 'iso-8859-1', 'iso-8859-10', 'iso-8859-13',
            'iso-8859-15', 'iso-8859-16', 'iso-8859-2', 'iso-8859-3',
            'iso-8859-4', 'iso-8859-5', 'iso-8859-6', 'iso-8859-7',
            'iso-8859-8', 'iso-8859-9', 'jis_x0201', 'jis_x0212-1990',
            'koi8-r', 'koi8-u', 'shift_jis', 'tis-620', 'us-ascii', 'utf-16',
            'utf-16be', 'utf-16le', 'utf-32', 'utf-32be', 'utf-32le', 'utf-8',
            'windows-1250', 'windows-1251', 'windows-1252', 'windows-1253',
            'windows-1254', 'windows-1255', 'windows-1256', 'windows-1257',
            'windows-1258', 'windows-31j', 'x-big5-hkscs-2001',
            'x-big5-solaris', 'x-euc-jp-linux', 'x-euc-tw', 'x-eucjp-open',
            'x-ibm1006', 'x-ibm1025', 'x-ibm1046', 'x-ibm1097', 'x-ibm1098',
            'x-ibm1112', 'x-ibm1122', 'x-ibm1123', 'x-ibm1124', 'x-ibm1166',
            'x-ibm1364', 'x-ibm1381', 'x-ibm1383', 'x-ibm300', 'x-ibm33722',
            'x-ibm737', 'x-ibm833', 'x-ibm834', 'x-ibm856', 'x-ibm874',
            'x-ibm875', 'x-ibm921', 'x-ibm922', 'x-ibm930', 'x-ibm933',
            'x-ibm935', 'x-ibm937', 'x-ibm939', 'x-ibm942', 'x-ibm942c',
            'x-ibm943', 'x-ibm943c', 'x-ibm948', 'x-ibm949', 'x-ibm949c',
            'x-ibm950', 'x-ibm964', 'x-ibm970', 'x-iscii91',
            'x-iso-2022-cn-cns', 'x-iso-2022-cn-gb', 'x-iso-8859-11',
            'x-jis0208', 'x-jisautodetect', 'x-johab', 'x-macarabic',
            'x-maccentraleurope', 'x-maccroatian', 'x-maccyrillic',
            'x-macdingbat', 'x-macgreek', 'x-machebrew', 'x-maciceland',
            'x-macroman', 'x-macromania', 'x-macsymbol', 'x-macthai',
            'x-macturkish', 'x-macukraine', 'x-ms932_0213', 'x-ms950-hkscs',
            'x-ms950-hkscs-xp', 'x-mswin-936', 'x-pck', 'x-sjis',
            'x-sjis_0213', 'x-utf-16le-bom', 'x-utf-32be-bom',
            'x-utf-32le-bom', 'x-windows-50220', 'x-windows-50221',
            'x-windows-874', 'x-windows-949', 'x-windows-950',
            'x-windows-iso2022jp'
        ]
        content_type = content_type.lower() if content_type else ''
        if 'charset=' in content_type:
            charset = content_type[content_type.find('charset=') +
                                   8:].strip('" ;\r\n').lower()
            if charset != 'iso-8859-1' and charset in charset_white_list:
                return str(data, charset, 'ignore')

        m = self.decode_body_regex.match(data)
        if m:
            charset = m.group(1).lower() if m.group(1) else ''
            if charset != 'iso-8859-1' and charset in charset_white_list:
                return str(data, charset, 'ignore')

        return str(data, 'utf-8', 'ignore')

    def parse_headers(self, data):
        headers = {}
        lines = data.split('\r\n')
        for _ in lines:
            pos = _.find(':')
            if pos > 0:
                headers[_[:pos].lower()] = _[pos + 1:].strip()
        return headers

    def send_msg(self, data):
        result = json.dumps(data)
        if self.debug:
            print(result)
        if len(self.work_queue) >= self.max_queue_size * 0.95:
            self.work_queue.clear()
        self.work_queue.append(result)
Esempio n. 3
0
# Memoized coroutine function: an ``async def`` wrapped by cache.memoize()
# (it returns a value rather than yielding, so it is not an async generator).
@cache.memoize()
async def func(a, b):
    """Return ``a + b``."""
    return a + b


# Synchronous variant; this rebinds the name ``func`` defined above.
@cache.memoize()
def func(a, b):
    """Placeholder with an empty body (returns None)."""
    pass


# Call through the ``uncached`` attribute of the memoized function
# (presumably the undecorated callable, bypassing the cache - confirm
# against the cache library's memoize documentation).
func.uncached(1, 2)

# NOTE(review): this assumes the cache was populated with exactly these two
# entries beforehand; that setup is not visible in this snippet.
assert cache.copy() == {1: "foobar", 2: ("foo", "bar", "baz")}

# Deleting a key makes a subsequent get() return None.
cache.delete(1)
assert cache.get(1) is None

# clear() leaves the cache empty.
cache.clear()
assert len(cache) == 0

# Bulk set / get / delete round trip.
cache.set_many({"a": 1, "b": 2, "c": 3})
assert cache.get_many(["a", "b", "c"]) == {"a": 1, "b": 2, "c": 3}
cache.delete_many(["a", "b", "c"])
assert cache.count() == 0

import re

# Keys of mixed shapes; ``re`` is imported just above but not used within
# the visible lines.
cache.set_many({"a_1": 1, "a_2": 2, "123": 3, "b": 4})

# func.uncached(1, 2)
Esempio n. 4
0
class Redis(object):
    """
    Small in-memory stand-in for a handful of Redis commands.

    Values live in one ``Cache`` (``_data``), tokens in another
    (``_tokens``). Hashes are stored as a dict under the hash name and lists
    as a Python list under the list name.
    """

    def __init__(self):
        self._data = Cache()
        self._tokens = Cache()

    def check_token(self, token):
        """Return True if ``token`` is a key of the token store."""
        return token in self._tokens.keys()

    def set_token(self, key, value):
        """Store ``value`` under ``key`` in the token store."""
        self._tokens.set(key, value)

    def keys(self, pattern):
        """
        Return the data keys matched by the regular expression ``pattern``.

        Matching uses ``re.search``, i.e. the pattern may match anywhere in
        the key.
        """
        return [
            key for key in self._data.keys()
            if re.search(pattern, key) is not None
        ]

    def set(self, key, value, ttl=None):
        """Store ``value`` under ``key`` with an optional ``ttl``; return 1."""
        self._data.set(key, value, ttl)
        return 1

    def hset(self, hash, key, value, ttl=None):
        """
        Set field ``key`` of the hash named ``hash`` to ``value``; return 1.

        All fields of one hash are kept in a single dict stored under the
        hash name, so several fields can coexist and no top-level key is
        touched. A non-dict value already stored under ``hash`` is replaced.
        ``ttl`` is applied to the whole hash.
        """
        stored = self._data.get(hash)
        fields = dict(stored) if isinstance(stored, dict) else {}
        fields[key] = value
        self._data.set(hash, fields, ttl)
        return 1

    def lset(self, name_list, index, value, ttl=None):
        """
        Set element ``index`` of the list ``name_list`` to ``value``; return 1.

        A missing list is created, and a list that is too short is padded
        with ``0`` up to ``index``.

        :raises TypeError: if ``name_list`` holds a value that is not a list
        :raises IndexError: if a negative ``index`` is out of range
        """
        stored = self._data.get(name_list)
        if stored is None:
            # Missing (or expired) key: start from an empty list.
            items = []
        elif isinstance(stored, list):
            items = list(stored)
        else:
            raise TypeError(
                'value stored under %r is not a list' % (name_list, ))

        if index >= len(items):
            items.extend([0] * (index + 1 - len(items)))
        items[index] = value
        self._data.set(name_list, items, ttl)
        return 1

    def get(self, key):
        """Return the value stored under ``key`` (the cache default if absent)."""
        return self._data.get(key)

    def hget(self, hash, key):
        """Return field ``key`` of the hash ``hash``, or None if absent."""
        fields = self._data.get(hash)
        if not isinstance(fields, dict):
            return None
        return fields.get(key)

    def lget(self, name_of_list, index):
        """Return element ``index`` of a stored list, or None if unavailable."""
        items = self._data.get(name_of_list)
        if items is None:
            return None
        if -len(items) <= index < len(items):
            return items[index]
        return None

    def delete(self, key):
        """Delete ``key``; return 1 if it existed, otherwise None."""
        if key in self._data.keys():
            self._data.delete(key)
            return 1
        return None
Esempio n. 5
0
class tcp_http_shark():
    def __init__(self, work_queue, interface, custom_tag, return_deep_info,
                 http_filter_json, cache_size, session_size, bpf_filter,
                 timeout, debug):
        """
        Store the capture settings, create the live capture and the caches.

        :param work_queue: queue that captured asset messages are appended to
        :param interface: name of the network interface to capture on
        :param custom_tag: data tag used to tell collection engines apart
        :param return_deep_info: whether to process extra information
            (raw request, response headers and body)
        :param http_filter_json: HTTP filter configuration (by status code
            and content type)
        :param cache_size: number of already-processed records to remember;
            entries live 120 seconds so duplicates are not reported again
        :param session_size: number of cached HTTP/TCP sessions; entries
            live 16 seconds
        :param bpf_filter: low-level (BPF) packet filter expression
        :param timeout: timeout handed to ``apply_on_packets`` by ``run``
        :param debug: debug switch
        """
        # Plain configuration values.
        self.interface = interface
        self.custom_tag = custom_tag
        self.return_deep_info = return_deep_info
        self.http_filter_json = http_filter_json
        self.session_size = session_size
        self.cache_size = cache_size
        self.bpf_filter = bpf_filter
        self.timeout = timeout
        self.debug = debug
        self.work_queue = work_queue

        self.pktcap = pyshark.LiveCapture(interface=self.interface,
                                          bpf_filter=self.bpf_filter,
                                          use_json=False,
                                          debug=self.debug)

        # Session caches exist only when a session size is configured.
        if self.session_size:
            session_options = dict(maxsize=self.session_size,
                                   ttl=16,
                                   timer=time.time,
                                   default=None)
            self.http_stream_cache = Cache(**session_options)
            self.tcp_stream_cache = Cache(**session_options)

        # De-duplication caches exist only when a cache size is configured.
        if self.cache_size:
            dedup_options = dict(maxsize=self.cache_size,
                                 ttl=120,
                                 timer=time.time,
                                 default=None)
            self.http_cache = LRUCache(**dedup_options)
            self.tcp_cache = LRUCache(**dedup_options)

        # Regular expression that detects the page charset in a <meta> tag.
        meta_charset_pattern = rb'<meta [^>]*?charset=["\']?([a-z\-\d]+)["\'>]?'
        self.encode_regex = re.compile(meta_charset_pattern, re.I)
    def http_filter(self, key, value):
        """
		检查字符串中是否包含特定的规则
		:param key: 规则键名,response_code(状态码)或 content_type(内容类型)
		:param value: 要检查的字符串
		:return: True - 包含, False - 不包含
		"""
        if key in self.http_filter_json:
            for rule in self.http_filter_json[key]:
                if rule in value:
                    return True
        return False

    def run(self):
        """
        Entry point: feed every captured packet to ``proc_packet``.

        Capture runs through ``self.pktcap.apply_on_packets`` with
        ``self.timeout``; a ``concurrent.futures.TimeoutError`` raised by it
        is reported on stdout and otherwise ignored.
        """
        try:
            self.pktcap.apply_on_packets(self.proc_packet,
                                         timeout=self.timeout)
        except concurrent.futures.TimeoutError:
            print("\nTimeoutError.")
    def proc_packet(self, pkt):
        """
		全局数据包处理:识别、路由及结果发送
		:param pkt: 数据包
		:return: JSON or None
		"""
        try:
            pkt_json = None
            pkt_dict = dir(pkt)

            if 'ip' in pkt_dict:
                if 'http' in pkt_dict:
                    pkt_json = self.proc_http(pkt)
                elif 'tcp' in pkt_dict:
                    pkt_json = self.proc_tcp(pkt)

            if pkt_json:
                result = json.dumps(pkt_json)
                if self.debug:
                    print(result)
                self.work_queue.append(result)
        except:
            traceback.print_exc()

    def proc_http(self, pkt):
        """
        Process an HTTP packet.

        Requests are only remembered (URL and method, keyed by TCP stream)
        when session caching is enabled. Responses produce the result dict:
        URL/method from the cached request when available, protocol, tag,
        source address/port, status code, content type, server and - with
        ``return_deep_info`` - header text and decoded body.

        :param pkt: captured packet
        :return: dict describing the response, or None (request packets,
            de-duplicated URLs, filtered responses)
        """
        http_dict = dir(pkt.http)

        if 'request' in http_dict and self.session_size:
            req = {
                'url':
                pkt.http.request_full_uri
                if 'request_full_uri' in http_dict else pkt.http.request_uri,
                'method':
                pkt.http.request_method
                if 'request_method' in http_dict else ''
            }

            # Keep the request until the response on the same stream arrives.
            self.http_stream_cache.set(pkt.tcp.stream, req)

        elif 'response' in http_dict:
            pkt_json = {}
            src_addr = pkt.ip.src
            src_port = pkt[pkt.transport_layer].srcport

            if self.session_size:
                cache_req = self.http_stream_cache.get(pkt.tcp.stream)
                if cache_req:
                    pkt_json['url'] = cache_req['url']
                    pkt_json['method'] = cache_req['method']
                    self.http_stream_cache.delete(pkt.tcp.stream)

            # No cached request: fall back to what the response layer offers.
            if 'url' not in pkt_json:
                if 'response_for_uri' in http_dict:
                    pkt_json["url"] = pkt.http.response_for_uri
                else:
                    pkt_json["url"] = '/'

            # Handle the case where the URL is only a path
            if pkt_json["url"][0] == '/':
                if src_port == '80':
                    pkt_json["url"] = "http://%s%s" % (src_addr,
                                                       pkt_json["url"])
                else:
                    pkt_json["url"] = "http://%s:%s%s" % (src_addr, src_port,
                                                          pkt_json["url"])

            if self.cache_size:
                # Cache to avoid processing many duplicate responses within a
                # short time
                # NOTE(review): the URL is recorded before the filters below
                # run, so a filtered response still marks the URL as seen.
                exists = self.http_cache.get(pkt_json['url'])
                if exists:
                    return None

                self.http_cache.set(pkt_json["url"], True)

            pkt_json["pro"] = 'HTTP'
            pkt_json["tag"] = self.custom_tag
            pkt_json["ip"] = src_addr
            pkt_json["port"] = src_port

            if 'response_code' in http_dict:
                if self.http_filter_json:
                    return_status = self.http_filter('response_code',
                                                     pkt.http.response_code)
                    if return_status:
                        return None
                pkt_json["code"] = pkt.http.response_code

            if 'content_type' in http_dict:
                if self.http_filter_json:
                    return_status = self.http_filter('content_type',
                                                     pkt.http.content_type)
                    if return_status:
                        return None
                pkt_json["type"] = pkt.http.content_type.lower()
            else:
                pkt_json["type"] = 'unkown'

            if 'server' in http_dict:
                pkt_json["server"] = pkt.http.server

            # Deep analysis enabled: also return header and body data
            if self.return_deep_info:
                charset = 'utf-8'
                # Detect the charset declared in Content-Type
                if 'type' in pkt_json and 'charset=' in pkt_json["type"]:
                    charset = pkt_json["type"][pkt_json["type"].find('charset='
                                                                     ) +
                                               8:].strip().lower()
                    if not charset:
                        charset = 'utf-8'
                if 'payload' in dir(pkt.tcp):
                    payload = bytes.fromhex(
                        str(pkt.tcp.payload).replace(':', ''))
                    if payload.find(b'HTTP/') == 0:
                        split_pos = payload.find(b'\r\n\r\n')
                        # Cap the header at 2048 bytes when no separator is
                        # found or it lies further in.
                        if split_pos <= 0 or split_pos > 2048:
                            split_pos = 2048
                        pkt_json["header"] = str(payload[:split_pos], 'utf-8',
                                                 'ignore')
                        # NOTE(review): this value is always overwritten by
                        # the if/elif/else chain right below.
                        data = str(payload[split_pos + 4:], 'utf-8', 'ignore')
                # Pick the body bytes from the first source that is present.
                if 'file_data' in http_dict and pkt.http.file_data.raw_value and pkt_json[
                        'type'] != 'application/octet-stream':
                    data = bytes.fromhex(pkt.http.file_data.raw_value)
                elif 'data' in http_dict:
                    data = bytes.fromhex(pkt.http.data)
                elif 'segment_data' in dir(pkt.tcp):
                    data = bytes.fromhex(pkt.tcp.segment_data.replace(":", ""))
                else:
                    data = ''

                if data:
                    # Detect the charset declared in the page's <meta> tag
                    data_head = data[:500] if data.find(
                        b'</head>', 0,
                        1024) == -1 else data[:data.find(b'</head>')]
                    match = self.encode_regex.search(data_head)
                    if match:
                        charset = str(
                            match.group(1).strip().lower(), 'utf-8', 'ignore')

                    # NOTE(review): an unknown charset name makes str() raise
                    # LookupError here; nothing in this method catches it.
                    response_body = proc_body_str(str(data, charset, 'ignore'),
                                                  16 * 1024)
                    # response_body = self.proc_body_json(str(data, charset, 'ignore'), 16*1024)
                    pkt_json["body"] = response_body
                else:
                    pkt_json["body"] = ''

            return pkt_json

        return None

    def proc_tcp(self, pkt):
        """
        Process a TCP packet.

        A SYN+ACK identifies a listening server endpoint. Without deep
        analysis (or without session caching) the endpoint is reported
        straight away; otherwise it is remembered per TCP stream and reported
        together with the first payload that endpoint sends.

        :param pkt: captured packet
        :return: dict describing the endpoint, or None
        """
        tcp_stream = pkt.tcp.stream

        pkt_json = {}
        pkt_json["pro"] = 'TCP'
        pkt_json["tag"] = self.custom_tag

        # SYN+ACK
        if pkt.tcp.flags == '0x00000012':
            server_ip = pkt.ip.src
            server_port = pkt[pkt.transport_layer].srcport
            tcp_info = '%s:%s' % (server_ip, server_port)

            # Report each endpoint at most once per cache lifetime.
            if self.cache_size:
                exists = self.tcp_cache.get(tcp_info)
                if exists:
                    return None
                self.tcp_cache.set(tcp_info, True)

            if self.return_deep_info and self.session_size:
                # Defer reporting until the endpoint's first payload is seen.
                self.tcp_stream_cache.set(tcp_stream, tcp_info)
            else:
                pkt_json["ip"] = server_ip
                pkt_json["port"] = server_port

                return pkt_json

        # With deep analysis on (-r on), capture the server's first response
        # packet
        if self.return_deep_info and pkt.tcp.seq == "1" and "payload" in dir(
                pkt.tcp) and self.session_size:
            tcp_info = self.tcp_stream_cache.get(tcp_stream)
            if tcp_info:
                # Avoid mis-handling the case where the client sends the
                # first packet
                src_host = '{}:{}'.format(pkt.ip.src,
                                          pkt[pkt.transport_layer].srcport)
                if tcp_info != src_host:
                    return None

                self.tcp_stream_cache.delete(tcp_stream)

                pkt_json["ip"] = pkt.ip.src
                pkt_json["port"] = pkt[pkt.transport_layer].srcport
                payload_data = pkt.tcp.payload.replace(":", "")
                # Payload starting with the hex of "HTTP" is not reported here.
                if payload_data.startswith("48545450"):  # ^HTTP
                    return None

                # HTTPS Protocol
                # TODO: other https port support
                if pkt_json["port"] == "443" and payload_data.startswith(
                        "1603"):  # SSL
                    pkt_json["pro"] = 'HTTPS'
                    pkt_json["url"] = "https://{}/".format(pkt_json["ip"])
                else:
                    pkt_json["data"] = proc_data_str(payload_data, 16 * 1024)

                return pkt_json
        return None
Esempio n. 6
0
class tcp_http_sniff():
	"""Passive HTTP/TCP asset collector driven by a pyshark live capture.

	Every captured packet is handed to ``proc_packet``. HTTP responses and
	TCP handshakes are turned into small dicts (protocol, ip, port, url, ...)
	which are serialised as JSON and sent to the configured log object.
	"""

	def __init__(self, interface, display_filter, syslog_ip, syslog_port, custom_tag, return_deep_info, filter_rules, cache_size, bpf_filter, timeout, debug):
		"""Store the configuration and create the capture, logger and caches.

		:param interface: interface name passed to ``pyshark.LiveCapture``
		:param display_filter: display filter passed to ``pyshark.LiveCapture``
		:param syslog_ip: address handed to the ``_logging`` helper
		:param syslog_port: port handed to the ``_logging`` helper
		:param custom_tag: value copied into the ``tag`` field of every record
		:param return_deep_info: when truthy, also collect headers, bodies and
			the first payload of plain TCP streams
		:param filter_rules: mapping of ``'response_code'`` / ``'content_type'``
			to lists of substrings; matching responses are dropped
		:param cache_size: ``maxsize`` of the de-duplication caches
		:param bpf_filter: capture filter passed to ``pyshark.LiveCapture``
		:param timeout: timeout passed to ``apply_on_packets`` in ``run``
		:param debug: when truthy, print records and processing errors
		"""
		self.debug = debug
		self.timeout = timeout
		self.bpf_filter = bpf_filter
		self.cache_size = cache_size
		self.filter_rules = filter_rules
		self.return_deep_info = return_deep_info
		self.custom_tag = custom_tag
		self.syslog_ip = syslog_ip
		self.syslog_port = syslog_port
		self.log_obj = _logging(self.syslog_ip, self.syslog_port)
		self.interface = interface
		self.display_filter = display_filter
		self.pktcap = pyshark.LiveCapture(interface=self.interface, bpf_filter=self.bpf_filter, use_json=True, display_filter=self.display_filter, debug=self.debug)
		# Both caches are created with a 120 second ttl.
		self.http_cache = Cache(maxsize=self.cache_size, ttl=120, timer=time.time, default=None)
		self.tcp_cache = Cache(maxsize=self.cache_size, ttl=120, timer=time.time, default=None)
		# Regular expression used to detect the page encoding from a <meta> tag.
		# Raw literal so that "\s" reaches the regex engine without relying on
		# an invalid string escape.
		self.encode_regex = re.compile(rb'<meta [^>]*?charset=["\']?([^"\'\s]+)["\']?', re.I)

	def http_filter(self, key, value):
		"""Return True when any rule configured for *key* occurs in *value*.

		:param key: rule group name (``'response_code'`` or ``'content_type'``)
		:param value: value taken from the HTTP response
		:return: True if the response should be dropped, otherwise False
		"""
		if key not in self.filter_rules:
			return False
		return any(rule in value for rule in self.filter_rules[key])

	def run(self):
		"""Process packets with ``proc_packet`` until the capture times out."""
		try:
			self.pktcap.apply_on_packets(self.proc_packet, timeout=self.timeout)
		except concurrent.futures.TimeoutError:
			print("\nTimeoutError.")

	def proc_packet(self, pkt):
		"""Dispatch one packet and emit the resulting record, if any.

		Packets carrying an ``http`` layer go to ``proc_http``; other packets
		with a ``tcp`` layer go to ``proc_tcp``. Packets without an ``ip``
		layer are ignored. Errors never propagate to the capture loop; in
		debug mode the traceback is printed to stderr instead of being lost.

		:param pkt: packet object supplied by the capture
		"""
		try:
			pkt_json = None
			layers = dir(pkt)

			if 'ip' in layers:
				if 'http' in layers:
					pkt_json = self.proc_http(pkt)
				elif 'tcp' in layers:
					pkt_json = self.proc_tcp(pkt)

			if pkt_json:
				line = json.dumps(pkt_json)
				if self.debug:
					print(line)
				self.log_obj.info(line)

		except Exception:
			# Best effort: one malformed packet must not stop the capture.
			if self.debug:
				traceback.print_exc()

	def proc_http(self, pkt):
		"""Build a record for an HTTP response packet.

		In deep mode, request packets only remember their URL (keyed by TCP
		stream) so the matching response can be labelled with it.

		:param pkt: packet with ``ip``, ``tcp`` and ``http`` layers
		:return: dict describing the response, or None when the packet is not
			a response, was seen recently, or is excluded by ``filter_rules``
		"""
		http_dict = dir(pkt.http)

		if self.return_deep_info and 'request' in http_dict:
			self.http_cache.set(pkt.tcp.stream, pkt.http.request_full_uri if 'request_full_uri' in http_dict else pkt.http.request_uri)

		if 'response' not in http_dict:
			return None

		pkt_json = {}
		src_addr = pkt.ip.src
		src_port = pkt[pkt.transport_layer].srcport

		cache_url = self.http_cache.get(pkt.tcp.stream)
		if cache_url:
			pkt_json['url'] = cache_url
			self.http_cache.delete(pkt.tcp.stream)
		elif 'response_for_uri' in http_dict:
			pkt_json["url"] = pkt.http.response_for_uri
		else:
			pkt_json["url"] = '/'

		# The URL may be a bare path; prefix it with the server address.
		# startswith() also copes with an empty URL.
		if pkt_json["url"].startswith('/'):
			if src_port == '80':
				pkt_json["url"] = "http://%s%s"%(src_addr,pkt_json["url"])
			else:
				pkt_json["url"] = "http://%s:%s%s"%(src_addr,src_port,pkt_json["url"])

		# De-duplication: skip URLs that are still present in the cache.
		if self.http_cache.get(pkt_json['url']):
			return None
		self.http_cache.set(pkt_json["url"], True)

		pkt_json["pro"] = 'HTTP'
		pkt_json["tag"] = self.custom_tag
		pkt_json["ip"] = src_addr
		pkt_json["port"] = src_port

		if 'response_code' in http_dict:
			if self.filter_rules and self.http_filter('response_code', pkt.http.response_code):
				return None
			pkt_json["code"] = pkt.http.response_code

		if 'content_type' in http_dict:
			if self.filter_rules and self.http_filter('content_type', pkt.http.content_type):
				return None
			pkt_json["type"] = pkt.http.content_type.lower()
		else:
			pkt_json["type"] = 'unkown'

		if 'server' in http_dict:
			pkt_json["server"] = pkt.http.server

		# Deep mode: also return the raw header block and the body.
		if self.return_deep_info:
			charset = 'utf-8'
			# Encoding announced in Content-Type; drop trailing parameters
			# and surrounding quotes.
			content_type = pkt_json["type"]
			if 'charset=' in content_type:
				announced = content_type[content_type.find('charset=') + 8:]
				charset = announced.split(';')[0].strip().strip('"\'').lower() or 'utf-8'

			if 'payload' in dir(pkt.tcp):
				payload = bytes.fromhex(str(pkt.tcp.payload).replace(':', ''))
				if payload.startswith(b'HTTP/'):
					split_pos = payload.find(b'\r\n\r\n')
					# No header terminator in range: keep at most 4096 bytes.
					if split_pos <= 0 or split_pos > 4096:
						split_pos = 4096
					pkt_json["header"] = str(payload[:split_pos], 'utf-8', 'ignore')

			if 'file_data' in http_dict and pkt.http.file_data.raw_value and pkt_json['type'] != 'application/octet-stream':
				data = bytes.fromhex(pkt.http.file_data.raw_value)
				# Encoding announced in the page <meta> takes precedence.
				head_end = data.find(b'</head>', 0, 1024)
				data_head = data[:500] if head_end == -1 else data[:head_end]
				match = self.encode_regex.search(data_head)
				if match:
					charset = str(match.group(1).lower(), 'utf-8', 'ignore')
				try:
					text = str(data, charset, 'ignore')
				except (LookupError, ValueError):
					# Unknown or malformed charset name: keep the record
					# and decode as UTF-8 instead of losing it.
					text = str(data, 'utf-8', 'ignore')
				pkt_json["body"] = self.proc_body(text, 16*1024)
			else:
				pkt_json["body"] = ''

		return pkt_json

	def proc_tcp(self, pkt):
		"""Build a record for a non-HTTP TCP packet.

		A SYN+ACK identifies the server endpoint. Without deep mode the
		endpoint is reported immediately; in deep mode it is remembered per
		stream and reported together with the server's first payload.

		:param pkt: packet with ``ip`` and ``tcp`` layers
		:return: dict describing the service, or None when nothing is reported
		"""
		tcp_stream = pkt.tcp.stream

		pkt_json = {}
		pkt_json["pro"] = 'TCP'
		pkt_json["tag"] = self.custom_tag

		# SYN+ACK
		if pkt.tcp.flags == '0x00000012':
			server_ip = pkt.ip.src
			server_port = pkt[pkt.transport_layer].srcport
			tcp_info = '%s:%s' % (server_ip, server_port)

			# Endpoint already seen recently.
			if self.tcp_cache.get(tcp_info):
				return None
			self.tcp_cache.set(tcp_info, True)

			if self.return_deep_info:
				# Remember which endpoint is the server for this stream.
				self.tcp_cache.set(tcp_stream, tcp_info)
			else:
				pkt_json["ip"] = server_ip
				pkt_json["port"] = server_port
				return pkt_json

		# Deep mode: collect the first data packet sent by the server.
		if self.return_deep_info and pkt.tcp.seq == "1" and "payload" in dir(pkt.tcp):
			tcp_info = self.tcp_cache.get(tcp_stream)
			if tcp_info:
				# The client's first data packet also has relative seq 1;
				# ignore it and keep waiting for the server's packet.
				src_host = '%s:%s' % (pkt.ip.src, pkt[pkt.transport_layer].srcport)
				if src_host != tcp_info:
					return None

				# The stream entry is no longer needed once the server spoke.
				self.tcp_cache.delete(tcp_stream)

				tcp_info_list = tcp_info.split(":")
				tcp_ip = tcp_info_list[0]
				tcp_port = tcp_info_list[1]
				pkt_json["ip"] = tcp_ip
				pkt_json["port"] = tcp_port
				payload_data = pkt.tcp.payload.replace(":","")
				# Payload starts with "HTTP": not reported as plain TCP.
				if payload_data.startswith("48545450"):
					return None
				# HTTPS Protocol
				# TODO: other https port support
				if tcp_port == "443" and payload_data.startswith("1603"):
					pkt_json["pro"] = 'HTTPS'
					pkt_json["url"] = "https://%s/"%(tcp_ip)
				else:
					pkt_json["data"] = payload_data
				return pkt_json
		return None

	def proc_body(self, data, length):
		"""Truncate *data* so that its JSON encoding fits in *length* characters.

		The limit applies to the ASCII JSON form (``json.dumps``), so escaped
		characters count with their escaped size. A cut never leaves half an
		escape sequence or half a surrogate pair behind.

		:param data: text to limit
		:param length: maximum size of the JSON-encoded text
		:return: *data* itself when it fits, otherwise a prefix of it
		"""
		if length <= 0:
			return ''

		encoded = json.dumps(data)
		if len(encoded) <= length:
			return data

		json_data = encoded[:length]

		# Only the last backslash can start an unfinished escape. It is an
		# escape introducer when the backslash run ending there has odd
		# length (an even run is made of complete "\\" pairs).
		last_slash = json_data.rfind('\\')
		if last_slash != -1:
			run_start = last_slash
			while run_start > 0 and json_data[run_start - 1] == '\\':
				run_start -= 1
			if (last_slash - run_start) % 2 == 0:
				# "\uXXXX" needs six characters, every other escape two.
				needed = 6 if json_data[last_slash + 1:last_slash + 2] == 'u' else 2
				if len(json_data) - last_slash < needed:
					json_data = json_data[:last_slash]

		result = json.loads(json_data + '"')

		# The cut may have separated a surrogate pair; drop the orphan half.
		if result and '\ud800' <= result[-1] <= '\udbff':
			result = result[:-1]
		return result