Exemple #1
0
    def RegularPraser(self, response, parser):
        '''
        Extract proxy records from ``response`` with a regular expression.

        :param response: text to search.
        :param parser: mapping with ``'pattern'`` (the regular expression) and
            ``'position'`` (a mapping whose ``'ip'`` and ``'port'`` entries
            index into each match returned by ``findall``).
        :return: list of proxy dicts with the keys ``ip``, ``port``,
            ``types``, ``protocol``, ``country``, ``area`` and ``speed``.
            The list is empty when nothing matches. ``port`` is kept exactly
            as captured (it is not converted to ``int`` here).
        '''
        proxylist = []
        # The type advertised by the source sites is unreliable, so the
        # default is used here; it is checked later on.
        proxy_type = 0
        protocol = 0
        # findall() always returns a list (never None), so iterate directly
        # and return unconditionally after the loop.
        for match in re.compile(parser['pattern']).findall(response):
            try:
                position = parser['position']
                ip = match[position['ip']]
                port = match[position['port']]
                addr = self.ips.getIpAddr(self.ips.str2ip(ip))
                if text_('省') in addr or self.AuthCountry(addr):
                    country = text_('国内')
                else:
                    country = text_('国外')
            except Exception:
                # Best effort: a match that cannot be resolved is skipped.
                continue

            proxylist.append({
                'ip': ip,
                'port': port,
                'types': proxy_type,
                'protocol': protocol,
                'country': country,
                'area': addr,
                'speed': 100
            })
        return proxylist
Exemple #2
0
 def AuthCountry(self, addr):
     '''
     Report whether ``addr`` contains any entry of ``CHINA_AREA``.

     :param addr: address text to inspect.
     :return: ``True`` as soon as one ``CHINA_AREA`` entry (passed through
         ``text_``) is found in ``addr``, ``False`` when none is.
     '''
     return any(text_(region) in addr for region in CHINA_AREA)
Exemple #3
0
    def XpathPraser(self, response, parser):
        '''
        Extract proxy records from an HTML document using XPath.

        :param response: HTML text handed to ``etree.HTML``.
        :param parser: mapping with ``'pattern'`` (an XPath selecting one node
            per proxy) and ``'position'`` (a mapping whose ``'ip'`` and
            ``'port'`` entries are XPaths evaluated against each node).
        :return: list of proxy dicts with the keys ``ip``, ``port`` (int),
            ``types``, ``protocol``, ``country``, ``area`` and ``speed``.
            Nodes that cannot be processed are skipped.
        '''
        proxylist = []
        root = etree.HTML(response)
        # Field legend from the original author: types 0 = high anonymity,
        # 1 = transparent; protocol 0 = http, 1 = https http.
        proxy_type = 0
        protocol = 0
        for node in root.xpath(parser['pattern']):
            try:
                position = parser['position']
                ip = node.xpath(position['ip'])[0].text
                # Convert inside the try block: a missing or non-numeric port
                # must skip this node rather than abort the whole page.
                port = int(node.xpath(position['port'])[0].text)
                addr = self.ips.getIpAddr(self.ips.str2ip(ip))
                if text_('省') in addr or self.AuthCountry(addr):
                    country = text_('国内')
                else:
                    country = text_('国外')
            except Exception:
                # Best effort: a node that cannot be resolved is skipped.
                continue

            proxylist.append({
                'ip': ip,
                'port': port,
                'types': proxy_type,
                'protocol': protocol,
                'country': country,
                'area': addr,
                'speed': 100
            })
        return proxylist
Exemple #4
0
 def dump(self, first, last):
     '''
     Log the index records numbered ``first`` up to, but excluding, ``last``.

     ``last`` is capped at ``self.indexCount``. Each record is read as 7
     bytes located at ``self.firstIndex + index * 7`` and unpacked with the
     ``"IHB"`` format into an IP value and the two parts of an offset.

     :param first: index of the first record to log.
     :param last: index one past the final record to log.
     :return: ``None``; one ``logger.info`` line is emitted per record.
     '''
     stop = min(last, self.indexCount)
     for index in range(first, stop):
         self.ipdb.seek(self.firstIndex + index * 7)
         record = self.ipdb.read(7)
         ip, offset_low, offset_high = struct.unpack("IHB", record)
         address = self.getAddr(offset_low + (offset_high << 16))
         # Convert GBK to UTF-8.
         encoded = text_(address, 'gbk').encode("utf-8")
         logger.info("%d %s %s" % (index, self.ip2str(ip), encoded))
Exemple #5
0
 def getIpAddr(self, ip):
     '''
     Return the address text recorded for the integer IP ``ip``.

     A binary search over the record indexes narrows down the candidate
     range; ``self.setIpRange(i)`` is relied on to refresh
     ``self.curStartIp``, ``self.curEndIp`` and ``self.curEndIpOffset``
     for record ``i`` (presumably from the database; it is defined
     elsewhere).

     :param ip: IP address as an integer.
     :return: the address passed through ``text_``, or a fixed "not found"
         message when ``ip`` lies outside the selected range.
     '''
     low = 0
     high = self.indexCount - 1
     while low < high - 1:
         middle = (low + high) // 2
         self.setIpRange(middle)
         start_ip = self.curStartIp
         if ip < start_ip:
             high = middle
             continue
         low = middle
         if ip == start_ip:
             # Exact hit on a range start: no need to search further.
             break

     self.setIpRange(low)
     # 255.255.255.x carries version information, so the upper candidate is
     # selected for it instead (ugly but useful).
     if ip & 0xffffff00 == 0xffffff00:
         self.setIpRange(high)

     if not (self.curStartIp <= ip <= self.curEndIp):
         return text_("未找到该IP的地址")
     return text_(self.getAddr(self.curEndIpOffset))
Exemple #6
0
 def getAddr(self, offset, ip=0):
     '''
     Read the country and area strings for the record at ``offset``.

     The byte at ``offset + 4`` selects how the record is laid out:

     * ``0x01``: an offset read with ``getLong3`` points at the country
       data. If the byte found there is ``0x02``, the country string is
       reached through one more ``getLong3`` offset and the file is
       repositioned to ``countryOffset + 4`` before the area is read.
     * ``0x02``: the country string is reached through a ``getLong3``
       offset and the area is read from ``offset + 8``.
     * anything else: the country string starts at ``offset + 4`` and the
       area is read from wherever the file position is afterwards.

     :param offset: position of the record in ``self.ipdb``.
     :param ip: unused.
     :return: country and area joined by a single space.
     '''
     self.ipdb.seek(offset + 4)
     (mode, ) = struct.unpack('B', self.ipdb.read(1))

     if mode == 0x01:
         country_offset = self.getLong3()
         self.ipdb.seek(country_offset)
         (sub_mode, ) = struct.unpack('B', self.ipdb.read(1))
         if sub_mode == 0x02:
             country = self.getString(self.getLong3())
             self.ipdb.seek(country_offset + 4)
         else:
             country = self.getString(country_offset)
         area = self.getAreaAddr()
     elif mode == 0x02:
         country = self.getString(self.getLong3())
         area = self.getAreaAddr(offset + 8)
     else:
         country = self.getString(offset + 4)
         area = self.getAreaAddr()

     return country + text_(" ") + area
Exemple #7
0
 def proxy_listPraser(self, response, parser):
     '''
     Extract proxy records whose ``ip:port`` is base64-encoded in the page.

     Each regular-expression match is expected to look like
     ``Proxy('<base64>')``; the wrapper is stripped and the payload is
     decoded to an ``ip:port`` string.

     :param response: text to search.
     :param parser: mapping with ``'pattern'`` (the regular expression).
     :return: list of proxy dicts with the keys ``ip``, ``port`` (int),
         ``types``, ``protocol``, ``country``, ``area`` and ``speed``.
         The list is empty when nothing matches; matches that cannot be
         processed are skipped.
     '''
     proxylist = []
     proxy_type = 0
     protocol = 0
     for match in re.compile(parser['pattern']).findall(response):
         try:
             encoded = match.replace("Proxy('", "").replace("')", "")
             ip_port = base64.b64decode(encoded)
             # On Python 3 b64decode() returns bytes, which cannot be split
             # on a str separator; decode it to text first.
             if not isinstance(ip_port, str):
                 ip_port = ip_port.decode('utf-8')
             parts = ip_port.split(':')
             ip = parts[0]
             # Convert inside the try block so that a malformed port skips
             # this match rather than aborting the whole page.
             port = int(parts[1])
             addr = self.ips.getIpAddr(self.ips.str2ip(ip))
             if text_('省') in addr or self.AuthCountry(addr):
                 country = text_('国内')
             else:
                 country = text_('国外')
         except Exception:
             # Best effort: a match that cannot be resolved is skipped.
             continue

         proxylist.append({
             'ip': ip,
             'port': port,
             'types': proxy_type,
             'protocol': protocol,
             'country': country,
             'area': addr,
             'speed': 100
         })
     # Returned unconditionally so that "no matches" yields [] and not None.
     return proxylist