Example #1
0
    def get_domains_by_ip(self, ip):
        """Collect the domains that Bing lists for an ``ip:`` search.

        The first request reads the total result count; result pages are
        then fetched one by one, sleeping 20 seconds after each request.

        :param ip: IP address interpolated into the Bing ``ip:`` query.
        :return: list of the unique, truthy ``Domain.get_domain`` values of
            every host matched in the fetched pages (order not guaranteed).
        """
        print_status('Start get domain by ip through bing...')
        # NOTE(review): the huge ``first`` offset presumably makes Bing serve
        # its last result page, which carries the total counter - confirm.
        url = 'http://cn.bing.com/search?q=ip:%s&first=999999991&FORM=PERE' % ip
        html = request(url, 'GET')

        # Compiled with re.X: literal whitespace in the pattern is ignored,
        # which is why every significant space is written as \s.
        domain_regx = r'''
            <h2><a\shref="https?://([^"]*?)"\starget="_blank"\sh="ID=[^"]*?">[^<]*?</a></h2>
        '''
        domain_list = re.findall(domain_regx, html, re.X)

        # The capture group accepts thousands separators so that counts such
        # as "1,234" are matched; the commas are stripped before int().
        total_page_regx = r'''<span\sclass="sb_count">\d*?\s-\s\d*?\s[^\(]*?\([^\s]*?\s([\d,]*?)\s[^\)]*?\)</span>'''
        result = re.search(total_page_regx, html)
        try:
            total_num = int(result.group(1).replace(',', ''))
        except (AttributeError, ValueError):
            # No counter found (result is None) or the capture held no
            # digits: fall back to a single page worth of results.
            total_num = 9

        # Ceiling division at 9 results per page; divmod keeps page_count an
        # int on both Python 2 and Python 3.
        page_count, leftover = divmod(total_num, 9)
        if leftover:
            page_count += 1
        print_status('Total pages: %s, Total domains:%s' % (page_count, total_num))

        # Iterate over pages, not over individual results.
        for n in range(page_count):
            print_status('Get page %s domains...' % str(n+1))
            # "%s1" appends a literal 1 to the page index: 01, 11, 21, ...
            url = 'http://cn.bing.com/search?q=ip:%s&first=%s1&FORM=PERE3' % (ip, n)
            html = request(url, 'GET')
            domain_list.extend(re.findall(domain_regx, html, re.X))
            time.sleep(20)

        # Normalise each host exactly once and drop empty results/duplicates.
        unique_domains = set()
        for host in domain_list:
            domain = Domain.get_domain(host)
            if domain:
                unique_domains.add(domain)
        return list(unique_domains)
Example #2
0
 def get_subdomain_by_links(self, domain, level=4):
     """Look up subdomains of ``domain`` through the i.links.cn service.

     :param domain: host name; normalised with ``Domain.get_domain`` first.
     :param level: sets the ``b3`` form flag when ``level >= 3`` and the
         ``b4`` flag when ``level >= 4``.
         NOTE(review): presumably the deepest subdomain level to query -
         confirm against the service.
     :return: list of the unique host names scraped from the response,
         always including the normalised ``domain`` itself (order not
         guaranteed).
     """
     domain = Domain.get_domain(domain)
     url = 'http://i.links.cn/subdomain/'
     data = {
         'domain': domain,
         'b2': 1,
         'b3': 1 if level >= 3 else 0,
         'b4': 1 if level >= 4 else 0,
     }
     html = request(url, 'POST', data=data)
     # Raw string so that \s reaches the regex engine without relying on
     # Python passing an unknown string escape through unchanged.
     regex = r'''<a\shref="http://[^"]*?"\srel=nofollow\starget=_blank>http://([^"]*?)</a></div>'''
     try:
         result = re.findall(regex, html)
     except TypeError:
         # html is not text the pattern can scan.
         # NOTE(review): presumably a failed request - confirm what
         # request() returns on error.
         result = []
         time.sleep(3)
     result.append(domain)
     return list(set(result))
Example #3
0
 def _get_text_list(self, config, value):
     """Fetch a page and return every regex match found in it.

     :param config: indexable pair; ``config[0]`` is a ``%``-format URL
         template and ``config[1]`` the regular expression to apply.
     :param value: value interpolated into the URL template.
     :return: the ``re.findall`` result for the fetched text, matched
         case-insensitively, with dot matching newlines, in verbose mode.
     """
     target_url = config[0] % value
     pattern = config[1]
     page_text = request(target_url)
     flags = re.I | re.S | re.X
     return re.findall(pattern, page_text, flags)