Example 1
    def fetch_alexa_cn(self):
        """get subdomains from alexa.cn"""
        sign = self.get_sign_alexa_cn()
        if sign is None:
            raise Exception("sign_fetch_is_failed")
        else:
            (domain, sig, keyt) = sign

        pre_domain = self.domain.split('.')[0]

        url = 'http://www.alexa.cn/api_150710.php'
        payload = {
            'url': domain,
            'sig': sig,
            'keyt': keyt,
        }
        r = http_request_post(url, payload=payload).text

        # The response is a '*'-separated blob; the last field holds
        # '__'-separated "name:value" pairs, terminated by an OTHER entry.
        for sub in r.split('*')[-1].split('__'):
            sub_name = sub.split(':')[0]
            if sub_name == 'OTHER':
                break
            sub_name = ''.join((sub_name.split(pre_domain)[0], domain))
            if is_domain(sub_name):
                self.subset.append(sub_name)
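
These examples all validate candidates with a shared is_domain() helper that is defined elsewhere in the project. A minimal sketch of what such a validator might look like (the regex and length checks are assumptions, not the project's actual implementation):

import re

# Hypothetical stand-in for the project's is_domain() helper: accepts
# "sub.example.com"-style hostnames, rejects empty or malformed labels.
_DOMAIN_RE = re.compile(
    r'^(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}$')

def is_domain(name):
    """Return True if `name` looks like a valid DNS hostname."""
    return bool(name) and len(name) <= 253 and bool(_DOMAIN_RE.match(name))
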
Example 2
def baidu_site(key_domain='', sub_domain='', command=''):
    '''Fetch results for a Baidu "site:target.com" search.'''
    headers = requests_headers()
    proxies = requests_proxies()
    if '://' in key_domain:
        key_domain = urlparse.urlparse(key_domain).hostname
    check = []
    baidu_url = 'https://www.baidu.com/s?ie=UTF-8&wd=site:{}'.format(
        key_domain)
    if command:
        baidu_url = 'https://www.baidu.com/s?ie=UTF-8&wd={}'.format(command)
    try:
        r = requests.get(url=baidu_url,
                         headers=headers,
                         timeout=10,
                         proxies=proxies,
                         verify=False).text
        if 'class="tip_head"' not in r:  # Check first
            for page in xrange(0, 21):  # max page_number
                pn = page * 50
                if key_domain:
                    newurl = 'https://www.baidu.com/s?ie=UTF-8&wd=site:{}&pn={}&rn=50&tn=baiduadv'.format(
                        key_domain, pn)
                if sub_domain:
                    newurl = 'https://www.baidu.com/s?ie=UTF-8&wd=site:{} -inurl:({})&pn={}&rn=50&tn=baiduadv'.format(
                        key_domain, sub_domain, pn)  # -site:(weibo.com)
                if command:
                    newurl = 'https://www.baidu.com/s?ie=UTF-8&wd={}&pn={}&rn=50&tn=baiduadv'.format(
                        command, pn)
                keys = requests.get(url=newurl,
                                    headers=headers,
                                    proxies=proxies,
                                    timeout=10,
                                    verify=False).content
                flags = re.findall(
                    r'style=\"text-decoration:none;\">(.*?)%s.*?<\/a><div class=\"c-tools\"'
                    % key_domain, keys)
                check_flag = keys.count('class="n"')
                for flag in flags:
                    domain_handle = flag.replace('https://', '').replace(
                        'http://', '').replace('<b>', '').replace('</b>', '')
                    if domain_handle != '':  # skip truncated "..." results
                        domain_flag = domain_handle + key_domain
                        if domain_flag not in check and is_domain(domain_flag):
                            if domain_flag not in baidu_domainss:
                                check.append(domain_flag)
                                print '[+] Get baidu site: > ' + domain_flag
                                baidu_domainss.append(domain_flag)
                if check_flag < 2 and page > 2:
                    # for domain_key in baidu_domainss: # sub max num to inurl:( -flag)
                    # 	baidu_domainss += baidu_site(domain_key)
                    return check  # list(set(baidu_domainss))
        else:
            print '[!] baidu site:domain no result'
            return []
    except Exception:
        # Network or parse failure; uncomment traceback for debugging.
        # print traceback.format_exc()
        pass
    return check  # always hand back a list so callers can concatenate safely
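
baidu_site() supports three query modes: key_domain alone runs a plain site: search, sub_domain adds an -inurl:(...) exclusion, and command passes an arbitrary query string straight through as wd=. A hypothetical invocation, assuming the module-level baidu_domainss list and the requests_headers()/requests_proxies() helpers exist as in the original project:

baidu_domainss = []  # module-level de-duplication list assumed by baidu_site()

subs = baidu_site(key_domain='example.com')
more = baidu_site(key_domain='example.com', sub_domain='www.example.com')
raw = baidu_site(command='site:example.com intitle:admin')
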
Example 3
    def run(self):

        try:
            timestamp = time.time()
            url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
                self.url, timestamp, self.domain, timestamp, self.verify)
            result = json.loads(req.get(url).content)
            if result.get('status') == '1':  # success status arrives as a string
                for item in result.get('data'):
                    if is_domain(item.get('domain')):
                        self.subset.append(item.get('domain'))
            elif result.get('status') == 3:  # blocked status arrives as an int
                logging.info("chaxun.la api blocked your IP...")
                logging.info(
                    "input your verify_code at http://subdomain.chaxun.la/wuyun.org/"
                )
                # print('get verify_code():', self.verify)
                # self.verify_code()
                # self.run()
            self.subset = list(set(self.subset))
            print "[-] {0} found {1} domains".format(self.engine_name,
                                                     len(self.subset))
            return self.subset
        except Exception as e:
            logging.info(str(e))
            print "[-] {0} found {1} domains".format(self.engine_name,
                                                     len(self.subset))
            return self.subset
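
From the parsing above (Example 4 consumes the same endpoint), the chaxun.la API appears to return JSON shaped roughly as follows; this is reconstructed from the code, not from any documented API:

# Reconstructed response shapes (field names inferred from the parser):
sample_ok = {
    "status": "1",              # success status is compared as a string
    "data": [
        {"domain": "www.example.com"},
        {"domain": "mail.example.com"},
    ],
}
sample_blocked = {"status": 3}  # blocked status is compared as an int
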
Example 4
    def run(self):
        try:
            timestamp = time.time()
            url = "{0}?0.{1}&callback=&k={2}&page=1&order=default&sort=desc&action=moreson&_={3}&verify={4}".format(
                self.url, timestamp, self.domain, timestamp, self.verify)
            # response = req.get(url, proxies=self.proxy).content
            # no proxy needed for this class
            response = req.get(url).content
            result = json.loads(response)
            if result.get('status') == '1':  # success status arrives as a string
                for item in result.get('data'):
                    if is_domain(item.get('domain')):
                        self.domain_name.append(item.get('domain'))
            elif result.get('status') == 3:  # blocked status arrives as an int
                logger.warning("chaxun.la api blocked our IP...")
                logger.info("input your verify_code")
                # print('get verify_code():', self.verify)
                # self.verify_code()
                # self.run()
            self.domain_name = list(set(self.domain_name))
        except Exception as e:
            logger.error("Error in {0}: {1}".format(__file__.split('/')[-1], e))
        finally:
            logger.info("{0} found {1} domains".format(self.engine_name, len(self.domain_name)))
            return self.domain_name, self.smiliar_domain_name, self.email
Example 5
    def fetch_chinaz(self):
        """get subdomains from alexa.chinaz.com"""

        url = 'http://alexa.chinaz.com/?domain={0}'.format(self.domain)
        r = http_request_get(url).content
        # capture text between '">\r\n<li>' and '</li>' in the page source
        subs = re.compile(r'(?<="\>\r\n<li>).*?(?=</li>)')
        result = subs.findall(r)
        for sub in result:
            if is_domain(sub):
                self.subset.append(sub)
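
Examples 1 and 5 call http_request_post()/http_request_get() wrappers that are not shown. Minimal hypothetical stand-ins built on requests (the real project may add proxies, retries, or randomized headers):

import requests

DEFAULT_HEADERS = {'User-Agent': 'Mozilla/5.0'}

def http_request_get(url, timeout=10):
    return requests.get(url, headers=DEFAULT_HEADERS, timeout=timeout)

def http_request_post(url, payload=None, timeout=10):
    return requests.post(url, data=payload, headers=DEFAULT_HEADERS,
                         timeout=timeout)
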
Example 6
def github_site(subdomain, key_domain):
    headers = requests_headers()
    proxies = requests_proxies()
    if '://' in key_domain:
        key_domain = urlparse.urlparse(key_domain).hostname
    github_domains = []
    session = requests.Session()
    headers['Cookie'] = github_cookie
    try:
        # check_login = '******'
        # req_check = session.get(url=check_login,headers=headers,proxies=proxies,timeout=10,verify=False).content
        # if github_account in req_check:
        # 	print '[*] Github site:domain login check Success'
        headers['Host'] = 'github.com'
        headers['Referer'] = 'https://github.com/search?utf8=%E2%9C%93&q=*&type=Code'
        github_url = 'https://github.com/search?q={}&type=Code&utf8=%E2%9C%93'.format(
            subdomain)
        req = session.get(url=github_url,
                          headers=headers,
                          proxies=proxies,
                          timeout=10,
                          verify=False).content
        if 'blankslate' not in req:  #if 'code results' in req:
            for page in xrange(1, 100):
                newurl = 'https://github.com/search?p={}&q={}&type=Code&s=&utf8=%E2%9C%93'.format(
                    page, subdomain)
                req_new = session.get(url=newurl,
                                      headers=headers,
                                      proxies=proxies,
                                      timeout=10,
                                      verify=False).content
                req_new = req_new.replace('</em>',
                                          '').replace('<em>', '').replace(
                                              '</span>', '')
                # Patterns that may precede a subdomain label in GitHub
                # code-search results, collected in the original order:
                # scheme-relative URLs, HTML-encoded quotes, path fragments,
                # bracketed/parenthesised spans, and bare <span> wrappers.
                patterns = [
                    r'//([\s\S]*?)%s',
                    r'&quot;([\s\S]*?)%s',
                    r'&#39;([\s\S]*?)%s',
                    r'/([\s\S]*?)%s',
                    r'\[<span .*?>([\s\S]*?)%s',
                    r'\(<span .*?>([\s\S]*?)%s',
                    r'\[([\s\S]*?)%s',
                    r'\(([\s\S]*?)%s',
                    r'<span .*?>([\s\S]*?)%s',
                ]
                url_regexs = []
                for pattern in patterns:
                    try:
                        url_regexs += re.findall(pattern % key_domain, req_new)
                    except Exception:
                        pass
                for sub in url_regexs:
                    if sub not in github_domains and sub_filter not in sub and sub != '.' and filter_list(
                            module=sub, filter_list=github_sub_filter
                    ) and sub[-1:] != '-' and sub[-1:] != '_':
                        sub = sub.replace(' ', '')  # str.replace returns a new string
                        if sub[-1:] == '.':
                            subs = sub + key_domain
                        else:
                            subs = sub + '.' + key_domain
                        if is_domain(subs) and subs not in github_domainss:
                            print '[+] Get github site:domain > ' + subs
                            github_domainss.append(subs)
                            github_domains.append(subs)
                if 'next_page disabled' in req_new:
                    return github_domains
        else:
            print '[!] github site:domain no result'
        # else:
        # 	print '[!] Github login check Error'
        # 	print '[*] Please try again'
        # 	pass
    except Exception:
        # Network or parse failure; uncomment traceback for debugging.
        # print traceback.format_exc()
        pass
    return github_domains  # may be empty if nothing matched
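
github_site() depends on several module-level names defined elsewhere in the project: a logged-in github_cookie, the github_domainss de-duplication list, the sub_filter string, the github_sub_filter list, and the filter_list() helper. A hypothetical setup and call (all values are placeholders, not the project's real configuration):

github_cookie = 'user_session=PLACEHOLDER; logged_in=yes'
github_domainss = []            # global de-duplication list
sub_filter = 'PLACEHOLDER'      # substring that disqualifies a match
github_sub_filter = []          # label substrings rejected via filter_list()

results = github_site('example.com', 'example.com')
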