def _zoomeye_api(search, page, z_type):
    """Search the ZoomEye API and yield one result list per page.

    Example dork: app:"Drupal" country:"JP"
    Login endpoint (original note):
        curl -X POST https://api.zoomeye.org/user/login -d '{"username": "******", "password": "******"}'

    search: ZoomEye dork string.
    page: number of result pages to fetch (1..page).
    z_type: 'web' or 'host'; anything else logs an error and stops.
    Yields: per page, a list of URLs ('web') or 'ip:port' strings ('host');
        an empty list on a per-page failure.
    Exits the process (sys.exit) on config/login/token errors.
    """
    headers = {}
    url_login = '******'
    try:
        data = {
            'username': conf['config']['zoomeye_api']['username'],
            'password': conf['config']['zoomeye_api']['password']
        }
        res = mycurl("post", url_login, json=data, headers=headers)
        # Idiom fix: compare to None with `is`, not `==`.
        if res is None:
            sys.exit(logger.error("Zoomeye api is not available."))
        headers["Authorization"] = "JWT " + json.loads(res.text)['access_token']
    except KeyError:
        sys.exit(
            logger.error(
                "Load tentacle config error: zoomeye_api, please check the config in tentacle.conf."
            ))
    except AttributeError:
        sys.exit(logger.error("Zoomeye api error: the response is none."))
    except Exception as e:
        sys.exit(logger.error("Zoomeye api error: %s" % type(e).__name__))
    # Normalize the type once instead of lowercasing on every comparison.
    z = z_type.lower()
    if z == 'web':
        url_api = "https://api.zoomeye.org/web/search"
    elif z == 'host':
        url_api = "https://api.zoomeye.org/host/search"
    else:
        logger.error("Error zoomeye api with type {0}.".format(z_type))
        return None
    logger.sysinfo("Using zoomeye api with type {0}.".format(z_type))
    # Hoist the loop-invariant pattern out of the page loop.
    url_pattern = re.compile('"url": "(.*?)"')
    for n in range(1, page + 1):
        logger.debug("Find zoomeye url of %d page..." % int(n))
        try:
            data = {'query': search, 'page': str(n)}
            res = mycurl("get", url_api, params=data, headers=headers)
            if int(res.status_code) == 422:
                # SystemExit is a BaseException, so the except below won't eat it.
                sys.exit(logger.error("Error zoomeye api token."))
            if z == 'web':
                result = url_pattern.findall(res.text)
            else:  # z == 'host' (only remaining possibility here)
                result = [
                    str(item['ip']) + ':' + str(item['portinfo']['port'])
                    for item in json.loads(res.text)['matches']
                ]
            logger.debug("Zoomeye Found: %s" % result)
            yield result
        except Exception:
            # Best-effort: a failed page yields an empty result list.
            yield []
def _360so(search, page):
    """Scrape so.com (360 search) results for `search` and yield landing URLs.

    Each so.com result link is an intermediate redirect page; the real target
    is pulled from its "URL='...'" snippet when present, otherwise the
    redirect response's own URL attribute is used. Yields None on page failure.
    """
    redirect_pattern = "URL='(.*?)'"
    for n in range(1, page + 1):
        base_url = 'https://www.so.com/s?q=' + str(quote(search)) + '&pn=' + str(n) + '&fr=so.com'
        try:
            r = mycurl('get', base_url)
            soup = BeautifulSoup(r.text, "html.parser")
            for a in soup.select('li.res-list > h3 > a'):
                url1 = mycurl('get', a['href'])
                # FIX: run the regex once -- the original evaluated
                # re.findall twice on the same text (once for the test,
                # once for the value).
                found = re.findall(redirect_pattern, url1.text)
                url = found[0] if found else url1.url
                logger.debug("360so Found: %s" % url)
                yield url
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C still interrupts.
            yield None
def _baidu(search, page):
    """Scrape Baidu search results for `search` and yield result URLs.

    Walks `page` result pages (10 hits per page via the `pn` offset) and
    yields the landing URL behind each result anchor; yields None when a
    page fails to load or parse.
    """
    encoded = str(quote(search))
    for offset in range(0, page * 10, 10):
        page_url = ('https://www.baidu.com/s?wd=' + encoded + '&oq=' +
                    encoded + '&ie=utf-8' + '&pn=' + str(offset))
        try:
            response = mycurl('get', page_url)
            document = BeautifulSoup(response.text, "html.parser")
            for anchor in document.select('div.c-container > h3 > a'):
                # Baidu anchors point at a redirect; the landing URL is
                # taken from the response's .url attribute.
                target = mycurl('get', anchor['href']).url
                logger.debug("Baidu Found: %s" % target)
                yield target
        except:
            yield None
def _fofa_api_today_poc(page):
    """Scrape fofa.so/about_client for listed PoC entries and search each.

    For every PoC row found on the page, runs its FOFA query through
    _fofa_api(..., flag=False) and collects (target, server) tuples, where
    `server` is the first alphanumeric run of the PoC name, lowercased
    (None when the name has no such run). Returns the collected list
    (empty when the page is unreachable).
    """
    target_list = []
    url = "https://fofa.so/about_client"
    res = mycurl('get', url)
    if res != None:
        poc_soup = BeautifulSoup(res.content, 'lxml')
        # PoC names and raw query strings sit at fixed positions in the page.
        poc_result_name = poc_soup.select(
            'body > div.fdo > div:nth-of-type(3) > div > div > ul > li:nth-of-type(1)'
        )
        poc_result_raw = poc_soup.select(
            'body > div.fdo > div:nth-of-type(3) > div > div > ul > li:nth-of-type(4) > a'
        )
        for i in range(len(poc_result_name)):
            # [11:-5] strips fixed-length tag markup around the <li> text --
            # assumes the page's markup never changes; TODO confirm against
            # the live page.
            result_name = str(poc_result_name[i])[11:-5]
            # Slice from the ';">' attribute terminator to -4 to isolate the
            # anchor's inner text, then drop the marker itself.
            result_raw = str(
                poc_result_raw[i])[str(poc_result_raw[i]).find(';">'):-4]
            result_raw = result_raw.replace(';">', '')
            logger.sysinfo("Search fofa api %s: %s" % (result_name, result_raw))
            # First alphanumeric run of the name doubles as the server label.
            matchObj = re.search(r'[a-zA-Z0-9]+', result_name)
            if matchObj:
                server = matchObj.group().lower()
                for z in _fofa_api(result_raw, page, False):
                    target_list.append((z, server))
            else:
                for z in _fofa_api(result_raw, page, False):
                    target_list.append((z, None))
    return target_list
def _fofa_api(search, page, flag = True):
    """Query the FOFA search API (https://fofa.so/api#auth) and return hits.

    search: FOFA query string; base64-encoded before sending, as the API's
        qbase64 parameter requires.
    page: number of result pages to request (1..page).
    flag: when True, announce API usage via sysinfo (the today-poc helper
        passes False and does its own logging).
    Returns: list containing the first field of every matched record.
    Exits the process on missing config or a 401 from the API.
    """
    url_login = '******'
    result = []
    try:
        email = conf['config']['fofa_api']['email']
        key = conf['config']['fofa_api']['token']
    except KeyError:
        sys.exit(logger.error("Load tentacle config error: zfofa_api, please check the config in tentacle.conf."))
    if flag:
        logger.sysinfo("Using fofa api...")
    # The API consumes the query base64-encoded via the qbase64 parameter.
    search = str(base64encode(bytes(search, 'utf-8')), 'utf-8')
    for page_no in range(1, page + 1):
        logger.debug("Find fofa url of %d page..." % int(page_no))
        query_url = url_login + '?email={0}&key={1}&page={2}&qbase64={3}'.format(email, key, page_no, search)
        res = mycurl('post', query_url)
        if res is None:
            continue
        if int(res.status_code) == 401:
            sys.exit(logger.error("Error fofa api access, maybe you should pay fofa coin and enjoy service."))
        res_json = json.loads(res.text)
        if res_json["error"] is None:
            for item in res_json.get('results'):
                logger.debug("Fofa Found: %s" % item[0])
                result.append(item[0])
    return result
def geturl(host, port, params=None, **kwargs):
    """Probe host:port over http:// then https:// and return the first scheme
    that answers.

    host: hostname or IP (no scheme).
    port: explicit port, or None/0 to use the scheme default (80/443).
    params/kwargs: forwarded to mycurl's HEAD request.
    Returns (url, host, port) for the first responsive scheme, skipping an
    https-port mismatch (HTTP 400 "plain HTTP request was sent to HTTPS
    port"); returns (None, host, port) when neither scheme answers.
    """
    for pro in ['http://', "https://"]:
        # BUG FIX: the original compared `pro == 'https'`, which is never
        # true (pro carries the '://' suffix), so the https default port
        # silently fell through to 80.
        _port = port if port is not None and port != 0 else (443 if pro == 'https://' else 80)
        # An explicit 443 forces the https scheme regardless of iteration.
        _pro = 'https://' if port == 443 else pro
        url = _pro + host + ":" + str(_port) + '/'
        res = mycurl('head', url, params, **kwargs)
        if res is not None:
            if res.status_code == 400 and 'The plain HTTP request was sent to HTTPS port' in res.text:
                continue
            return url, host, _port
    return None, host, port
def _bing(search, page):
    """Scrape cn.bing.com results for `search` and yield result URLs.

    Walks `page` pages (10 results each, via the `first` offset) and yields
    each result anchor's href; yields None when a page fails.
    """
    encoded = str(quote(search))
    for first in range(1, (page * 10) + 1, 10):
        target = 'http://cn.bing.com/search?q=' + encoded + '&first=' + str(first)
        try:
            response = mycurl('get', target)
            document = BeautifulSoup(response.text, "html.parser")
            for anchor in document.select('li.b_algo > div.b_algoheader > a'):
                link = anchor['href']
                logger.debug("Bing Found: %s" % link)
                yield link
        except:
            yield None
def _github_extract(search, git_urls):
    """Fetch each GitHub result page in git_urls and grep it for leaked info.

    search: original query string (currently unused for filtering -- the
        `search in _re` check is commented out below).
    git_urls: list of GitHub html_url pages to fetch and scan.
    Matches (mail/domain/passwords/etc.) are only reported via
    logger.sysinfo; nothing is returned. Sleeps 60s on unexpected
    responses to back off.
    """
    # Regexes for interesting leakage categories, keyed by category name.
    InformationRegex = {
        "mail": r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9\-.]+)",
        "domain": r"(http[s]*://[^<|\"|\s]*)",
        "other": r"(\b(?:(?!http|jar)[a-z])+://[^<|\"|\s]*)",
        "pass1": r"(pass[^<|?|\r|\n]{1,30})",
        "pass2": r"(password[^<|?|\r|\n]{1,30})",
        "pass3": r"(pwd[^<|?|\r|\n]{1,30})",
        "root": r"(root[^<|?|\r|\n]{1,30})",
        "title": r"<title>(.*)<\/title>",
        "ip": r"([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:*[0-9]{0,5})"
    }
    github_timeout = 20  # seconds per request
    # git_urls = list(set(git_urls))
    for url in git_urls:
        try:
            _resp = mycurl('get', url, timeout=github_timeout)
        except Exception:
            _resp = None
        if _resp and _resp.status_code == 200:
            # BUG FIX: the original passed the response object itself to
            # BeautifulSoup (it stringifies to "<Response [200]>"); parse
            # the body text instead.
            soup = BeautifulSoup(_resp.text, "html5lib")
            # Code views render in a <table>; fall back to <article>.
            tap = soup.table if soup.table else soup.article
            try:
                _text = tap.text.rstrip()
            except Exception:
                # tap is None when neither element exists.
                continue
            # BUG FIX: the original replace() calls had identical characters
            # on both sides (no-ops); decode the HTML entities instead.
            # '&amp;' goes last so double-escaped entities aren't decoded twice.
            _text = _text.replace('&quot;', '"')
            _text = _text.replace('&lt;', '<')
            _text = _text.replace('&gt;', '>')
            _text = _text.replace('&nbsp;', ' ')
            _text = _text.replace('&amp;', '&')
            for i in InformationRegex:
                res = re.findall(InformationRegex[i], _text)
                for _re in res:
                    # print(_re)
                    if 'github' not in _re and 'schema.org' not in _re:
                        # if search in _re:
                        # Every category branch in the original if/elif
                        # ladder logged the same message, so one call suffices.
                        logger.sysinfo("Found info: %s [%s]" % (url, _re))
        elif _resp and _resp.status_code == 404:
            pass
        else:
            # BUG FIX: _resp is None when the request raised; guard before
            # dereferencing it.
            if _resp is not None:
                logger.error(_resp.text)
                logger.error(_resp.status_code)
            time.sleep(60)
def _google_api(search, page):
    """Query the Google Custom Search JSON API for `search` and collect links.

    Reads developer_key / search_enging from conf['config']['google_api']
    (exits on missing config), optionally routes through the configured
    proxy, walks up to `page` pages of ten results each, and returns the
    collected link list (possibly empty). Stops early on a non-200 status
    (the free tier is limited to 100 queries/day).

    Original notes:
    https://console.developers.google.com
    https://developers.google.com/custom-search/v1/cse/list
    poc-t search_enging 011385053819762433240:ljmmw2mhhau
    https://cse.google.com.hk/cse?cx=011385053819762433240:ljmmw2mhhau&gws_rd=cr
    """
    try:
        developer_key = conf['config']['google_api']['developer_key']
        search_enging = conf['config']['google_api']['search_enging']
    except KeyError:
        sys.exit(
            logger.error(
                "Load tentacle config error: google_api, please check the config in tentacle.conf."
            ))
    links = []
    for page_index in range(0, page):
        request_url = 'https://www.googleapis.com/customsearch/v1?cx={0}&key={1}&num=10&start={2}&q={3}'.format(
            search_enging, developer_key, str(page_index * 10 + 1), search)
        try:
            _proxies = None
            if conf['config']['proxy']['proxy'].lower() == 'true':
                try:
                    _proxies = {
                        'http': conf['config']['proxy']['http_proxy'],
                        'https': conf['config']['proxy']['https_proxy']
                    }
                except:
                    logger.error("Error http(s) proxy: %s or %s." %
                                 (conf['config']['proxy']['http_proxy'],
                                  conf['config']['proxy']['https_proxy']))
            res = mycurl('get', request_url, proxies=_proxies, timeout=10)
        except:
            res = None
        if res is None:
            continue
        if int(res.status_code) != 200:
            logger.error(
                "Error google api access, and api rate limit 100/day, maybe you should pay money and enjoy service."
            )
            break
        res_json = json.loads(res.text)
        try:
            for item in res_json.get('items'):
                links.append(item.get('link'))
        except:
            # No 'items' key (or not iterable) means no more results.
            break
    return links
def _ceye_verify_api(filter, t = 'dns'):
    """Check the ceye.io records API for an out-of-band hit matching `filter`.

    filter: marker string; 'http://' is stripped and it is truncated to 20
        characters before querying. (NOTE: parameter shadows the builtin
        `filter`; the name is kept for caller compatibility.)
    t: record type to query, 'dns' by default.
    Returns True only when the marker appears in the API response body;
    False on missing config, an unreachable API, or a 503 throttle.
    """
    try:
        token = conf['config']['ceye_api']['token']
    except KeyError:
        logger.error("Load tentacle config error: ceye_api, please check the config in tentacle.conf.")
        return False
    filter = filter.replace('http://', '')[0:20]
    url = "http://api.ceye.io/v1/records?token={token}&type={type}&filter={filter}".format(
        token=token, type=t, filter=filter)
    res = mycurl('get', url)
    if res is None:
        logger.error("The ceye api is unavailable.!")
        return False
    if res.status_code == 503:
        logger.error("The ceye api does not support such high frequency requests for the time being. Please reduce the thread to run again by --thread 5")
        return False
    return filter in res.text
def curl(method, url, params=None, **kwargs):
    """Public wrapper around mycurl; forwards all arguments unchanged."""
    return mycurl(method, url, params=params, **kwargs)
def _github_api(search, page):
    """Search GitHub code via the API and hand matching file URLs to _github_extract.

    Token setup (original note): https://github.com/settings/tokens -> Generate new token.

    search: query string appended to the code-search endpoint.
    page: maximum number of result pages (per_page_limit items each) to walk.
    Requires conf['config']['github_api']['token']; exits if missing or rejected.
    Always returns [] -- results are consumed via _github_extract side effects.

    NOTE(review): this definition is shadowed by a later duplicate
    `_github_api` in the same module.
    """
    per_page_limit = 50
    github_timeout = 20  # seconds per request
    headers = {}
    url_api = "https://api.github.com/search/code?sort=updated&order=desc&per_page=%s&q=" % per_page_limit
    try:
        token = conf['config']['github_api']['token']
    except KeyError:
        sys.exit(
            logger.error(
                "Load tentacle config error: github_api, please check the config in tentacle.conf."
            ))
    headers["Authorization"] = "token " + token
    resp = mycurl('get', url_api + search, headers=headers, timeout=github_timeout)
    if resp is not None and resp.status_code == 200:
        logger.sysinfo("Using github api...")
        res_json = json.loads(resp.content)
        total = res_json["total_count"]
        logger.sysinfo("Found github url: %d" % int(total))
        # Walk at most `page` pages and no more than the API actually has.
        page_num = (total // per_page_limit) + 1
        page_num = page_num if page_num < page else page
        git_urls = []
        for p in range(1, page_num + 1):
            # Search url
            _url_api = "https://api.github.com/search/code?sort=updated&order=desc&page=%d&per_page=%s&q=" % (
                p, per_page_limit)
            _resp = mycurl('get', _url_api + search, headers=headers, timeout=github_timeout)
            if _resp is not None and _resp.status_code == 200:
                logger.debug("Find github url of %d page..." % int(p))
                try:
                    _res_json = json.loads(_resp.content)
                    for i in range(len(_res_json['items'])):
                        git_urls.append(_res_json['items'][i]["html_url"])
                except Exception:
                    pass
            elif _resp is not None and int(_resp.status_code) == 422:
                logger.error(
                    "Warning: github api access rate limit 20/minute, 5000/hour, 1000 search results."
                )
                logger.error("Error github api token. Wait for a minute.")
                # Rate-limited: spend the wait extracting from URLs we already have.
                logger.sysinfo(
                    "So, this program will access target url and wait for rate limit. "
                )
                # BUG FIX: arguments were swapped (was _github_extract(git_urls, search)),
                # which iterated the query string instead of the URL list.
                _github_extract(search, git_urls)
                git_urls = []
            elif _resp is not None and int(_resp.status_code) == 403:
                # NOTE(review): decrementing a for-loop variable has no effect in
                # Python -- the page is effectively skipped, not retried.
                p = p - 1
                logger.error(
                    "Too many times for access. So we should wait for ten minute."
                )
                time.sleep(60 * 10)
            else:
                p = p - 1
                # BUG FIX: _resp can be None here (request failed); guard
                # before dereferencing it.
                if _resp is not None:
                    logger.error(_resp.text)
                    logger.error(_resp.status_code)
                time.sleep(60)
        # Flush whatever URLs remain after the last page.
        _github_extract(search, git_urls)
        git_urls = []
    elif resp is not None and int(resp.status_code) == 422:
        # BUG FIX: guard resp against None before reading status_code.
        sys.exit(logger.error("Error github api token."))
    return []
def _github_api(search, page):
    """Search GitHub code via the API and grep matching files for leaked secrets.

    Token setup (original note): https://github.com/settings/tokens -> Generate new token.

    NOTE(review): this is a duplicate definition -- being later in the module
    it shadows the earlier `_github_api`. Unlike that version, it inlines the
    extraction pass (regex grep over fetched pages) inside the rate-limit branch.

    search: query string appended to the code-search endpoint; matches are
        only reported when they also contain `search`.
    page: maximum number of result pages (per_page_limit items each) to walk.
    Always returns []; hits are reported via logger.sysinfo.
    """
    per_page_limit = 50
    github_timeout = 20  # seconds per request
    # Regexes for interesting leakage categories, keyed by category name.
    InformationRegex = {
        "mail": r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)",
        "domain": r"(http[s]*://[^<|\"|?]*)",
        "pass1": r"(pass[^<|?]{30})",
        "pass2": r"(password[^<|?]{30})",
        "pass3": r"(pwd[^<|?]{30})",
        "root": r"(root[^<|?]{0,30})",
        "title": r"<title>(.*)<\/title>",
        "ip": r"([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:*[0-9]{0,5})"
    }
    headers = {}
    url_api = "https://api.github.com/search/code?sort=updated&order=desc&per_page=%s&q=" % per_page_limit
    try:
        token = conf['config']['github_api']['token']
    except KeyError:
        sys.exit(logger.error("Load tentacle config error: github_api, please check the config in tentacle.conf."))
    headers["Authorization"] = "token " + token
    resp = mycurl('get', url_api + search, headers=headers, timeout=github_timeout)
    if resp is not None and resp.status_code == 200:
        logger.sysinfo("Using github api...")
        res_json = json.loads(resp.content)
        total = res_json["total_count"]
        logger.sysinfo("Found github url: %d" % int(total))
        page_num = (total // per_page_limit) + 1
        # BUG FIX: the original `page_num if page < page_num else page` picked
        # the LARGER of the two; cap at both the API's total and `page`.
        page_num = page_num if page_num < page else page
        git_urls = []
        for p in range(1, page_num + 1):
            # Search url
            _url_api = "https://api.github.com/search/code?sort=updated&order=desc&page=%d&per_page=%s&q=" % (p, per_page_limit)
            _resp = mycurl('get', _url_api + search, headers=headers, timeout=github_timeout)
            if _resp is not None and _resp.status_code == 200:
                logger.debug("Find github url of %d page..." % int(p))
                try:
                    _res_json = json.loads(_resp.content)
                    for i in range(len(_res_json['items'])):
                        git_urls.append(_res_json['items'][i]["html_url"])
                except Exception:
                    pass
            elif _resp is not None and int(_resp.status_code) == 422:
                logger.error("Warning: github api access rate limit 20/minute, 5000/hour, 1000 search results.")
                logger.error("Error github api token. Wait for a minute.")
                # Rate-limited: spend the wait grepping the URLs collected so far.
                logger.sysinfo("So, this program will access target url and wait for rate limit. ")
                git_urls = list(set(git_urls))
                for url in git_urls:
                    try:
                        _resp = mycurl('get', url, timeout=github_timeout)
                    except Exception:
                        _resp = None
                    if _resp and _resp.status_code == 200:
                        # Hoisted out of the per-regex loop: the text only
                        # needs normalizing once per fetched page.
                        _text = _resp.text.lower()
                        # BUG FIX: the original replace() calls had identical
                        # characters on both sides (no-ops); decode the HTML
                        # entities instead, '&amp;' last to avoid double-decoding.
                        _text = _text.replace('&quot;', '"')
                        _text = _text.replace('&lt;', '<')
                        _text = _text.replace('&gt;', '>')
                        _text = _text.replace('&nbsp;', ' ')
                        _text = _text.replace('&amp;', '&')
                        for i in InformationRegex:
                            res = re.findall(InformationRegex[i], _text)
                            for _re in res:
                                if 'github' not in _re and search in _re:
                                    # BUG FIX: the original compared the regex
                                    # PATTERN (InformationRegex[i]) to the key
                                    # names, which never matched; compare the
                                    # key `i` itself. root/title/ip hits are
                                    # intentionally not logged (no else branch
                                    # in the original).
                                    if i == 'mail':
                                        logger.sysinfo("Found info: %s [%s]" % (url, _re))
                                    elif i == 'domain':
                                        logger.sysinfo("Found info: %s [%s]" % (url, _re))
                                    elif 'pass' in i:
                                        logger.sysinfo("Found info: %s [%s]" % (url, _re))
                    elif _resp and _resp.status_code == 404:
                        pass
                    else:
                        # BUG FIX: _resp can be None here; guard before use.
                        if _resp is not None:
                            logger.error(_resp.text)
                            logger.error(_resp.status_code)
                        time.sleep(60)
                git_urls = []
            elif _resp is not None and int(_resp.status_code) == 403:
                # NOTE(review): decrementing a for-loop variable has no effect
                # in Python -- the page is skipped, not retried.
                p = p - 1
                logger.error("Too many times for access. So we should wait for ten minute.")
                time.sleep(60 * 10)
            else:
                p = p - 1
                # BUG FIX: guard _resp against None before dereferencing.
                if _resp is not None:
                    logger.error(_resp.text)
                    logger.error(_resp.status_code)
                time.sleep(60)
    elif resp is not None and int(resp.status_code) == 422:
        # BUG FIX: guard resp against None before reading status_code.
        sys.exit(logger.error("Error github api token."))
    return []