def google(self):
    for loop in range(0, int(ask('How many pages?'))):
        url = f"https://google.com/search?q=site:{self.sb.domain}&ie=utf-8&oe=utf-8&aq=t&start={str(loop)}0"
        res = REQ_S.get(url)
        if res.status_code != 200:
            pr('Bad status code: %d' % res.status_code, '!')
            return
        c = 0
        soup = BeautifulSoup(res.content, 'html.parser')
        for l in soup.find_all('cite'):
            if not l.span:
                continue
            # print(l)
            ls = urlsplit('http://' + l.span.decode_contents())
            if self.sb.domain not in ls.netloc:
                pr('Wrong domain found: ' + fy + ls.path + fx, '!')
                continue
            pts = ls.netloc.split('.')
            if len(pts) > 2 and ls.netloc not in self.sb.known_subdomains:
                pr('Found new subdomain: ' + fc + ls.netloc + fx)
                continue
            if ls.path not in self.known_paths:
                self.known_paths.add(ls.path)
                c += 1
        pr(f'Added {c} new paths')
def crawl(self, page):
    pr('Crawling page: ' + fc + page + fx)
    url = self.sb.pack_url(path=page)
    res = REQ_S.get(url)
    if res.status_code == 404:
        i = f'page: "{page}" is 404'
        logger.info(i)
        pr(i, '!')
        return
    elif res.status_code != 200:
        i = f'page returned code "{res.status_code}" <=> "{page}"'
        logger.info(i)
        pr(i, '!')
        return
    il = xl = 0
    soup = BeautifulSoup(res.content, 'html.parser')
    # Links are in ['a', 'link', 'img', 'svg', 'iframe', 'embed', 'audio']
    for k, v in {'a': 'href', 'link': 'href', 'iframe': 'src', 'embed': 'src'}.items():
        for l in soup.find_all(k):
            try:
                x = l[v].lower()
            except KeyError:
                i = f'"{page}" KeyError: No link found in "{k}" element'
                logger.info(i)
                pr(i, '!')
                continue
            if x.startswith('#'):
                continue
            if x.endswith('.ico'):
                continue
            if x.startswith('/'):
                x = url + x
            if re.match(r'[^@]+@[^@]+\.[^@]+', x):  # Email
                if x not in self.emails:
                    pr('Found new email: ' + fc + x + fx)
                    self.emails.add(x)
                continue
            ux = urlsplit(x)
            if self.sb.domain not in ux.netloc:
                self.external_res.add(x)
                xl += 1
                continue
            final = ux.path.replace('//', '/')  # replacing as a workaround
            if final not in self.known_paths:
                self.known_paths.add(final)
                il += 1
def banners_cloud_flare(self):
    """Print the response headers and, if CloudFlare is detected, try to
    find the origin IP via a crimeflare lookup."""
    pr('Retrieving headers', '#')
    domain = self.pack_url()
    res = REQ_S.get(domain)
    if res.status_code != 200:
        pr('Bad status code: %d' % res.status_code, '!')
        return
    pr('Headers:')
    for h in res.headers.items():
        x = f'{h[0]} => {h[1]}'
        pr(x, '#')
    pr('Checking for CloudFlare in headers', '#')
    if "cloudflare" not in res.text:
        pr(self.domain + " is not using Cloudflare!")
        return
    if not pause('Attempt to bypass?', cancel=True):
        return
    pr("CloudFlare found, attempting to bypass..")
    # TODO TEST
    url = "http://www.crimeflare.biz/cgi-bin/cfsearch.cgi"
    res = REQ_S.get(url, data={'cfS': self.domain})
    reg = re.findall(r'\d+\.\d+\.\d+\.\d+', res.text)
    if reg:
        real_ip = reg[1]
    else:
        pr("CloudFlare wasn't bypassed, No real IP found", '!')
        return
    res = REQ_S.get(f"http://{real_ip}")
    if "cloudflare" not in res.text.lower():
        if real_ip:
            pr("Cloudflare Bypassed!", '#')
            pr('===============================')
            pr("Real IP ==> " + fc + real_ip + fx)
            pr('===============================')
            return
    pr("Cloudflare wasn't bypassed, Real IP blocked by CloudFlare", '!')
def whois(self):
    url = 'https://www.whois.com/whois/' + self.domain
    try:
        res = REQ_S.get(url)
        if res.status_code != 200:
            pr('Bad status code: %d' % res.status_code, '!')
            return
        bs = bs4.BeautifulSoup(res.content, 'html.parser')
        result = bs.find_all('pre', {'class': 'df-raw'})[0].decode_contents()
        print(f"\n{fc + result + fx}")
    except requests.exceptions.RequestException:
        from traceback import print_exc
        print_exc()
def speed_check(self):
    import time
    start = time.time()
    ip = socket.gethostbyname(self.domain)
    dns_tm = time.time() - start
    _dns = "{:<10}:{:<20} seconds".format("DNS", dns_tm)
    pr(_dns, '#')
    start = time.time()
    _data = REQ_S.get(self.pack_url())
    load_tm = time.time() - start
    _load = "{:<10}:{:<20} seconds".format("Load", load_tm)
    _wo = "{:<10}:{:<20} seconds".format("W/O DNS", load_tm - dns_tm)
    pr(_load, '#')
    pr(_wo, '#')
def find_subdomains(self):
    print("{}{:<62}| {:<50}{}".format(fc, "URL", "STATUS", fx))
    with open('./src/subdomains') as f:
        for sub in f:
            sub = sub.strip()  # strip the trailing newline before comparing
            if sub == self.subdomain:
                continue
            url = self.pack_url(subdomain=sub)
            try:
                res = REQ_S.get(url)
                if res.status_code != 404:
                    print("{}{:<62}| {:<50}{}".format(fg, url, res.status_code, fx))
            except KeyboardInterrupt:
                pr('Scan stopped!', '!')
                break
            except Exception:
                print("{}{:<62}| {:<50}{}".format(fr, url, 'ERROR', fx))
def reverse_YGS(self):
    """Reverse-IP lookup via yougetsignal.com."""
    # TODO record to file
    url = "https://domains.yougetsignal.com/domains.php"
    data = {'remoteAddress': self.domain, 'key': ''}
    res = REQ_S.get(url, params=data)
    if res.status_code != 200:
        pr('Bad status code: %d' % res.status_code, '!')
        # return
    grab = res.json()
    if 'fail' in grab['status'].lower():
        pr("Message:", '#')
        print(grab['message'])
        return
    pr("Results from: " + grab['lastScrape'], '#')
    pr("IP: " + grab['remoteIpAddress'], '#')
    pr("Domain: " + grab['remoteAddress'], '#')
    pr(f"Total Domains: {grab['domainCount']}\n", '#')
    for x, _ in grab['domainArray']:
        pr(x, '#')
def reverse_HT(self):
    """Reverse-IP lookup via hackertarget.com; results are dumped to ./reverseip/."""
    url = "http://api.hackertarget.com/reverseiplookup/?q=" + self.domain
    res = REQ_S.get(url)
    if res.status_code != 200:
        pr('Bad status code: %d' % res.status_code, '!')
        return
    lst = res.text.strip().split("\n")
    reverse_dir = './reverseip'
    if not os.path.isdir(reverse_dir):
        os.mkdir(reverse_dir)
    fn = os.path.join(reverse_dir, f'ht-{self.domain}')
    with open(fn, 'w') as f:
        for l in lst:
            if l:
                f.write(l.strip() + '\n')
                # pr(l, '#')
    print()
    pr(f'Dumped {len(lst)} entries to "{fn}"\n')