def module_run(self, domains):
    """Scrape Yahoo search results for subdomains of each target domain.

    For every domain, repeatedly queries Yahoo with 'domain:<domain>'
    while excluding hosts already harvested ('-domain:<host>'), paging
    through results until no new hostnames appear and no next page is
    offered. Each new host is reported via self.output and stored via
    self.add_hosts.
    """
    base_url = 'https://search.yahoo.com/search'
    for domain in domains:
        self.heading(domain, level=0)
        base_query = 'domain:' + domain
        hosts = []
        # control variables
        new = True
        page = 0
        nr = 100  # results requested per page
        # execute search engine queries and scrape results storing hostnames in a list
        # loop until no new hostnames are found
        while new:
            # exclude every host already found from the next query
            query = ''.join(' -domain:%s' % (host,) for host in hosts)
            full_query = base_query + query
            payload = {'pz': nr, 'b': (page * nr) + 1, 'p': full_query}
            # yahoo does not appear to have a max url length
            self.verbose('URL: %s?%s' % (base_url, encode_payload(payload)))
            # send query to search engine
            resp = self.request(base_url, method='POST', payload=payload)
            if resp.status_code != 200:
                self.alert('Yahoo has encountered an error. Please submit an issue for debugging.')
                break
            tree = fromstring(resp.text)
            sites = tree.xpath('//a[@class=" ac-algo ac-21th"]/@href')
            sites = [urlparse(x).hostname for x in sites]
            # create a unique list
            sites = list(set(sites))
            new = False
            # add hostname to list if not already present
            for site in sites:
                if site not in hosts:
                    hosts.append(site)
                    new = True
                    self.output(site)
                    self.add_hosts(site)
            if not new:
                # exit if all hostnames have been found
                if '>Next<' not in resp.text:
                    break
                else:
                    page += 1
                    self.verbose('No New Subdomains Found on the Current Page. Jumping to Result %d.' % ((page * nr) + 1))
                    new = True
            # sleep script to avoid lock-out
            self.verbose('Sleeping to avoid lockout...')
            time.sleep(random.randint(5, 15))
def get_company_id(self, company_name):
    """Resolve a company name to a Jigsaw company ID.

    Queries the Jigsaw company search API (optionally narrowed by the
    'keywords' option), keeps only matches with active contacts, and
    returns the company ID. A single match is returned directly; for
    multiple matches the user is prompted (defaulting to the first
    match). Returns None when nothing usable is found.
    """
    self.heading(company_name, level=0)
    keywords = self.options['keywords']
    all_companies = []
    cnt = 0
    size = 50
    params = ' '.join([x for x in [company_name, keywords] if x])
    url = 'https://www.jigsaw.com/rest/searchCompany.json'
    payload = {'token': self.api_key, 'name': params, 'offset': cnt, 'pageSize': size}
    self.verbose('Query: %s?%s' % (url, encode_payload(payload)))
    resp = self.request(url, payload=payload, redirect=False)
    jsonobj = resp.json
    if jsonobj['totalHits'] == 0:
        self.output('No company matches found.')
        return
    # keep only companies that actually have contactable records
    for company in jsonobj['companies']:
        if company['activeContacts'] > 0:
            location = '%s, %s, %s' % (company['city'], company['state'], company['country'])
            all_companies.append((company['companyId'], company['name'], company['activeContacts'], location))
    # NOTE: paging through further result sets (offset += size) is
    # intentionally disabled; jigsaw rate limits api requests per second.
    if not all_companies:
        self.output('No contacts available for companies matching \'%s\'.' % (company_name))
        return
    if len(all_companies) == 1:
        company_id = all_companies[0][0]
        company_name = all_companies[0][1]
        contact_cnt = all_companies[0][2]
        self.output('Unique company match found: [%s - %s (%s contacts)]' % (company_name, company_id, contact_cnt))
        return company_id
    # multiple matches: list them all and prompt the user for a choice
    id_len = max(len(str(x[0])) for x in all_companies)
    for company in all_companies:
        self.output('[%s] %s - %s (%s contacts)' % (str(company[0]).ljust(id_len), company[1], company[3], company[2]))
    company_id = raw_input('Enter Company ID from list [%s - %s]: ' % (all_companies[0][1], all_companies[0][0]))
    if not company_id:
        company_id = all_companies[0][0]
    return company_id
def module_run(self, domains):
    """Scrape Yahoo search results for subdomains of each target domain.

    For every domain, repeatedly queries Yahoo with 'domain:<domain>'
    while excluding hosts already harvested ('-domain:<host>'), paging
    through results until no new hostnames appear and no next page is
    offered. Each new host is reported via self.output and stored via
    self.add_hosts.
    """
    base_url = 'https://search.yahoo.com/search'
    for domain in domains:
        self.heading(domain, level=0)
        base_query = 'domain:' + domain
        hosts = []
        # control variables
        new = True
        page = 0
        nr = 100  # results requested per page
        # execute search engine queries and scrape results storing hostnames in a list
        # loop until no new hostnames are found
        while new:
            # exclude every host already found from the next query
            query = ''.join(' -domain:%s' % (host,) for host in hosts)
            full_query = base_query + query
            payload = {'pz': nr, 'b': (page * nr) + 1, 'p': full_query}
            # yahoo does not appear to have a max url length
            self.verbose('URL: %s?%s' % (base_url, encode_payload(payload)))
            # send query to search engine
            resp = self.request(base_url, method='POST', payload=payload)
            if resp.status_code != 200:
                self.alert('Yahoo has encountered an error. Please submit an issue for debugging.')
                break
            tree = fromstring(resp.text)
            sites = tree.xpath('//a[@class=" ac-algo ac-21th"]/@href')
            sites = [urlparse(x).hostname for x in sites]
            # create a unique list
            sites = list(set(sites))
            new = False
            # add hostname to list if not already present
            for site in sites:
                if site not in hosts:
                    hosts.append(site)
                    new = True
                    self.output(site)
                    self.add_hosts(site)
            if not new:
                # exit if all hostnames have been found
                if '>Next<' not in resp.text:
                    break
                else:
                    page += 1
                    self.verbose('No New Subdomains Found on the Current Page. Jumping to Result %d.' % ((page * nr) + 1))
                    new = True
            # sleep script to avoid lock-out
            self.verbose('Sleeping to avoid lockout...')
            time.sleep(random.randint(5, 15))
def module_run(self, domains):
    """Harvest subdomains for each target domain from Netcraft's searchdns.

    First answers Netcraft's JavaScript verification challenge by hashing
    the url-decoded challenge cookie value and setting the expected
    response cookie, then scrapes each result page for hostnames,
    following pagination tokens until no further pages are available.
    """
    url = 'http://searchdns.netcraft.com/'
    # raw string so regex escapes are not (mis)interpreted as python
    # string escapes; '\=' in the old pattern was an invalid escape
    pattern = r'<td align="left">\s*<a href="http://(.*?)/"'
    # answer challenge cookie
    cookiejar = CookieJar()
    payload = {'restriction': 'site+ends+with', 'host': 'test.com'}
    resp = self.request(url, payload=payload, cookiejar=cookiejar)
    cookiejar = resp.cookiejar
    for cookie in cookiejar:
        if cookie.name == 'netcraft_js_verification_challenge':
            challenge = cookie.value
            # expected response is the sha1 of the url-decoded challenge
            response = hashlib.sha1(urllib.unquote(challenge)).hexdigest()
            cookiejar.set_cookie(self.make_cookie('netcraft_js_verification_response', '%s' % response, '.netcraft.com'))
            break
    for domain in domains:
        self.heading(domain, level=0)
        payload['host'] = domain
        subs = []
        # execute search engine queries and scrape results storing subdomains in a list
        # loop until no Next Page is available
        while True:
            self.verbose('URL: %s?%s' % (url, encode_payload(payload)))
            resp = self.request(url, payload=payload, cookiejar=cookiejar)
            content = resp.text
            sites = re.findall(pattern, content)
            # create a unique list
            sites = list(set(sites))
            # add subdomain to list if not already present
            for site in sites:
                if site not in subs:
                    subs.append(site)
                    self.output('%s' % (site))
                    self.add_hosts(site)
            # check whether more pages exist while grabbing the correct
            # pagination tokens for our payload
            link = re.findall(r'(\blast\=\b|\bfrom\=\b)(.*?)&', content)
            if not link:
                break
            payload['last'] = link[0][1]
            payload['from'] = link[1][1]
            self.verbose('Next page available! Requesting again...')
            # sleep script to avoid lock-out
            self.verbose('Sleeping to Avoid Lock-out...')
            time.sleep(random.randint(5, 15))
        if not subs:
            self.output('No results found.')
def module_run(self, domains):
    """Harvest subdomains for each target domain from Netcraft's searchdns.

    First answers Netcraft's JavaScript verification challenge by hashing
    the url-decoded challenge cookie value and setting the expected
    response cookie, then scrapes each result page for hostnames,
    following pagination tokens until no further pages are available.
    """
    url = 'http://searchdns.netcraft.com/'
    # raw string so regex escapes are not (mis)interpreted as python
    # string escapes; '\=' in the old pattern was an invalid escape
    pattern = r'<td align="left">\s*<a href="http://(.*?)/"'
    # answer challenge cookie
    cookiejar = CookieJar()
    payload = {'restriction': 'site+ends+with', 'host': 'test.com'}
    resp = self.request(url, payload=payload, cookiejar=cookiejar)
    cookiejar = resp.cookiejar
    for cookie in cookiejar:
        if cookie.name == 'netcraft_js_verification_challenge':
            challenge = cookie.value
            # expected response is the sha1 of the url-decoded challenge
            response = hashlib.sha1(urllib.unquote(challenge)).hexdigest()
            cookiejar.set_cookie(self.make_cookie('netcraft_js_verification_response', '%s' % response, '.netcraft.com'))
            break
    for domain in domains:
        self.heading(domain, level=0)
        payload['host'] = domain
        subs = []
        # execute search engine queries and scrape results storing subdomains in a list
        # loop until no Next Page is available
        while True:
            self.verbose('URL: %s?%s' % (url, encode_payload(payload)))
            resp = self.request(url, payload=payload, cookiejar=cookiejar)
            content = resp.text
            sites = re.findall(pattern, content)
            # create a unique list
            sites = list(set(sites))
            # add subdomain to list if not already present
            for site in sites:
                if site not in subs:
                    subs.append(site)
                    self.output('%s' % (site))
                    self.add_hosts(site)
            # check whether more pages exist while grabbing the correct
            # pagination tokens for our payload
            link = re.findall(r'(\blast\=\b|\bfrom\=\b)(.*?)&', content)
            if not link:
                break
            payload['last'] = link[0][1]
            payload['from'] = link[1][1]
            self.verbose('Next page available! Requesting again...')
            # sleep script to avoid lock-out
            self.verbose('Sleeping to Avoid Lock-out...')
            time.sleep(random.randint(5, 15))
        if not subs:
            self.output('No results found.')
def get_company_id(self, company_name):
    """Resolve a company name to a Jigsaw company ID.

    Queries the Jigsaw company search API (optionally narrowed by the
    'keywords' option), keeps only matches with active contacts, and
    returns the company ID. A single match is returned directly; for
    multiple matches the user is prompted (defaulting to the first
    match). Returns None when nothing usable is found.
    """
    self.heading(company_name, level=0)
    keywords = self.options['keywords']
    all_companies = []
    cnt = 0
    size = 50
    params = ' '.join([x for x in [company_name, keywords] if x])
    url = 'https://www.jigsaw.com/rest/searchCompany.json'
    payload = {'token': self.api_key, 'name': params, 'offset': cnt, 'pageSize': size}
    self.verbose('Query: %s?%s' % (url, encode_payload(payload)))
    resp = self.request(url, payload=payload, redirect=False)
    jsonobj = resp.json
    if jsonobj['totalHits'] == 0:
        self.output('No company matches found.')
        return
    # keep only companies that actually have contactable records
    for company in jsonobj['companies']:
        if company['activeContacts'] > 0:
            location = '%s, %s, %s' % (company['city'], company['state'], company['country'])
            all_companies.append((company['companyId'], company['name'], company['activeContacts'], location))
    # NOTE: paging through further result sets (offset += size) is
    # intentionally disabled; jigsaw rate limits api requests per second.
    if not all_companies:
        self.output('No contacts available for companies matching \'%s\'.' % (company_name))
        return
    if len(all_companies) == 1:
        company_id = all_companies[0][0]
        company_name = all_companies[0][1]
        contact_cnt = all_companies[0][2]
        self.output('Unique company match found: [%s - %s (%s contacts)]' % (company_name, company_id, contact_cnt))
        return company_id
    # multiple matches: list them all and prompt the user for a choice
    id_len = max(len(str(x[0])) for x in all_companies)
    for company in all_companies:
        self.output('[%s] %s - %s (%s contacts)' % (str(company[0]).ljust(id_len), company[1], company[3], company[2]))
    company_id = raw_input('Enter Company ID from list [%s - %s]: ' % (all_companies[0][1], all_companies[0][0]))
    if not company_id:
        company_id = all_companies[0][0]
    return company_id