def findInLink(self, hostnameBase, url, linkId, totalLinks):

    self.context.out(
        message=self.context.strings['methods']['certificate-details']['find-link'],
        parseDict={
            'link-id': linkId,
            'total-links': totalLinks
        }
    )

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(url=url)

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['certificate-details']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['certificate-details']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
        result['response-content']
    )

    if len(matches) == 0:
        # Nothing found
        return

    # Process all matches
    for item in matches:

        # Unique results only, and a valid subdomain?
        if ((item.decode() not in self.hostnames) and
                (item.decode().endswith('.' + hostnameBase))):

            # Add to the stack to keep results unique
            self.hostnames.append(item.decode())

            # Add the full hostname
            self.context.addHostName(
                hostname=item.decode(),
                messageFormat=self.context.strings['methods']['certificate-details']['item-found']
            )
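
# A minimal standalone sketch (not part of the class above) of what the byte
# pattern extracts: hostnames of the target domain that sit between ">" and "<"
# in the returned HTML. The sample markup below is hypothetical.
import re

hostnameBase = 'example.com'
html = b'<td>mail.example.com</td><td>dev.api.example.com</td><td>other.net</td>'

matches = re.findall(
    br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
    html)

print([m.decode() for m in matches])  # ['mail.example.com', 'dev.api.example.com']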
def find(self, hostnameBase):

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(
            url='https://crt.sh/?q=' + crawler.urlencode('%.' + hostnameBase) +
            '&output=json')

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['crt-sh']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['crt-sh']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['crt-sh']['corrupt-response'])
        return

    if (not isinstance(result, list)) or (len(result) == 0):
        self.context.out(
            self.context.strings['methods']['crt-sh']['empty'])
        return

    # Process each hostname
    for item in result:

        # Drop root wildcards
        if item['name_value'] == ('*.' + hostnameBase):
            continue

        if item['name_value'] not in self.hostnames:

            # For unique results
            self.hostnames.append(item['name_value'])

            # Add the full hostname
            self.context.addHostName(
                hostname=item['name_value'],
                messageFormat=self.context.strings['methods']['crt-sh']['item-found']
            )
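
# A standalone sketch of the filtering above, run against a hypothetical crt.sh
# JSON payload (the real response carries more keys per entry than name_value):
# the root wildcard is dropped and duplicates are collapsed.
import json

hostnameBase = 'example.com'
payload = '''[
    {"name_value": "*.example.com"},
    {"name_value": "www.example.com"},
    {"name_value": "www.example.com"},
    {"name_value": "mail.example.com"}
]'''

hostnames = []
for item in json.loads(payload):
    if item['name_value'] == ('*.' + hostnameBase):
        continue  # drop the root wildcard
    if item['name_value'] not in hostnames:
        hostnames.append(item['name_value'])

print(hostnames)  # ['www.example.com', 'mail.example.com']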
def find(self, hostnameBase):

    # Use the crawler bot
    crawler = WCrawler()

    # HTML result
    result = None

    try:
        result = crawler.httpRequest(
            url='https://www.robtex.com/dns-lookup/' +
            crawler.urlencode(hostnameBase))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['robtex']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['robtex']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    # Remove bold tags that split hostnames,
    # e.g. foo.<b>domain.com</b>
    result['response-content'] = result['response-content'].replace(
        b'<b>', b'').replace(b'</b>', b'')

    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
        result['response-content']
    )

    if len(matches) == 0:
        self.context.out(
            self.context.strings['methods']['robtex']['empty'])
        return

    # Process all matches
    for item in matches:

        # Add the full hostname
        self.context.addHostName(
            hostname=item.decode(),
            messageFormat=self.context.strings['methods']['robtex']['item-found']
        )
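
# Why the <b>/</b> strip above matters: Robtex wraps parts of each hostname in
# bold tags, so the raw markup never matches the extraction pattern. A minimal
# standalone check with hypothetical markup:
import re

hostnameBase = 'example.com'
html = b'<a href="#">foo.<b>example.com</b></a>'

pattern = br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<'

print(re.findall(pattern, html))  # [] (the "<b>" breaks the match)
print(re.findall(pattern, html.replace(b'<b>', b'').replace(b'</b>', b'')))
# [b'foo.example.com']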
def find(self, hostnameBase, nextUrl=None, pageId=1):

    self.context.out(
        message=self.context.strings['methods']['virus-total']['paginating'],
        parseDict={
            'number': pageId
        }
    )

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        if nextUrl is None:
            result = crawler.httpRequest(
                url='https://www.virustotal.com/ui/domains/' +
                crawler.urlencode(hostnameBase) + '/subdomains?limit=40')
        else:
            result = crawler.httpRequest(nextUrl)

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['virus-total']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['virus-total']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['virus-total']['corrupt-response'])
        return

    if len(result['data']) == 0:
        self.context.out(
            self.context.strings['methods']['virus-total']['no-more'])
        return

    # Process all subdomains found
    for item in result['data']:

        # Unique results only for this instance
        if str(item['id']) in self.hostnames:
            continue

        # Add to the current stack to keep results unique
        self.hostnames.append(str(item['id']))

        # Add the full hostname
        self.context.addHostName(
            hostname=str(item['id']),
            messageFormat=self.context.strings['methods']['virus-total']['item-found']
        )

    # Need to paginate?
    if (('links' in result) and
            ('next' in result['links']) and
            result['links']['next']):
        self.find(
            hostnameBase=hostnameBase,
            nextUrl=str(result['links']['next']),
            pageId=(pageId + 1)
        )
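
# The recursion above follows VirusTotal's cursor-style paging: each response
# may carry links.next with the URL of the following page. A minimal iterative
# equivalent of the same idea; fetch is a hypothetical stand-in for
# crawler.httpRequest plus json.loads.
def iterate_subdomains(fetch, firstUrl):
    url = firstUrl
    while url:
        page = fetch(url)                 # parsed JSON of one page
        for item in page.get('data', []):
            yield str(item['id'])         # each entry id is a hostname
        # follow the cursor until no "next" link is present
        url = page.get('links', {}).get('next')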
def paginate(self, hostnameBase, pageNumber=1):

    searchContext = {
        'max-pages': 15,
        'max-result': 10,
        'start-index': 1,
        'query': 'site:' + hostnameBase
    }

    if self.hostnames:
        # Do not process already-known subdomains
        searchContext['query'] += ' -site:' + ' -site:'.join(self.hostnames)

    # Start index of the current page
    searchContext['start-index'] = (
        ((pageNumber - 1) * searchContext['max-result']) + 1)

    # Header message for the pagination
    self.context.out(
        self.context.strings['methods']['google']['pagination'])

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(
            'https://www.googleapis.com/customsearch/v1?' +
            'cx=' + crawler.urlencode(self.googleCx) +
            '&key=' + crawler.urlencode(self.googleApiKey) +
            '&q=' + crawler.urlencode(searchContext['query']) +
            '&start=' + str(searchContext['start-index']) +
            '&filter=1&safe=off&num=' + str(searchContext['max-result']))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['google']['no-connect'])
        return

    if result['status-code'] in [403, 400]:
        # No more results
        self.context.out(
            self.context.strings['methods']['google']['no-more-results'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['google']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['google']['corrupt-response'])
        return

    if ('items' not in result) or (len(result['items']) == 0):
        self.context.out(
            self.context.strings['methods']['google']['no-more-results'])
        return

    # Process each result
    for item in result['items']:

        # Is it a valid subdomain?
        if not item['displayLink'].endswith('.' + hostnameBase):
            continue

        if item['displayLink'] not in self.hostnames:

            # For unique results
            self.hostnames.append(item['displayLink'])

            # Add the full hostname
            self.context.addHostName(
                hostname=item['displayLink'],
                messageFormat=self.context.strings['methods']['google']['item-found']
            )

            # Return to the first page again (the query now excludes this result)
            pageNumber = 0

    # Page limit reached?
    if pageNumber >= searchContext['max-pages']:
        self.context.out(
            self.context.strings['methods']['google']['no-more-results'])
        return

    # Next page
    self.paginate(hostnameBase=hostnameBase, pageNumber=pageNumber + 1)
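
# How the Custom Search API paging math above works: with 10 results per page,
# page N starts at item (N - 1) * 10 + 1. A standalone check:
def start_index(pageNumber, maxResult=10):
    return ((pageNumber - 1) * maxResult) + 1

assert [start_index(p) for p in (1, 2, 3)] == [1, 11, 21]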
def find(self, hostnameBase):

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(
            url='https://certificatedetails.com/api/list/' +
            crawler.urlencode(hostnameBase))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['certificate-details']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['certificate-details']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['certificate-details']['corrupt-response'])
        return

    if (not isinstance(result, list)) or (len(result) == 0):
        self.context.out(
            self.context.strings['methods']['certificate-details']['empty'])
        return

    # Process each hostname
    for item in result:

        # Drop root wildcards
        if item['CommonName'] == ('*.' + hostnameBase):
            continue

        # Is it a valid subdomain?
        if not item['CommonName'].endswith('.' + hostnameBase):
            continue

        # Normalize wildcard common names
        if item['CommonName'].startswith('*.'):
            item['CommonName'] = item['CommonName'][2:]

        if item['CommonName'] not in self.hostnames:

            # For unique results
            self.hostnames.append(item['CommonName'])

            # Add the full hostname
            self.context.addHostName(
                hostname=item['CommonName'],
                messageFormat=self.context.strings['methods']['certificate-details']['item-found']
            )

    # Header message before processing all links
    self.context.out(
        self.context.strings['methods']['certificate-details']['find-links'])

    # Current link id
    linkId = 0

    # Process each link
    # Caution: the same hostname can carry one or more certificates
    for item in result:
        linkId += 1
        self.findInLink(
            hostnameBase=hostnameBase,
            url='https://certificatedetails.com' + item['Link'],
            linkId=linkId,
            totalLinks=len(result)
        )
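
# A standalone sketch of the CommonName filtering above, using hypothetical
# certificate entries: the root wildcard is dropped, foreign names are skipped,
# and deeper wildcards are normalized by cutting the "*." prefix.
hostnameBase = 'example.com'
entries = ['*.example.com', 'cdn.other.net', '*.api.example.com', 'www.example.com']

kept = []
for cn in entries:
    if cn == '*.' + hostnameBase:
        continue                  # root wildcard
    if not cn.endswith('.' + hostnameBase):
        continue                  # not a subdomain of the target
    if cn.startswith('*.'):
        cn = cn[2:]               # *.api.example.com -> api.example.com
    kept.append(cn)

print(kept)  # ['api.example.com', 'www.example.com']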
def find(self, hostnameBase):

    self.context.out(
        self.context.strings['methods']['dnsdumpster']['getting-token-xsrf'])

    # Use the crawler bot
    crawler = WCrawler()

    # HTML result
    result = None

    try:
        result = crawler.httpRequest(url='https://dnsdumpster.com/')

    except Exception:
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['dnsdumpster']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    # Find the XSRF token
    matches = re.search(
        br'name=\'csrfmiddlewaretoken\'\s+value=\'(.+?)\'',
        result['response-content'],
        re.I | re.M
    )

    if not matches:
        # No token found
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['no-xsrf-token-found'])
        return

    # The XSRF token
    tokenXsrf = matches.group(1)

    self.context.out(
        self.context.strings['methods']['dnsdumpster']['getting-subdomains'])

    try:
        result = crawler.httpRequest(
            url='https://dnsdumpster.com/',
            postData={
                'csrfmiddlewaretoken': tokenXsrf,
                'targetip': hostnameBase
            }
        )

    except Exception:
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['dnsdumpster']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
        result['response-content']
    )

    if len(matches) == 0:
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['empty'])
        return

    # Process all matches
    for item in matches:

        # Add the full hostname
        self.context.addHostName(
            hostname=item.decode(),
            messageFormat=self.context.strings['methods']['dnsdumpster']['item-found']
        )
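
# A standalone sketch of the token scrape above, against a hypothetical
# dnsdumpster form. The POST must reuse the session that fetched the form,
# which is why the method above keeps the crawler context alive between the
# two requests instead of calling clearContext() after the first one.
import re

html = b"<form><input type='hidden' name='csrfmiddlewaretoken' value='AbC123'></form>"

matches = re.search(
    br'name=\'csrfmiddlewaretoken\'\s+value=\'(.+?)\'',
    html, re.I | re.M)

print(matches.group(1))  # b'AbC123'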
def paginate(self, hostnameBase, pageNumber=1):

    searchContext = {
        'max-pages': 15,
        'max-result': 10,
        'start-index': 1,
        'query': 'domain:' + hostnameBase
    }

    if self.hostnames:
        # Do not process already-known subdomains
        searchContext['query'] += ' -domain:' + ' -domain:'.join(self.hostnames)

    # Start index of the current page
    searchContext['start-index'] = (
        ((pageNumber - 1) * searchContext['max-result']) + 1)

    # Header message for the pagination
    self.context.out(
        message=self.context.strings['methods']['bing']['pagination'])

    # Use the crawler bot
    crawler = WCrawler()

    # HTML result
    result = None

    try:
        result = crawler.httpRequest(
            'https://www.bing.com/search?' +
            '&q=' + crawler.urlencode(searchContext['query']) +
            '&first=' + str(searchContext['start-index']))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['bing']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['bing']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    # Bing highlights the query term, splitting hostnames,
    # e.g. <cite>https://foo<strong>domain.com</strong>
    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')',
        result['response-content'].replace(
            b'<strong>' + hostnameBase.encode(),
            b'.' + hostnameBase.encode()))

    if len(matches) == 0:
        self.context.out(
            self.context.strings['methods']['bing']['no-more-results'])
        return

    # Process all matches
    for item in matches:

        # Unique results only
        if item.decode() in self.hostnames:
            continue

        # Add to the stack to keep results unique
        self.hostnames.append(item.decode())

        # Add the full hostname
        self.context.addHostName(
            hostname=item.decode(),
            messageFormat=self.context.strings['methods']['bing']['item-found']
        )

    # Can we continue to the next page?
    if b'sw_next' not in result['response-content']:
        self.context.out(
            self.context.strings['methods']['bing']['no-more-results'])
        return

    # Page limit reached?
    if pageNumber >= searchContext['max-pages']:
        self.context.out(
            self.context.strings['methods']['bing']['no-more-results'])
        return

    # Next page
    self.paginate(hostnameBase=hostnameBase, pageNumber=pageNumber + 1)
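
# A standalone sketch of the exclusion query built above: every hostname found
# so far is appended as a -domain: operator, so each new page only surfaces
# subdomains that have not been seen yet.
hostnameBase = 'example.com'
hostnames = ['www.example.com', 'mail.example.com']

query = 'domain:' + hostnameBase
if hostnames:
    query += ' -domain:' + ' -domain:'.join(hostnames)

print(query)  # domain:example.com -domain:www.example.com -domain:mail.example.com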