def findInLink(self, hostnameBase, url, linkId, totalLinks):

    self.context.out(
        message=self.context.strings['methods']['certificate-details']['find-link'],
        parseDict={
            'link-id': linkId,
            'total-links': totalLinks
        }
    )

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(url=url)

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['certificate-details']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['certificate-details']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
        result['response-content']
    )

    if len(matches) == 0:
        # Nothing found
        return

    # Process all matches
    for item in matches:

        # Unique results only, and a valid subdomain?
        if ((item.decode() not in self.hostnames) and
                (item.decode().endswith('.' + hostnameBase))):

            # Add to the stack to keep results unique
            self.hostnames.append(item.decode())

            # Add the full hostname
            self.context.addHostName(
                hostname=item.decode(),
                messageFormat=self.context.strings['methods']['certificate-details']['item-found']
            )
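
# A minimal standalone sketch (not part of the class above) of what the byte
# pattern extracts: hostnames of the target domain that sit between ">" and "<"
# in the returned HTML. The sample markup below is hypothetical.
import re

hostnameBase = 'example.com'
html = b'<td>mail.example.com</td><td>dev.api.example.com</td><td>other.net</td>'

matches = re.findall(
    br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
    html)

print([m.decode() for m in matches])  # ['mail.example.com', 'dev.api.example.com']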
def find(self, hostnameBase):

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(
            url='https://crt.sh/?q=' + crawler.urlencode('%.' + hostnameBase) +
            '&output=json')

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['crt-sh']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['crt-sh']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['crt-sh']['corrupt-response'])
        return

    if (not isinstance(result, list)) or (len(result) == 0):
        self.context.out(
            self.context.strings['methods']['crt-sh']['empty'])
        return

    # Process each hostname
    for item in result:

        # Drop root wildcards
        if item['name_value'] == ('*.' + hostnameBase):
            continue

        if item['name_value'] not in self.hostnames:

            # For unique results
            self.hostnames.append(item['name_value'])

            # Add the full hostname
            self.context.addHostName(
                hostname=item['name_value'],
                messageFormat=self.context.strings['methods']['crt-sh']['item-found']
            )
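
# A standalone sketch of the filtering above, run against a hypothetical crt.sh
# JSON payload (the real response carries more keys per entry than name_value):
# the root wildcard is dropped and duplicates are collapsed.
import json

hostnameBase = 'example.com'
payload = '''[
    {"name_value": "*.example.com"},
    {"name_value": "www.example.com"},
    {"name_value": "www.example.com"},
    {"name_value": "mail.example.com"}
]'''

hostnames = []
for item in json.loads(payload):
    if item['name_value'] == ('*.' + hostnameBase):
        continue  # drop the root wildcard
    if item['name_value'] not in hostnames:
        hostnames.append(item['name_value'])

print(hostnames)  # ['www.example.com', 'mail.example.com']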
def find(self, hostnameBase):

    # Use the crawler bot
    crawler = WCrawler()

    # HTML result
    result = None

    try:
        result = crawler.httpRequest(
            url='https://www.robtex.com/dns-lookup/' +
            crawler.urlencode(hostnameBase))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['robtex']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['robtex']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    # Remove bold tags that split hostnames,
    # e.g. foo.<b>domain.com</b>
    result['response-content'] = result['response-content'].replace(
        b'<b>', b'').replace(b'</b>', b'')

    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
        result['response-content']
    )

    if len(matches) == 0:
        self.context.out(
            self.context.strings['methods']['robtex']['empty'])
        return

    # Process all matches
    for item in matches:

        # Add the full hostname
        self.context.addHostName(
            hostname=item.decode(),
            messageFormat=self.context.strings['methods']['robtex']['item-found']
        )
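
# Why the <b>/</b> strip above matters: Robtex wraps parts of each hostname in
# bold tags, so the raw markup never matches the extraction pattern. A minimal
# standalone check with hypothetical markup:
import re

hostnameBase = 'example.com'
html = b'<a href="#">foo.<b>example.com</b></a>'

pattern = br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<'

print(re.findall(pattern, html))  # [] (the "<b>" breaks the match)
print(re.findall(pattern, html.replace(b'<b>', b'').replace(b'</b>', b'')))
# [b'foo.example.com']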
def find(self, hostnameBase, nextUrl=None, pageId=1):

    self.context.out(
        message=self.context.strings['methods']['virus-total']['paginating'],
        parseDict={
            'number': pageId
        }
    )

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        if nextUrl is None:
            result = crawler.httpRequest(
                url='https://www.virustotal.com/ui/domains/' +
                crawler.urlencode(hostnameBase) + '/subdomains?limit=40')
        else:
            result = crawler.httpRequest(nextUrl)

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['virus-total']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['virus-total']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['virus-total']['corrupt-response'])
        return

    if len(result['data']) == 0:
        self.context.out(
            self.context.strings['methods']['virus-total']['no-more'])
        return

    # Process all subdomains found
    for item in result['data']:

        # Unique results only for this instance
        if str(item['id']) in self.hostnames:
            continue

        # Add to the current stack to keep results unique
        self.hostnames.append(str(item['id']))

        # Add the full hostname
        self.context.addHostName(
            hostname=str(item['id']),
            messageFormat=self.context.strings['methods']['virus-total']['item-found']
        )

    # Need to paginate?
    if (('links' in result) and
            ('next' in result['links']) and
            result['links']['next']):
        self.find(
            hostnameBase=hostnameBase,
            nextUrl=str(result['links']['next']),
            pageId=(pageId + 1)
        )
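
# The recursion above follows VirusTotal's cursor-style paging: each response
# may carry links.next with the URL of the following page. A minimal iterative
# equivalent of the same idea; fetch is a hypothetical stand-in for
# crawler.httpRequest plus json.loads.
def iterate_subdomains(fetch, firstUrl):
    url = firstUrl
    while url:
        page = fetch(url)                 # parsed JSON of one page
        for item in page.get('data', []):
            yield str(item['id'])         # each entry id is a hostname
        # follow the cursor until no "next" link is present
        url = page.get('links', {}).get('next')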
def paginate(self, hostnameBase, pageNumber=1):

    searchContext = {
        'max-pages': 15,
        'max-result': 10,
        'start-index': 1,
        'query': 'site:' + hostnameBase
    }

    if self.hostnames:
        # Do not process already-known subdomains
        searchContext['query'] += ' -site:' + ' -site:'.join(self.hostnames)

    # Start index of the current page
    searchContext['start-index'] = (
        ((pageNumber - 1) * searchContext['max-result']) + 1)

    # Header message for the pagination
    self.context.out(
        self.context.strings['methods']['google']['pagination'])

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(
            'https://www.googleapis.com/customsearch/v1?' +
            'cx=' + crawler.urlencode(self.googleCx) +
            '&key=' + crawler.urlencode(self.googleApiKey) +
            '&q=' + crawler.urlencode(searchContext['query']) +
            '&start=' + str(searchContext['start-index']) +
            '&filter=1&safe=off&num=' + str(searchContext['max-result']))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['google']['no-connect'])
        return

    if result['status-code'] in [403, 400]:
        # No more results
        self.context.out(
            self.context.strings['methods']['google']['no-more-results'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['google']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['google']['corrupt-response'])
        return

    if ('items' not in result) or (len(result['items']) == 0):
        self.context.out(
            self.context.strings['methods']['google']['no-more-results'])
        return

    # Process each result
    for item in result['items']:

        # Is it a valid subdomain?
        if not item['displayLink'].endswith('.' + hostnameBase):
            continue

        if item['displayLink'] not in self.hostnames:

            # For unique results
            self.hostnames.append(item['displayLink'])

            # Add the full hostname
            self.context.addHostName(
                hostname=item['displayLink'],
                messageFormat=self.context.strings['methods']['google']['item-found']
            )

            # Return to the first page again (the query now excludes this result)
            pageNumber = 0

    # Page limit reached?
    if pageNumber >= searchContext['max-pages']:
        self.context.out(
            self.context.strings['methods']['google']['no-more-results'])
        return

    # Next page
    self.paginate(hostnameBase=hostnameBase, pageNumber=pageNumber + 1)
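
# How the Custom Search API paging math above works: with 10 results per page,
# page N starts at item (N - 1) * 10 + 1. A standalone check:
def start_index(pageNumber, maxResult=10):
    return ((pageNumber - 1) * maxResult) + 1

assert [start_index(p) for p in (1, 2, 3)] == [1, 11, 21]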
def find(self, hostnameBase):

    # Use the crawler bot
    crawler = WCrawler()

    # JSON result
    result = None

    try:
        result = crawler.httpRequest(
            url='https://certificatedetails.com/api/list/' +
            crawler.urlencode(hostnameBase))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['certificate-details']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['certificate-details']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    try:
        # Parse the result as a JSON object
        result = json.loads(result['response-content'])

    except Exception:
        self.context.out(
            self.context.strings['methods']['certificate-details']['corrupt-response'])
        return

    if (not isinstance(result, list)) or (len(result) == 0):
        self.context.out(
            self.context.strings['methods']['certificate-details']['empty'])
        return

    # Process each hostname
    for item in result:

        # Drop root wildcards
        if item['CommonName'] == ('*.' + hostnameBase):
            continue

        # Is it a valid subdomain?
        if not item['CommonName'].endswith('.' + hostnameBase):
            continue

        # Normalize wildcard common names
        if item['CommonName'].startswith('*.'):
            item['CommonName'] = item['CommonName'][2:]

        if item['CommonName'] not in self.hostnames:

            # For unique results
            self.hostnames.append(item['CommonName'])

            # Add the full hostname
            self.context.addHostName(
                hostname=item['CommonName'],
                messageFormat=self.context.strings['methods']['certificate-details']['item-found']
            )

    # Header message before processing all links
    self.context.out(
        self.context.strings['methods']['certificate-details']['find-links'])

    # Current link id
    linkId = 0

    # Process each link
    # Caution: the same hostname can carry one or more certificates
    for item in result:
        linkId += 1
        self.findInLink(
            hostnameBase=hostnameBase,
            url='https://certificatedetails.com' + item['Link'],
            linkId=linkId,
            totalLinks=len(result)
        )
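
# A standalone sketch of the CommonName filtering above, using hypothetical
# certificate entries: the root wildcard is dropped, foreign names are skipped,
# and deeper wildcards are normalized by cutting the "*." prefix.
hostnameBase = 'example.com'
entries = ['*.example.com', 'cdn.other.net', '*.api.example.com', 'www.example.com']

kept = []
for cn in entries:
    if cn == '*.' + hostnameBase:
        continue                  # root wildcard
    if not cn.endswith('.' + hostnameBase):
        continue                  # not a subdomain of the target
    if cn.startswith('*.'):
        cn = cn[2:]               # *.api.example.com -> api.example.com
    kept.append(cn)

print(kept)  # ['api.example.com', 'www.example.com']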
def find(self, hostnameBase):

    self.context.out(
        self.context.strings['methods']['dnsdumpster']['getting-token-xsrf'])

    # Use the crawler bot
    crawler = WCrawler()

    # HTML result
    result = None

    try:
        result = crawler.httpRequest(url='https://dnsdumpster.com/')

    except Exception:
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['dnsdumpster']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    # Find the XSRF token
    matches = re.search(
        br'name=\'csrfmiddlewaretoken\'\s+value=\'(.+?)\'',
        result['response-content'],
        re.I | re.M
    )

    if not matches:
        # No token found
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['no-xsrf-token-found'])
        return

    # The XSRF token
    tokenXsrf = matches.group(1)

    self.context.out(
        self.context.strings['methods']['dnsdumpster']['getting-subdomains'])

    try:
        result = crawler.httpRequest(
            url='https://dnsdumpster.com/',
            postData={
                'csrfmiddlewaretoken': tokenXsrf,
                'targetip': hostnameBase
            }
        )

    except Exception:
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['dnsdumpster']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')<',
        result['response-content']
    )

    if len(matches) == 0:
        self.context.out(
            self.context.strings['methods']['dnsdumpster']['empty'])
        return

    # Process all matches
    for item in matches:

        # Add the full hostname
        self.context.addHostName(
            hostname=item.decode(),
            messageFormat=self.context.strings['methods']['dnsdumpster']['item-found']
        )
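
# A standalone sketch of the token scrape above, against a hypothetical
# dnsdumpster form. The POST must reuse the session that fetched the form,
# which is why the method above keeps the crawler context alive between the
# two requests instead of calling clearContext() after the first one.
import re

html = b"<form><input type='hidden' name='csrfmiddlewaretoken' value='AbC123'></form>"

matches = re.search(
    br'name=\'csrfmiddlewaretoken\'\s+value=\'(.+?)\'',
    html, re.I | re.M)

print(matches.group(1))  # b'AbC123'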
def paginate(self, hostnameBase, pageNumber=1):

    searchContext = {
        'max-pages': 15,
        'max-result': 10,
        'start-index': 1,
        'query': 'domain:' + hostnameBase
    }

    if self.hostnames:
        # Do not process already-known subdomains
        searchContext['query'] += ' -domain:' + ' -domain:'.join(self.hostnames)

    # Start index of the current page
    searchContext['start-index'] = (
        ((pageNumber - 1) * searchContext['max-result']) + 1)

    # Header message for the pagination
    self.context.out(
        message=self.context.strings['methods']['bing']['pagination'])

    # Use the crawler bot
    crawler = WCrawler()

    # HTML result
    result = None

    try:
        result = crawler.httpRequest(
            'https://www.bing.com/search?' +
            '&q=' + crawler.urlencode(searchContext['query']) +
            '&first=' + str(searchContext['start-index']))

        # Free memory (no navigation context)
        crawler.clearContext()

    except Exception:
        self.context.out(
            self.context.strings['methods']['bing']['no-connect'])
        return

    # Was the HTTP response successful?
    if result['status-code'] != 200:
        self.context.out(
            message=self.context.strings['methods']['bing']['wrong-status-http'],
            parseDict={'id': result['status-code']}
        )
        return

    # Bing highlights the query term, splitting hostnames,
    # e.g. <cite>https://foo<strong>domain.com</strong>
    matches = re.findall(
        br'>([\w\.\-\_\$]+?\.' + re.escape(hostnameBase).encode() + br')',
        result['response-content'].replace(
            b'<strong>' + hostnameBase.encode(),
            b'.' + hostnameBase.encode()))

    if len(matches) == 0:
        self.context.out(
            self.context.strings['methods']['bing']['no-more-results'])
        return

    # Process all matches
    for item in matches:

        # Unique results only
        if item.decode() in self.hostnames:
            continue

        # Add to the stack to keep results unique
        self.hostnames.append(item.decode())

        # Add the full hostname
        self.context.addHostName(
            hostname=item.decode(),
            messageFormat=self.context.strings['methods']['bing']['item-found']
        )

    # Can we continue to the next page?
    if b'sw_next' not in result['response-content']:
        self.context.out(
            self.context.strings['methods']['bing']['no-more-results'])
        return

    # Page limit reached?
    if pageNumber >= searchContext['max-pages']:
        self.context.out(
            self.context.strings['methods']['bing']['no-more-results'])
        return

    # Next page
    self.paginate(hostnameBase=hostnameBase, pageNumber=pageNumber + 1)
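
# A standalone sketch of the exclusion query built above: every hostname found
# so far is appended as a -domain: operator, so each new page only surfaces
# subdomains that have not been seen yet.
hostnameBase = 'example.com'
hostnames = ['www.example.com', 'mail.example.com']

query = 'domain:' + hostnameBase
if hostnames:
    query += ' -domain:' + ' -domain:'.join(hostnames)

print(query)  # domain:example.com -domain:www.example.com -domain:mail.example.com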