Example #1
    # Module-level requirements (not shown in this snippet): sys,
    # random.shuffle, bs4.BeautifulSoup, and the project-local
    # HarParser and APIWriter helpers.
    def crawlingScan(self, url, apiCalls=None, allFoundURLs=None):
        # Use None defaults instead of mutable default arguments, which
        # would be shared across every top-level call to this method.
        if apiCalls is None:
            apiCalls = []
        if allFoundURLs is None:
            allFoundURLs = []

        # Stop recursing once the page budget is spent.
        self.count -= 1
        if self.count < 0:
            return apiCalls

        harParser = HarParser(self.harDirectory,
                              searchString=self.searchString,
                              removeParams=self.removeParams)

        # If uncommented, returns as soon as a matching call is found.
        # if self.searchString is not None and len(apiCalls) > 0:
        #     return apiCalls
        try:
            print("Scanning URL: " + url)
            html = self.openURL(url)
            if html is not None:
                bsObj = BeautifulSoup(html, "lxml")

                # Parse the HAR capture recorded while the page loaded and
                # collect any API calls found in it.
                harObj = harParser.getSingleHarFile()
                apiCalls = harParser.scanHarfile(harObj, apiCalls=apiCalls)

                # Extract internal links from the page and crawl them in
                # random order.
                allFoundURLs, newUrls = self.findInternalURLs(bsObj, url,
                                                              allFoundURLs)
                shuffle(newUrls)

                for newUrl in newUrls:
                    self.crawlingScan(newUrl, apiCalls, allFoundURLs)

        except (KeyboardInterrupt, SystemExit):
            # On interrupt, close the browser and write out what was found
            # before exiting.
            print("Stopping crawl")
            self.browser.close()
            apiWriter = APIWriter(apiCalls)
            apiWriter.outputAPIs()
            sys.exit(1)  # sys.exit is safer than the interactive exit()
        return apiCalls
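
A minimal sketch of how this method might be driven from a script, assuming a crawler class named APIFinder whose constructor accepts the HAR directory, page budget, and the search options referenced above; the APIFinder name and its constructor signature are assumptions for illustration, not confirmed by the listing.

    # Hypothetical driver: APIFinder and its constructor arguments are
    # assumptions; crawlingScan itself is the method shown above.
    finder = APIFinder(harDirectory="hars/", searchString=None,
                       removeParams=False, count=50)
    apiCalls = finder.crawlingScan("https://example.com")
    for call in apiCalls:
        print(call)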