Python HarParser Examples

Programming Language: Python

Namespace/Package Name: harParser

Class/Type: HarParser

Examples at hotexamples.com: 5

Python HarParser - 5 examples found. These are the top-rated real-world Python examples of harParser.HarParser, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

HarParser(3)

getSingleHarFile(1)

get_single_har_file(1)

parseMultipleHars(1)

parse_multiple_hars(1)

scanHarfile(1)

scan_har_file(1)

Example #1

Show file

    def start(self):
        """Run the API scan and return the API calls that were found.

        When ``self.url`` is set, the HAR directory is created if missing,
        ``self.delete_existing_hars()`` is called, a ``Browser`` is created
        and the site is crawled starting at that URL. Otherwise the HAR
        files in ``self.har_directory`` are parsed with ``HarParser``.

        Returns:
            The value returned by ``self.crawling_scan`` (URL mode) or by
            ``HarParser.parse_multiple_hars`` (directory mode).

        Raises:
            SystemExit: With status 1 when a page count greater than one is
                configured but no URL was given.
        """
        if self.count > 1 and self.url is None:
            print("Cannot provide page count with no URL given")
            # Raise SystemExit directly instead of calling ``exit``: that
            # builtin is injected by the ``site`` module for interactive
            # sessions and does not exist when ``site`` is not loaded.
            raise SystemExit(1)
        if self.remove_params and self.url is None:
            print(
                "WARNING: Must have Internet connection to remove unneeded parameters"
            )

        if self.url:
            # Scan for all APIs by crawling the live site.
            os.makedirs(self.har_directory, exist_ok=True)
            self.delete_existing_hars()
            self.browser = Browser(
                "chromedriver/chromedriver",
                "browsermob-proxy-2.1.4/bin/browsermob-proxy",
                self.har_directory,
                cookies=self.cookies)
            if self.search_string is not None:
                print("Searching URL " + self.url + " for string " +
                      self.search_string)
            # Move recursively through the site.
            api_calls = self.crawling_scan(self.url)
        else:
            # No URL given: scan the directory of existing har files.
            print("Parsing existing directory of har files")
            har_parser = HarParser(self.har_directory, self.search_string,
                                   self.remove_params)
            api_calls = har_parser.parse_multiple_hars()

        # NOTE(review): assumes ``self.browser`` is initialised (e.g. to
        # None) before ``start`` runs, since the directory branch never
        # assigns it -- confirm against the constructor.
        if self.browser is not None:
            self.browser.close()

        return api_calls

Example #2

Show file

	def crawlingScan(self, url, apiCalls=None, allFoundURLs=None):
		"""Scan ``url`` for API calls, then recurse into its internal URLs.

		Every call decrements ``self.count``; once the counter drops below
		zero the call returns ``None`` without scanning anything.

		Args:
			url: Address of the page to open and scan.
			apiCalls: API calls collected so far, handed to
				``HarParser.scanHarfile``. A fresh list is used when omitted.
			allFoundURLs: URLs seen so far, handed to
				``self.findInternalURLs``. A fresh list is used when omitted.

		Returns:
			The result of ``scanHarfile`` for this page, the ``apiCalls``
			that were passed in when the page could not be opened, or
			``None`` when ``self.count`` is exhausted.

		Raises:
			SystemExit: With status 1 after a ``KeyboardInterrupt`` or
				``SystemExit`` interrupts the crawl; the browser is closed
				and the calls found so far are written out first.
		"""
		# Use None sentinels rather than ``[]`` defaults: a list default is
		# created once and would be shared by every top-level call.
		if apiCalls is None:
			apiCalls = []
		if allFoundURLs is None:
			allFoundURLs = []

		self.count = self.count - 1
		if self.count < 0:
			return None

		harParser = HarParser(self.harDirectory, searchString=self.searchString, removeParams=self.removeParams)

		# To stop as soon as a matching call is found, return here when
		# self.searchString is not None and apiCalls is non-empty.
		try:
			print("Scanning URL: "+url)
			html = self.openURL(url)
			if html is not None:
				bsObj = BeautifulSoup(html, "lxml")

				harObj = harParser.getSingleHarFile()
				apiCalls = harParser.scanHarfile(harObj, apiCalls=apiCalls)

				allFoundURLs, newUrls = self.findInternalURLs(bsObj, url, allFoundURLs)
				shuffle(newUrls)

				# NOTE(review): the recursive return values are discarded, so
				# results from deeper pages are only visible if scanHarfile
				# extends the passed list in place -- confirm.
				for newUrl in newUrls:
					self.crawlingScan(newUrl, apiCalls, allFoundURLs)

		except (KeyboardInterrupt, SystemExit):
			print("Stopping crawl")
			self.browser.close()
			apiWriter = APIWriter(apiCalls)
			apiWriter.outputAPIs()
			# ``exit`` is a ``site`` helper meant for interactive use; raise
			# SystemExit directly so this works without it.
			raise SystemExit(1)
		return apiCalls

Example #3

Show file

	def start(self):
		"""Run the API scan and return the API calls that were found.

		When ``self.url`` is set, the HAR directory is created if missing,
		``self.deleteExistingHars()`` is called, a ``Browser`` is created and
		the site is crawled starting at that URL. Otherwise the HAR files in
		``self.harDirectory`` are parsed with ``HarParser``.

		Returns:
			The value returned by ``self.crawlingScan`` (URL mode) or by
			``HarParser.parseMultipleHars`` (directory mode).

		Raises:
			SystemExit: With status 1 when a page count greater than one is
				configured but no URL was given.
		"""
		if self.count > 1 and self.url is None:
			print("Cannot provide page count with no URL given")
			# Raise SystemExit directly instead of calling ``exit``: that
			# builtin is injected by the ``site`` module for interactive
			# sessions and does not exist when ``site`` is not loaded.
			raise SystemExit(1)
		if self.removeParams and self.url is None:
			print("WARNING: Must have Internet connection to remove unneeded parameters")

		if self.url:
			# Scan for all APIs by crawling the live site.
			os.makedirs(self.harDirectory, exist_ok=True)
			self.deleteExistingHars()
			self.browser = Browser(
				"chromedriver/chromedriver",
				"browsermob-proxy-2.1.4/bin/browsermob-proxy",
				self.harDirectory,
				cookies=self.cookies)
			if self.searchString is not None:
				print("Searching URL "+self.url+" for string "+self.searchString)
			# Move recursively through the site.
			apiCalls = self.crawlingScan(self.url)
		else:
			# No URL given: scan the directory of existing har files.
			print("Parsing existing directory of har files")
			harParser = HarParser(self.harDirectory, self.searchString, self.removeParams)
			apiCalls = harParser.parseMultipleHars()

		# NOTE(review): assumes ``self.browser`` is initialised (e.g. to
		# None) before ``start`` runs, since the directory branch never
		# assigns it -- confirm against the constructor.
		if self.browser is not None:
			self.browser.close()

		return apiCalls

Example #4

Show file

File: apiFinder.py Project: Shaw622/Scraping

    def crawlingScan(self, url, apiCalls=None, allFoundURLs=None):
        """Scan ``url`` for API calls, then recurse into its internal URLs.

        Every call decrements ``self.count``; once the counter drops below
        zero the call returns ``None`` without scanning anything.

        Args:
            url: Address of the page to open and scan.
            apiCalls: API calls collected so far, handed to
                ``HarParser.scanHarfile``. A fresh list is used when omitted.
            allFoundURLs: URLs seen so far, handed to
                ``self.findInternalURLs``. A fresh list is used when omitted.

        Returns:
            The result of ``scanHarfile`` for this page, the ``apiCalls``
            that were passed in when the page could not be opened, or
            ``None`` when ``self.count`` is exhausted.

        Raises:
            SystemExit: With status 1 after a ``KeyboardInterrupt`` or
                ``SystemExit`` interrupts the crawl; the browser is closed
                and the calls found so far are written out first.
        """
        # Use None sentinels rather than ``[]`` defaults: a list default is
        # created once and would be shared by every top-level call.
        if apiCalls is None:
            apiCalls = []
        if allFoundURLs is None:
            allFoundURLs = []

        self.count = self.count - 1
        if self.count < 0:
            return None

        harParser = HarParser(self.harDirectory,
                              searchString=self.searchString,
                              removeParams=self.removeParams)

        # To stop as soon as a matching call is found, return here when
        # self.searchString is not None and apiCalls is non-empty.
        try:
            print("Scanning URL: " + url)
            html = self.openURL(url)
            if html is not None:
                bsObj = BeautifulSoup(html, "lxml")

                harObj = harParser.getSingleHarFile()
                apiCalls = harParser.scanHarfile(harObj, apiCalls=apiCalls)

                allFoundURLs, newUrls = self.findInternalURLs(
                    bsObj, url, allFoundURLs)
                shuffle(newUrls)

                # NOTE(review): the recursive return values are discarded,
                # so results from deeper pages are only visible if
                # scanHarfile extends the passed list in place -- confirm.
                for newUrl in newUrls:
                    self.crawlingScan(newUrl, apiCalls, allFoundURLs)

        except (KeyboardInterrupt, SystemExit):
            print("Stopping crawl")
            self.browser.close()
            apiWriter = APIWriter(apiCalls)
            apiWriter.outputAPIs()
            # ``exit`` is a ``site`` helper meant for interactive use; raise
            # SystemExit directly so this works without it.
            raise SystemExit(1)
        return apiCalls

Example #5

Show file

    def crawling_scan(self, url, api_calls=None, all_found_urls=None):
        """Scan one page for API calls and recurse into its internal links.

        ``self.count`` is decremented on every call; when it falls below
        zero the call returns ``None`` immediately.

        Args:
            url: Address of the page to open and scan.
            api_calls: Calls gathered so far (a new list when omitted).
            all_found_urls: URLs gathered so far (a new list when omitted).

        Returns:
            The list produced by ``HarParser.scan_har_file`` for this page,
            the incoming ``api_calls`` if the page yielded no HTML, or
            ``None`` once ``self.count`` has run out.

        Raises:
            SystemExit: Status 1, after an interrupted crawl has closed the
                browser and written the calls found so far.
        """
        api_calls = [] if api_calls is None else api_calls
        all_found_urls = [] if all_found_urls is None else all_found_urls

        # Every visited page uses up one unit of the remaining page count.
        self.count -= 1
        if self.count < 0:
            return None

        parser = HarParser(
            self.har_directory,
            search_string=self.search_string,
            remove_params=self.remove_params,
        )

        try:
            print("Scanning URL: " + url)
            page_source = self.open_url(url)
            if page_source is None:
                # Nothing to parse for this page; hand back what we have.
                return api_calls

            soup = BeautifulSoup(page_source, "lxml")
            api_calls = parser.scan_har_file(
                parser.get_single_har_file(), api_calls=api_calls)

            all_found_urls, links = self.find_internal_urls(
                soup, url, all_found_urls)
            shuffle(links)
            for link in links:
                self.crawling_scan(link, api_calls, all_found_urls)
        except (KeyboardInterrupt, SystemExit):
            # Interrupted: clean up, persist partial results, then quit.
            print("Stopping crawl")
            self.browser.close()
            APIWriter(api_calls).output_apis()
            sys.exit(1)
        return api_calls