def crawling_scan(self, url, api_calls=None, all_found_urls=None):
    """Recursively crawl from *url*, scanning each page's HAR capture for API calls.

    Decrements ``self.count`` (the remaining page budget) on every call and
    stops descending once it goes negative. Internal links discovered on each
    page are shuffled and crawled recursively, sharing the same ``api_calls``
    and ``all_found_urls`` accumulators.

    :param url: page to open and scan.
    :param api_calls: accumulator of API calls found so far (fresh list if None).
    :param all_found_urls: accumulator of URLs already seen (fresh list if None).
    :returns: the accumulated list of API calls.

    On KeyboardInterrupt/SystemExit the browser is closed, results are flushed
    via APIWriter, and the process exits with status 1.
    """
    # Never use mutable default arguments; create fresh accumulators per crawl.
    if api_calls is None:
        api_calls = []
    if all_found_urls is None:
        all_found_urls = []

    # Spend one unit of the page budget; stop when exhausted.
    self.count -= 1
    if self.count < 0:
        # BUGFIX: previously a bare `return` (None). Always return the
        # accumulator so callers can rely on a list result.
        return api_calls

    har_parser = HarParser(self.har_directory, search_string=self.search_string,
                           remove_params=self.remove_params)

    # If uncommented, will return as soon as a matching call is found
    # if self.search_string is not None and len(apiCalls) > 0:
    #     return apiCalls

    try:
        print("Scanning URL: " + url)
        html = self.open_url(url)
        if html is not None:
            soup = BeautifulSoup(html, "lxml")
            har_obj = har_parser.get_single_har_file()
            api_calls = har_parser.scan_har_file(har_obj, api_calls=api_calls)
            all_found_urls, new_urls = self.find_internal_urls(
                soup, url, all_found_urls)
            # Randomize crawl order so repeated runs explore different branches.
            shuffle(new_urls)
            for new_url in new_urls:
                # NOTE(review): the child's return value is discarded, so this
                # relies on scan_har_file mutating `api_calls` in place rather
                # than returning a new list — confirm against HarParser.
                self.crawling_scan(new_url, api_calls, all_found_urls)
    except (KeyboardInterrupt, SystemExit):
        # Best-effort shutdown: persist what was found, then exit non-zero.
        print("Stopping crawl")
        self.browser.close()
        api_writer = APIWriter(api_calls)
        api_writer.output_apis()
        sys.exit(1)

    return api_calls