Exemple #1
0
    def crawling_scan(self, url, api_calls=None, all_found_urls=None):
        if api_calls is None:
            api_calls = []
        if all_found_urls is None:
            all_found_urls = []
        self.count = self.count - 1
        if self.count < 0:
            return

        har_parser = HarParser(self.har_directory,
                               search_string=self.search_string,
                               remove_params=self.remove_params)

        # If uncommented, will return as soon as a matching call is found
        # if self.search_string is not None and len(apiCalls) > 0:
        # 	return apiCalls
        try:
            print("Scanning URL: " + url)
            html = self.open_url(url)
            if html is not None:
                soup = BeautifulSoup(html, "lxml")

                har_obj = har_parser.get_single_har_file()
                api_calls = har_parser.scan_har_file(har_obj,
                                                     api_calls=api_calls)

                all_found_urls, new_urls = self.find_internal_urls(
                    soup, url, all_found_urls)
                shuffle(new_urls)

                for newUrl in new_urls:
                    self.crawling_scan(newUrl, api_calls, all_found_urls)

        except (KeyboardInterrupt, SystemExit):
            print("Stopping crawl")
            self.browser.close()
            api_writer = APIWriter(api_calls)
            api_writer.output_apis()
            sys.exit(1)
        return api_calls