Python APIWriter Examples, apicall.APIWriter Python Examples

Example #1

0

Show file

	def crawlingScan(self, url, apiCalls=None, allFoundURLs=None):
		"""Scan ``url`` for API calls, then recurse into the internal URLs found on it.

		Every invocation uses up one unit of ``self.count``; once that budget is
		spent no further page is opened.

		Args:
			url: Address of the page to open and scan.
			apiCalls: Calls collected so far. A new list is created when omitted.
			allFoundURLs: URLs discovered so far. A new list is created when omitted.

		Returns:
			The list of collected API calls.

		Raises:
			SystemExit: With status 1, after a KeyboardInterrupt/SystemExit was
			intercepted, the browser was closed and the calls gathered so far
			were written out.
		"""
		# A mutable default ([]) is created once and shared by every call that
		# relies on it, so results would leak from one crawl into the next.
		if apiCalls is None:
			apiCalls = []
		if allFoundURLs is None:
			allFoundURLs = []

		self.count -= 1
		if self.count < 0:
			# Budget exhausted: return what was collected rather than None so
			# the caller always receives a list.
			return apiCalls

		harParser = HarParser(self.harDirectory, searchString=self.searchString, removeParams=self.removeParams)

		#If uncommented, will return as soon as a matching call is found
		#if self.searchString is not None and len(apiCalls) > 0:
		#	return apiCalls
		try:
			print("Scanning URL: "+url)
			html = self.openURL(url)
			if html is not None:
				bsObj = BeautifulSoup(html, "lxml")

				harObj = harParser.getSingleHarFile()
				apiCalls = harParser.scanHarfile(harObj, apiCalls=apiCalls)

				allFoundURLs, newUrls = self.findInternalURLs(bsObj, url, allFoundURLs)
				# Visit the newly found links in random order.
				shuffle(newUrls)

				for newUrl in newUrls:
					# NOTE(review): the recursive result is discarded, so deeper
					# findings survive only if scanHarfile extends the list it is
					# given in place -- confirm against HarParser.
					self.crawlingScan(newUrl, apiCalls, allFoundURLs)

		except (KeyboardInterrupt, SystemExit):
			# Interrupted: release the browser and persist partial results.
			print("Stopping crawl")
			self.browser.close()
			apiWriter = APIWriter(apiCalls)
			apiWriter.outputAPIs()
			# The builtin exit() is injected by the site module and may be
			# missing; raising SystemExit is the equivalent that always works.
			raise SystemExit(1)
		return apiCalls

Example #2

0

Show file

File: apiFinder.py Project: Shaw622/Scraping

    def crawlingScan(self, url, apiCalls=None, allFoundURLs=None):
        """Open ``url``, record the API calls it triggers and crawl its links.

        One unit of ``self.count`` is consumed per invocation; when the
        counter drops below zero the page is not opened.

        Args:
            url: Page to open and scan.
            apiCalls: Accumulator of calls found so far (new list if omitted).
            allFoundURLs: Accumulator of URLs seen so far (new list if
                omitted).

        Returns:
            The accumulated list of API calls.

        Raises:
            SystemExit: With status 1 once an intercepted
                KeyboardInterrupt/SystemExit has been handled (browser closed,
                partial results written).
        """
        # Default lists are built per call; a literal [] in the signature is
        # evaluated once and would be shared between unrelated crawls.
        if apiCalls is None:
            apiCalls = []
        if allFoundURLs is None:
            allFoundURLs = []

        self.count -= 1
        if self.count < 0:
            # Out of budget: return the accumulator, never None.
            return apiCalls

        harParser = HarParser(self.harDirectory,
                              searchString=self.searchString,
                              removeParams=self.removeParams)

        #If uncommented, will return as soon as a matching call is found
        #if self.searchString is not None and len(apiCalls) > 0:
        #	return apiCalls
        try:
            print("Scanning URL: " + url)
            html = self.openURL(url)
            if html is not None:
                bsObj = BeautifulSoup(html, "lxml")

                harObj = harParser.getSingleHarFile()
                apiCalls = harParser.scanHarfile(harObj, apiCalls=apiCalls)

                allFoundURLs, newUrls = self.findInternalURLs(
                    bsObj, url, allFoundURLs)
                # Randomise the order in which discovered links are followed.
                shuffle(newUrls)

                for newUrl in newUrls:
                    self.crawlingScan(newUrl, apiCalls, allFoundURLs)

        except (KeyboardInterrupt, SystemExit):
            # Crawl interrupted: clean up and flush what has been found.
            print("Stopping crawl")
            self.browser.close()
            apiWriter = APIWriter(apiCalls)
            apiWriter.outputAPIs()
            # exit() only exists when the site module is loaded; SystemExit
            # gives the same exit status without that dependency.
            raise SystemExit(1)
        return apiCalls

Example #3

0

Show file

File: webservice.py Project: Shaw622/Scraping

def search():
    """Run an API search driven by the current request's query arguments.

    Reads the ``search`` and ``url`` entries from ``request.args``, starts an
    ``APIFinder`` with them and returns the result of
    ``APIWriter.outputJSON()`` for the calls it found.
    """
    # APIFinder signature, for reference:
    # (self, url=None, harDirectory=None, searchString=None, removeParams=False, count=1)
    params = request.args
    search_term = params.get('search')
    target_url = params.get('url')
    found_calls = APIFinder(url=target_url, searchString=search_term).start()
    return APIWriter(found_calls).outputJSON()

Example #4

0

Show file

    def crawling_scan(self, url, api_calls=None, all_found_urls=None):
        """Scan ``url`` for API calls and recurse into its internal links.

        Each invocation decrements ``self.count``; once it falls below zero
        the page is not opened.

        Args:
            url: Page to open and scan.
            api_calls: Calls collected so far (new list if omitted).
            all_found_urls: URLs discovered so far (new list if omitted).

        Returns:
            The list of collected API calls.

        Raises:
            SystemExit: With status 1 after a KeyboardInterrupt/SystemExit was
                intercepted, the browser was closed and the calls collected so
                far were written out.
        """
        if api_calls is None:
            api_calls = []
        if all_found_urls is None:
            all_found_urls = []

        self.count -= 1
        if self.count < 0:
            # Budget exhausted: return the accumulator so the return type is
            # a list on every path (previously None here).
            return api_calls

        har_parser = HarParser(self.har_directory,
                               search_string=self.search_string,
                               remove_params=self.remove_params)

        # If uncommented, will return as soon as a matching call is found
        # if self.search_string is not None and len(api_calls) > 0:
        #     return api_calls
        try:
            print("Scanning URL: " + url)
            html = self.open_url(url)
            if html is not None:
                soup = BeautifulSoup(html, "lxml")

                har_obj = har_parser.get_single_har_file()
                api_calls = har_parser.scan_har_file(har_obj,
                                                     api_calls=api_calls)

                all_found_urls, new_urls = self.find_internal_urls(
                    soup, url, all_found_urls)
                # Follow the discovered links in random order.
                shuffle(new_urls)

                for new_url in new_urls:
                    self.crawling_scan(new_url, api_calls, all_found_urls)

        except (KeyboardInterrupt, SystemExit):
            # Interrupted: close the browser and persist partial results
            # before terminating.
            print("Stopping crawl")
            self.browser.close()
            api_writer = APIWriter(api_calls)
            api_writer.output_apis()
            sys.exit(1)
        return api_calls

Example #5

0

Show file

    help=
    "File containing JSON formatted cookies to set in driver (with target URL only)",
    nargs='?')
# Remaining options: page budget for the crawl and parameter pruning.
parser.add_argument(
    "-i",
    nargs='?',
    help="Count of pages to crawl (with target URL only)",
)
parser.add_argument(
    '--p',
    action='store_true',
    help="Flag, remove unnecessary parameters (may dramatically increase run time)",
)
args = parser.parse_args()

# At least one of URL / directory has to be supplied.
if not args.u and not args.d:
    print("Need to provide either a URL or directory or both. Use -h for help")
    sys.exit(1)

# Fall back to the "hars" directory and a single page when not given.
directory = args.d if args.d is not None else "hars"
count = int(args.i) if args.i is not None else 1

finder = APIFinder(
    url=args.u,
    harDirectory=directory,
    searchString=args.s,
    removeParams=args.p,
    count=count,
    cookies=args.c,
)

# Run the finder and write out whatever it collected.
apiCalls = finder.start()
apiWriter = APIWriter(apiCalls)
apiWriter.outputAPIs()

Example #6

0

Show file

File: consoleservice.py Project: tfulmer1/apiscraper

    "File containing JSON formatted cookies to set in driver (with target URL only)",
    nargs='?')
# Remaining options: page budget for the crawl and parameter pruning.
parser.add_argument(
    "-i",
    nargs='?',
    help="Count of pages to crawl (with target URL only)",
)
parser.add_argument(
    '--p',
    action='store_true',
    help="Flag, remove unnecessary parameters (may dramatically increase run time)",
)
args = parser.parse_args()

# Refuse to run without a URL or a directory to work from.
if not args.u and not args.d:
    print("Need to provide either a URL or directory or both. Use -h for help")
    sys.exit(1)

# Defaults: directory "hars", one page.
directory = args.d if args.d is not None else "hars"
count = int(args.i) if args.i is not None else 1

finder = APIFinder(
    url=args.u,
    har_directory=directory,
    search_string=args.s,
    remove_params=args.p,
    count=count,
    cookies=args.c,
)

# Run the finder and write out whatever it collected.
apiCalls = finder.start()
apiWriter = APIWriter(apiCalls)
apiWriter.output_apis()

Example #7

0

Show file

File: consoleservice.py Project: REMitchell/apiscraper

from apicall import APIWriter
from apiFinder import APIFinder
import sys
import argparse

# Command-line front end: parse the options, build an APIFinder from them and
# write out the API calls it returns.
parser = argparse.ArgumentParser()
parser.add_argument("-u", help="Target URL. If not provided, target directory will be scanned for har files.", nargs='?')
parser.add_argument("-d", help="Target directory (default is \"hars\"). If URL is provided, directory will store har files. If URL is not provided, directory will be scanned. ", nargs='?')
parser.add_argument("-s", help="Search term", nargs='?')
parser.add_argument("-c", help="File containing JSON formatted cookies to set in driver (with target URL only)", nargs='?')
# type=int lets argparse reject a non-numeric count with a usage error instead
# of the script dying later with an uncaught ValueError traceback.
parser.add_argument("-i", help="Count of pages to crawl (with target URL only)", nargs='?', type=int)
parser.add_argument('--p', help="Flag, remove unnecessary parameters (may dramatically increase run time)", action='store_true')
args = parser.parse_args()

# At least one of URL / directory is required.
if not (args.u or args.d):
	print("Need to provide either a URL or directory or both. Use -h for help")
	sys.exit(1)

#Default to directory name "hars" and count of 1
directory = "hars" if args.d is None else args.d
# args.i is already an int here; it is None when -i is absent or given bare.
count = 1 if args.i is None else args.i

finder = APIFinder(url=args.u, harDirectory=directory, searchString=args.s, removeParams=args.p, count=count, cookies=args.c)

apiCalls = finder.start()
apiWriter = APIWriter(apiCalls)
apiWriter.outputAPIs()