def process_uri(self, uri):
    sql_driver = MySQLDriver(self.db_name)
    output_store = OutputStore(self.db_name)
    phantom_driver = PhantomDriver('--ignore-ssl-errors=true --ssl-protocol=any', 'wbxr_logger.js')

    # this can be higher or lower depending on network load;
    # generally, 90 seconds seems to be fine, so keep with it
    try:
        phantom_output = phantom_driver.execute(uri, 90)
    except Exception:
        print("\t\t%-50s Phantomjs Did Not Return." % uri[:50])
        sql_driver.log_error(uri, "FAIL: Phantomjs Did Not Return.")
        # close our db connections before bailing out
        sql_driver.close()
        output_store.close()
        return

    if re.match(r'^FAIL.+', phantom_output):
        print("\t\t%-50s Phantom Error\n\t%s" % (uri[:50], phantom_output))
        sql_driver.log_error(uri, phantom_output)
    else:
        print("\t\t%-50s %s" % (uri[:50], output_store.store(uri, phantom_output)))

    # closes our db connections
    sql_driver.close()
    output_store.close()
    return
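# In webXray this kind of worker is typically fanned out over many pages at
# once. A minimal sketch of such a driver loop using multiprocessing; the
# run_collection name, the collector instance, and the uris list are
# illustrative assumptions, not part of the original module.
from multiprocessing import Pool

def run_collection(collector, uris, pool_size=4):
    # each worker loads one page in phantomjs and stores the result;
    # process_uri opens and closes its own db connections, so the
    # collector instance stays cheap to pickle
    with Pool(pool_size) as pool:
        pool.map(collector.process_uri, uris)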
def analyze_url(uri: str) -> List[str]:
    """
    Analyze the given URI and get page information by using webXray.

    Arg:
        uri (str): Any URI that has not been analyzed yet.

    Return:
        list[str]: domain names of third-party cookies set by the page.
    """
    parser = ParseURI()
    pd = PhantomDriver("--ignore-ssl-errors=true --ssl-protocol=any", "wbxr_logger.js")
    output = pd.execute(uri, 25)

    if re.match("^FAIL.+", output):
        # Probably this isn't needed
        return []

    try:
        parsed_data = json.loads(re.search(r"(\{.+\})", output).group(1))
    except Exception as e:
        print(e)
        return []

    orig_domain = parser.get_domain_pubsuffix_tld(uri)[0]
    cookie_domains = map(lambda x: x["domain"], parsed_data["cookies"])
    # cookie domains carry a leading dot, hence x[1:] before parsing
    tpcookie_domains = filter(
        lambda x: parser.get_domain_pubsuffix_tld(f"http://{x[1:]}")[0] != orig_domain,
        cookie_domains,
    )
    tpcookie_domain_names = map(remove_dot, tpcookie_domains)
    return list(tpcookie_domain_names)
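# A minimal usage sketch for analyze_url; hypothetical driver code, assuming
# re, json, typing.List, webXray's ParseURI/PhantomDriver, and the remove_dot
# helper are all importable in this module.
if __name__ == "__main__":
    for uri in ["http://example.com", "http://example.org"]:
        tp_domains = analyze_url(uri)
        # one line per URI listing the third-party cookie domains found
        print(uri, "->", ", ".join(tp_domains) or "none")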
def execute(self, url, browser_wait):
    """
    Main function, loads page and analyzes results.
    """

    print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~')
    print('Single Site Test On: %s' % url)
    print('\tBrowser type is %s' % self.browser_type)
    print('\tBrowser wait time is %s seconds' % browser_wait)

    # make sure it is an http(s) address
    if not re.match('^https?://', url):
        print('\tNot a valid url, aborting')
        return None

    # import and set up specified browser driver
    if self.browser_type == 'phantomjs':
        browser_driver = PhantomDriver()
    elif self.browser_type == 'chrome':
        browser_driver = ChromeDriver()
        chrome_ua = browser_driver.get_ua_for_headless()
        browser_driver = ChromeDriver(ua=chrome_ua)

    # attempt to get the page
    browser_output = browser_driver.get_webxray_scan_data(url, browser_wait)

    # if there was a problem we print the error
    if browser_output['success'] == False:
        print('\t\t%-50s Browser Error: %s' % (url[:50], browser_output['result']))
        return
    else:
        browser_output = browser_output['result']

    # get the ip, fqdn, domain, pubsuffix, and tld from the URL
    # we need the domain to figure out if cookies/elements are third-party
    origin_ip_fqdn_domain_pubsuffix_tld = self.url_parser.get_ip_fqdn_domain_pubsuffix_tld(url)

    # if we can't get page domain info we bail out
    if origin_ip_fqdn_domain_pubsuffix_tld is None:
        print('could not parse origin domain')
        return None

    origin_ip = origin_ip_fqdn_domain_pubsuffix_tld[0]
    origin_fqdn = origin_ip_fqdn_domain_pubsuffix_tld[1]
    origin_domain = origin_ip_fqdn_domain_pubsuffix_tld[2]
    origin_pubsuffix = origin_ip_fqdn_domain_pubsuffix_tld[3]
    origin_tld = origin_ip_fqdn_domain_pubsuffix_tld[4]

    print('\n\t------------------{ URL }------------------')
    print('\t' + url)
    print('\n\t------------------{ Final URL }------------------')
    print('\t' + browser_output['final_url'])
    print('\n\t------------------{ Domain }------------------')
    print('\t' + origin_domain)
    print('\n\t------------------{ Seconds to Complete Download }------------------')
    print('\t%s' % (browser_output['load_time'] / 1000))

    print('\n\t------------------{ 3rd Party Cookies }------------------')
    cookie_list = []
    for cookie in browser_output['cookies']:
        # get domain, pubsuffix, and tld from cookie
        # we have to append http b/c the parser will fail, this is a lame hack, should fix
        cookie_ip_fqdn_domain_pubsuffix_tld = self.url_parser.get_ip_fqdn_domain_pubsuffix_tld('http://' + cookie['domain'])

        # something went wrong, but we continue to process the elements
        if cookie_ip_fqdn_domain_pubsuffix_tld is None:
            print('could not parse cookie')
            continue

        # otherwise, everything went fine
        cookie_ip = cookie_ip_fqdn_domain_pubsuffix_tld[0]
        cookie_fqdn = cookie_ip_fqdn_domain_pubsuffix_tld[1]
        cookie_domain = cookie_ip_fqdn_domain_pubsuffix_tld[2]
        cookie_pubsuffix = cookie_ip_fqdn_domain_pubsuffix_tld[3]
        cookie_tld = cookie_ip_fqdn_domain_pubsuffix_tld[4]

        # print external cookies
        if origin_domain not in cookie_domain:
            cookie_list.append(re.sub(r'^\.', '', cookie['domain']) + ' -> ' + cookie['name'])

    cookie_list.sort()
    count = 0
    for cookie in cookie_list:
        count += 1
        print('\t%s) %s' % (count, cookie))

    print('\n\t------------------{ 3p Domains Requested }------------------')
    element_domains = []
    for request in browser_output['processed_requests']:
        # if the request starts with 'data'/etc we can't parse the tld anyway, so skip
        if re.match('^(data|about|chrome).+', request):
            continue

        element_ip_fqdn_domain_pubsuffix_tld = self.url_parser.get_ip_fqdn_domain_pubsuffix_tld(request)

        # problem with this request, bail on it and do the next
        if element_ip_fqdn_domain_pubsuffix_tld is None:
            continue

        element_ip = element_ip_fqdn_domain_pubsuffix_tld[0]
        element_fqdn = element_ip_fqdn_domain_pubsuffix_tld[1]
        element_domain = element_ip_fqdn_domain_pubsuffix_tld[2]
        element_pubsuffix = element_ip_fqdn_domain_pubsuffix_tld[3]
        element_tld = element_ip_fqdn_domain_pubsuffix_tld[4]

        if origin_domain not in element_domain:
            if element_domain not in element_domains:
                element_domains.append(element_domain)

    element_domains.sort()
    count = 0
    for domain in element_domains:
        count += 1
        if domain in self.domain_owners:
            lineage = ''
            for item in self.get_lineage(self.domain_owners[domain]):
                lineage += self.id_to_owner[item] + ' > '
            print('\t%s) %s [%s]' % (count, domain, lineage[:-3]))
        else:
            print('\t%s) %s [Unknown Owner]' % (count, domain))
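# Caveat on the third-party test above: "origin_domain not in cookie_domain"
# is substring containment, so an origin like "x.co" also matches inside the
# unrelated "box.com" and suppresses it from the report. A sketch of a
# stricter check, assuming both values are registered domains as returned by
# the parser; the is_third_party name is illustrative.
def is_third_party(origin_domain, candidate_domain):
    # compare registered domains exactly instead of by containment
    return candidate_domain != origin_domain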
def process_url(self, url):
    """
    this function takes a specified url, loads it in the browser
    (currently phantomjs) and returns json-formatted output with
    relevant request data, etc.

    the output_store class then puts this data in the db for later analysis
    """

    # set up sql connection used to log errors and do timeseries checks
    if self.db_engine == 'mysql':
        from webxray.MySQLDriver import MySQLDriver
        sql_driver = MySQLDriver(self.db_name)
    elif self.db_engine == 'postgres':
        from webxray.PostgreSQLDriver import PostgreSQLDriver
        sql_driver = PostgreSQLDriver(self.db_name)
    elif self.db_engine == 'sqlite':
        from webxray.SQLiteDriver import SQLiteDriver
        sql_driver = SQLiteDriver(self.db_name)

    # output store does the heavy lifting of analyzing browser output and storing to db
    output_store = OutputStore(self.db_engine, self.db_name)

    # support for loading same page with multiple browsers - purposefully undocumented
    for browser_type in self.browser_types:
        # import and set up specified browser driver
        # note we need to set up a new browser each time to get a fresh profile
        if browser_type == 'phantomjs':
            browser_driver = PhantomDriver()
        elif browser_type == 'chrome':
            browser_driver = ChromeDriver(ua=self.chrome_ua)

        # support for timeseries collections - purposefully undocumented
        if self.allow_timeseries:
            page_last_accessed_browser_type = sql_driver.get_page_last_accessed_by_browser_type(url, browser_type)
            if page_last_accessed_browser_type:
                time_diff = datetime.now() - page_last_accessed_browser_type[0]
                if time_diff < timedelta(minutes=self.interval_minutes) and page_last_accessed_browser_type[1] == browser_type:
                    print("\t\t%-50s Scanned too recently with %s" % (url[:50], browser_type))
                    continue

        # attempt to load the page, fail gracefully
        try:
            browser_output = browser_driver.get_webxray_scan_data(url, self.browser_wait)
        except Exception:
            print('\t\t%-50s Browser %s Did Not Return' % (url[:50], browser_type))
            sql_driver.log_error(url, 'Unable to load page')
            sql_driver.close()
            return

        # if there was a problem we log the error
        if browser_output['success'] == False:
            print('\t\t%-50s Browser %s Error: %s' % (url[:50], browser_type, browser_output['result']))
            sql_driver.log_error(url, 'Unable to load page')
            sql_driver.close()
            return
        else:
            # no error, treat result as browser output
            browser_output = browser_output['result']

        # attempt to store the output
        if output_store.store(url, browser_output):
            print('\t\t%-50s Success with %s' % (url[:50], browser_type))
        else:
            print('\t\t%-50s Fail with %s' % (url[:50], browser_type))
            sql_driver.log_error(url, 'Unable to load page')

    sql_driver.close()
    return
def process_url(self, url):
    """
    this function takes a specified url, loads it in the browser
    (currently phantomjs) and returns json-formatted output with
    relevant request data, etc.

    the output_store class then puts this data in the db for later analysis
    """

    # set up sql connection used to log errors and do timeseries checks
    if self.db_engine == 'mysql':
        from webxray.MySQLDriver import MySQLDriver
        sql_driver = MySQLDriver(self.db_name)
    elif self.db_engine == 'postgres':
        from webxray.PostgreSQLDriver import PostgreSQLDriver
        sql_driver = PostgreSQLDriver(self.db_name)
    elif self.db_engine == 'sqlite':
        from webxray.SQLiteDriver import SQLiteDriver
        sql_driver = SQLiteDriver(self.db_name)

    # output store does the heavy lifting of analyzing browser output and storing to db
    output_store = OutputStore(self.db_engine, self.db_name)

    # support for loading same page with multiple browsers - purposefully undocumented
    for browser_type in self.browser_types:
        # import and set up specified browser driver
        # note we need to set up a new browser each time to get a fresh profile
        if browser_type == 'phantomjs':
            browser_driver = PhantomDriver()
        elif browser_type == 'chrome':
            browser_driver = ChromeDriver(ua=self.chrome_ua)

        # support for timeseries collections - purposefully undocumented
        if self.allow_timeseries:
            page_last_accessed_browser_type = sql_driver.get_page_last_accessed_by_browser_type(url, browser_type)
            if page_last_accessed_browser_type:
                time_diff = datetime.now() - page_last_accessed_browser_type[0]
                if time_diff < timedelta(minutes=self.interval_minutes) and page_last_accessed_browser_type[1] == browser_type:
                    print("\t\t%-50s Scanned too recently with %s" % (url[:50], browser_type))
                    continue

        # attempt to load the page, fail gracefully
        try:
            browser_output = browser_driver.get_webxray_scan_data(url, self.browser_wait)
        except Exception:
            print('\t\t%-50s Browser %s Did Not Return' % (url[:50], browser_type))
            sql_driver.log_error(url, 'Unable to load page')
            sql_driver.close()
            return

        # if there was a problem browser_output will be None
        if browser_output is None:
            print('\t\t%-50s Browser %s Did Not Return' % (url[:50], browser_type))
            sql_driver.log_error(url, 'Unable to load page')
            sql_driver.close()
            return

        # attempt to store the output
        if output_store.store(url, browser_output):
            print('\t\t%-50s Success with %s' % (url[:50], browser_type))
        else:
            print('\t\t%-50s Fail with %s' % (url[:50], browser_type))
            sql_driver.log_error(url, 'Unable to load page')

    sql_driver.close()
    return
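# In both variants above, an unrecognized self.db_engine value leaves
# sql_driver unbound and triggers a NameError later. A sketch of a factored
# helper that fails loudly instead; the get_sql_driver name is an assumption,
# not part of webXray.
def get_sql_driver(db_engine, db_name):
    # map each supported engine to its driver class, importing lazily
    # as the original code does
    if db_engine == 'mysql':
        from webxray.MySQLDriver import MySQLDriver as Driver
    elif db_engine == 'postgres':
        from webxray.PostgreSQLDriver import PostgreSQLDriver as Driver
    elif db_engine == 'sqlite':
        from webxray.SQLiteDriver import SQLiteDriver as Driver
    else:
        raise ValueError('unsupported db_engine: %s' % db_engine)
    return Driver(db_name)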
def report(self, uri):
    phantom_driver = PhantomDriver('--ignore-ssl-errors=true --ssl-protocol=any', 'wbxr_logger.js')
    phantom_output = phantom_driver.execute(uri, 90)

    if re.match(r'^FAIL.+', phantom_output):
        print("\tERROR URI: " + uri + "\n\t\tExiting on: " + phantom_output)
        exit()

    origin_domain_pubsuffix_tld = self.uri_parser.get_domain_pubsuffix_tld(uri)
    origin_domain = origin_domain_pubsuffix_tld[0]
    origin_pubsuffix = origin_domain_pubsuffix_tld[1]
    origin_tld = origin_domain_pubsuffix_tld[2]

    # parse out the json from our phantom_output
    try:
        data = json.loads(re.search(r'(\{.+\})', phantom_output).group(1))
    except Exception as e:
        print("\t\tException: %s" % e)
        print("\t\tphantom_output was unreadable")
        print(phantom_output[:100])
        return ''

    print("\n\t------------------{ URI }------------------")
    print("\t" + uri)
    print("\n\t------------------{ Final URI }------------------")
    print("\t" + data["final_uri"])
    print("\n\t------------------{ Domain }------------------")
    print("\t" + origin_domain)
    print("\n\t------------------{ Title }------------------")
    print("\t" + data["title"])
    print("\n\t------------------{ Description }------------------")
    print("\t" + data["meta_desc"])

    print("\n\t------------------{ 3rd Party Cookies }------------------")
    cookie_list = []
    for cookie in data["cookies"]:
        # get domain, pubsuffix, and tld from cookie
        # we have to append http b/c the parser will fail, this is a lame hack, should fix
        cookie_domain_pubsuffix_tld = self.uri_parser.get_domain_pubsuffix_tld("http://" + cookie["domain"])
        cookie_domain = cookie_domain_pubsuffix_tld[0]
        cookie_pubsuffix = cookie_domain_pubsuffix_tld[1]
        cookie_tld = cookie_domain_pubsuffix_tld[2]

        # print external cookies
        if origin_domain not in cookie_domain:
            cookie_list.append(re.sub(r"^\.", "", cookie["domain"]) + " -> " + cookie["name"])  # +" = "+cookie["value"])

    cookie_list.sort()
    for cookie in cookie_list:
        print("\t" + cookie)

    print("\n\t------------------{ External Requests }------------------")
    requested_domains = []
    for request in data["requested_uris"]:
        # if the request starts with "data" we can't parse the tld anyway, so skip
        if re.match('^(data|about|chrome).+', request):
            continue

        # get domain, pubsuffix, and tld from request
        requested_domain_pubsuffix_tld = self.uri_parser.get_domain_pubsuffix_tld(request)
        requested_domain = requested_domain_pubsuffix_tld[0]
        requested_pubsuffix = requested_domain_pubsuffix_tld[1]
        requested_tld = requested_domain_pubsuffix_tld[2]

        if origin_domain not in requested_domain:
            if requested_domain not in requested_domains:
                requested_domains.append(requested_domain)

    requested_domains.sort()
    for domain in requested_domains:
        print("\t" + domain)
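# The comments above flag the '"http://" + cookie["domain"]' construction as
# a hack. A sketch of a small normalizer that also strips the leading dot
# cookie domains often carry; the helper name is illustrative.
def cookie_domain_to_url(cookie_domain):
    # ".example.com" -> "http://example.com", giving the URI parser a
    # well-formed address rather than a bare, dot-prefixed domain
    return 'http://' + cookie_domain.lstrip('.')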