Example #1
    def process_uri(self, uri):
        sql_driver = MySQLDriver(self.db_name)
        output_store = OutputStore(self.db_name)
        phantom_driver = PhantomDriver(
            '--ignore-ssl-errors=true --ssl-protocol=any', 'wbxr_logger.js')

        # this can be higher or lower depending on network load
        # generally, 90 seems to be fine, so stick with it
        try:
            phantom_output = phantom_driver.execute(uri, 90)
        except Exception:
            print("\t\t%-50s Phantomjs Did Not Return." % uri[:50])
            sql_driver.log_error(uri, "FAIL: Phantomjs Did Not Return.")
            return

        if re.match('^FAIL.+', phantom_output):
            print("\t\t%-50s Phantom Error\n\t%s" % (uri[:50], phantom_output))
            sql_driver.log_error(uri, phantom_output)
        else:
            print("\t\t%-50s %s" %
                  (uri[:50], output_store.store(uri, phantom_output)))

        # closes our db connections
        sql_driver.close()
        output_store.close()
        return
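The worker above handles a single URI end to end; something outside the snippet still has to feed URIs in. A minimal driver sketch, assuming a hypothetical Collector class that holds db_name and exposes the process_uri method shown in Example #1:

uris = [
    "http://example.com",
    "http://example.org",
]

collector = Collector("wbxr_demo")  # Collector and its constructor are assumed, not part of the snippet
for uri in uris:
    collector.process_uri(uri)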
Example #2
def analyze_url(uri: str) -> List[str]:
    """
    Analyze given URI and get page information by using webXray.
    Arg:
        uri(str): Any URI that is not analyzed yet.
    Return:
        dict {"uri": list[str]}: (key: given uri, value: listed domain name of cookies)
    """
    parser = ParseURI()

    pd = PhantomDriver("--ignore-ssl-errors=true --ssl-protocol=any", "wbxr_logger.js")
    output = pd.execute(uri, 25)

    if re.match("^FAIL.+", output):
        # This branch may be unnecessary, but return an empty list when PhantomJS reports a failure.
        return []
    else:
        try:
            parsed_data = json.loads(re.search(r"(\{.+\})", output).group(1))
        except Exception as e:
            print(e)
            return []

        orig_domain = parser.get_domain_pubsuffix_tld(uri)[0]
        cookie_domains = map(lambda x: x["domain"], parsed_data["cookies"])
        # keep only cookies whose registered domain differs from the page's own domain;
        # lstrip(".") handles both ".example.com" and host-only cookie domains
        tpcookie_domains = filter(
            lambda x: parser.get_domain_pubsuffix_tld(f"http://{x.lstrip('.')}")[0]
            != orig_domain,
            cookie_domains,
        )
        tpcookie_domain_names = map(remove_dot, tpcookie_domains)
        return list(tpcookie_domain_names)
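remove_dot is referenced above but never defined in the snippet; judging from the re.sub(r"^\.", "", ...) call in Example #3, it most likely just strips the leading dot that domain cookies carry. A minimal sketch under that assumption, followed by a hypothetical call:

def remove_dot(domain: str) -> str:
    # Assumed behaviour: ".example.com" -> "example.com"; host-only domains pass through unchanged.
    return domain.lstrip(".")


# Hypothetical usage: list the third-party cookie domains found on a page.
for cookie_domain in analyze_url("http://example.com"):
    print(cookie_domain)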
Example #3
    def report(self, uri):
        phantom_driver = PhantomDriver(
            '--ignore-ssl-errors=true --ssl-protocol=any', 'wbxr_logger.js')
        phantom_output = phantom_driver.execute(uri, 90)

        if re.match('^FAIL.+', phantom_output):
            print("\tERROR URI: " + uri + "\n\t\tExiting on: " +
                  phantom_output)
            exit()

        origin_domain_pubsuffix_tld = self.uri_parser.get_domain_pubsuffix_tld(
            uri)
        origin_domain = origin_domain_pubsuffix_tld[0]
        origin_pubsuffix = origin_domain_pubsuffix_tld[1]
        origin_tld = origin_domain_pubsuffix_tld[2]

        # parse out the json from our phantom_output
        try:
            data = json.loads(re.search(r'(\{.+\})', phantom_output).group(1))
        except Exception as e:
            print("\t\tException: %s" % e)
            print("\t\tphantom_output was unreadable")
            print(phantom_output[:100])
            return ''

        print("\n\t------------------{ URI }------------------")
        print("\t" + uri)
        print("\n\t------------------{ Final URI }------------------")
        print("\t" + data["final_uri"])
        print("\n\t------------------{ Domain }------------------")
        print("\t" + origin_domain)
        print("\n\t------------------{ Title }------------------")
        print("\t" + data["title"])
        print("\n\t------------------{ Description }------------------")
        print("\t" + data["meta_desc"])

        print("\n\t------------------{ 3rd Party Cookies }------------------")
        cookie_list = []
        for cookie in data["cookies"]:
            # get domain, pubsuffix, and tld from cookie
            # prepend "http://" because the parser fails on a bare domain; this is a hack that should be fixed
            cookie_domain_pubsuffix_tld = self.uri_parser.get_domain_pubsuffix_tld(
                "http://" + cookie["domain"])
            cookie_domain = cookie_domain_pubsuffix_tld[0]
            cookie_pubsuffix = cookie_domain_pubsuffix_tld[1]
            cookie_tld = cookie_domain_pubsuffix_tld[2]

            # print external cookies
            if origin_domain not in cookie_domain:
                cookie_list.append(
                    re.sub("^\.", "", cookie["domain"]) + " -> " +
                    cookie["name"])  #+" = "+cookie["value"])

        cookie_list.sort()
        for cookie in cookie_list:
            print("\t" + cookie)

        print("\n\t------------------{ External Requests }------------------")
        requested_domains = []
        for request in data["requested_uris"]:
            # data:, about:, and chrome: URIs have no parseable TLD, so skip them
            if re.match('^(data|about|chrome).+', request):
                continue

            # get domain, pubsuffix, and tld from request
            requested_domain_pubsuffix_tld = self.uri_parser.get_domain_pubsuffix_tld(
                request)
            requested_domain = requested_domain_pubsuffix_tld[0]
            requested_pubsuffix = requested_domain_pubsuffix_tld[1]
            requested_tld = requested_domain_pubsuffix_tld[2]

            if origin_domain not in requested_domain:
                if requested_domain not in requested_domains:
                    requested_domains.append(requested_domain)

        requested_domains.sort()

        for domain in requested_domains:
            print("\t" + domain)
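report() only touches a handful of keys in the PhantomJS output; the real schema is produced by wbxr_logger.js, which is not shown here. A minimal stand-in that this method would accept looks roughly like the following (the field names are taken from the keys accessed above; the values are invented):

sample_phantom_data = {
    "final_uri": "http://example.com/",
    "title": "Example Domain",
    "meta_desc": "Illustrative description",
    "cookies": [
        {"domain": ".tracker.example.net", "name": "uid", "value": "abc123"},
    ],
    "requested_uris": [
        "http://cdn.example.net/script.js",
    ],
}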