@defer.inlineCallbacks
def test_web_connectivity_client_is_reachable(self):
    """
    isReachable() must yield True when the backend query succeeds.

    The test body yields a Deferred, so it has to run under
    ``inlineCallbacks``; without the decorator the method is a plain
    generator function and the assertion below is never executed.
    """
    wcc = WebConnectivityClient(
        'https://web-connectivity.th.ooni.io')
    # Replace the network call with a stub that fires successfully.
    wcc.queryBackend = MagicMock()
    wcc.queryBackend.return_value = defer.succeed({"status": "ok"})
    result = yield wcc.isReachable()
    self.assertEqual(result, True)
@defer.inlineCallbacks
def test_web_connectivity_client_is_not_reachable(self):
    """
    isReachable() must yield False when the backend query fails.

    The test body yields a Deferred, so it has to run under
    ``inlineCallbacks``; without the decorator the method is a plain
    generator function and the assertion below is never executed.
    """
    wcc = WebConnectivityClient(
        'https://web-connectivity.th.ooni.io')
    # Replace the network call with a stub that fires with a failure.
    wcc.queryBackend = MagicMock()
    wcc.queryBackend.return_value = defer.fail(Exception())
    result = yield wcc.isReachable()
    self.assertEqual(result, False)
@defer.inlineCallbacks
def getReachableTestHelper(self, test_helper_name, test_helper_address,
                           test_helper_alternate):
    """
    Select the address to use for the given test helper.

    For the 'web-connectivity' helper every candidate returned by
    ``self.sortAddressesByPriority`` is tried in order; the first one that
    is both supported and reachable is the result.  For any other helper
    the primary address is returned ASCII-encoded without any probing.

    The body yields Deferreds and exits through ``defer.returnValue``, so it
    must run under ``inlineCallbacks``; callers receive a Deferred.

    :param test_helper_name: name of the test helper being resolved.
    :param test_helper_address: primary address of the test helper.
    :param test_helper_alternate: alternate addresses of the test helper.
    :returns: Deferred firing with the chosen settings dict
        (web-connectivity) or the encoded primary address (other helpers).
    :raises e.NoReachableTestHelpers: when no web-connectivity candidate is
        usable.
    """
    # For the moment we look for alternate addresses only of
    # web_connectivity test helpers.
    if test_helper_name == 'web-connectivity':
        for web_connectivity_settings in self.sortAddressesByPriority(
                test_helper_address, test_helper_alternate):
            web_connectivity_test_helper = WebConnectivityClient(
                settings=web_connectivity_settings)
            if not web_connectivity_test_helper.isSupported():
                log.err("Unsupported %s web_connectivity test_helper "
                        "%s" % (
                            web_connectivity_settings['type'],
                            web_connectivity_settings['address']
                        ))
                continue
            reachable = yield web_connectivity_test_helper.isReachable()
            if not reachable:
                log.err("Unreachable %s web_connectivity test helper %s" % (
                    web_connectivity_settings['type'],
                    web_connectivity_settings['address']
                ))
                continue
            # returnValue raises internally, which ends the loop here.
            defer.returnValue(web_connectivity_settings)
        raise e.NoReachableTestHelpers
    else:
        defer.returnValue(test_helper_address.encode('ascii'))
def setUp(self):
    """
    Validate the input and reset the per-measurement state.

    The ``url`` option, when set, replaces ``self.input``.  The ``retries``
    option is coerced to an integer (falling back to 2 when it cannot be
    parsed), every report field is reset, the hostname is extracted from
    the input URL and a WebConnectivityClient is built for the configured
    backend.

    Raises Exception when there is no input and AbsentHostname when the
    input URL has no network location.
    """
    url_option = self.localOptions['url']
    if url_option:
        self.input = url_option
    if not self.input:
        raise Exception("No input specified")

    try:
        retries = int(self.localOptions['retries'])
    except ValueError:
        retries = 2
    self.localOptions['retries'] = retries
    self.timeout = int(self.localOptions['timeout'])

    self.report['retries'] = retries
    self.report['client_resolver'] = self.resolverIp
    # Every verdict starts out undetermined.
    for field in ('dns_consistency', 'body_length_match', 'headers_match',
                  'status_code_match', 'accessible', 'blocking',
                  'control_failure', 'http_experiment_failure',
                  'dns_experiment_failure'):
        self.report[field] = None
    self.report['tcp_connect'] = []
    self.report['control'] = {}

    hostname = urlparse(self.input).netloc
    self.hostname = hostname
    if not hostname:
        raise AbsentHostname('No hostname', self.input)

    # Placeholder control result, used until the backend answers.
    empty_http_request = {
        'body_length': -1,
        'failure': None,
        'status_code': -1,
        'headers': {},
        'title': ''
    }
    empty_dns = {'addrs': [], 'failure': None}
    self.control = {
        'tcp_connect': {},
        'dns': empty_dns,
        'http_request': empty_http_request
    }

    # The backend is either a settings dict or a plain address.
    backend = self.localOptions['backend']
    if isinstance(backend, dict):
        self.web_connectivity_client = WebConnectivityClient(settings=backend)
    else:
        self.web_connectivity_client = WebConnectivityClient(backend)
@defer.inlineCallbacks
def test_web_connectivity_client_control(self):
    """
    control() must POST the URL and the sockets to the backend root.

    The test body yields a Deferred, so it has to run under
    ``inlineCallbacks``; without the decorator the method is a plain
    generator function and the assertion below is never executed.
    """
    wcc = WebConnectivityClient(
        'https://web-connectivity.th.ooni.io')
    # Replace the network call with a stub so the arguments can be checked.
    wcc.queryBackend = MagicMock()
    wcc.queryBackend.return_value = defer.succeed({})
    yield wcc.control("http://example.com/",
                      ["127.0.0.1:8080", "127.0.0.1:8082"])
    wcc.queryBackend.assert_called_with(
        'POST', '/',
        query={
            "http_request": "http://example.com/",
            "tcp_connect": ["127.0.0.1:8080", "127.0.0.1:8082"]
        })
@defer.inlineCallbacks
def test_web_connectivity_client_control(self):
    """
    Check the request control() hands to queryBackend.

    Runs under ``inlineCallbacks`` because the body yields a Deferred; a
    bare generator test method would never reach its assertion.
    """
    wcc = WebConnectivityClient('https://web-connectivity.th.ooni.io')
    # Stub the backend query so no network traffic is generated.
    wcc.queryBackend = MagicMock()
    wcc.queryBackend.return_value = defer.succeed({})
    yield wcc.control("http://example.com/", ["127.0.0.1:8080", "127.0.0.1:8082"])
    wcc.queryBackend.assert_called_with(
        'POST',
        '/',
        query={
            "http_request": "http://example.com/",
            "tcp_connect": ["127.0.0.1:8080", "127.0.0.1:8082"]
        })
def setUp(self):
    """
    Check the input and prepare a clean report for this measurement.

    Uses the ``url`` option as input when it is set, coerces ``retries`` to
    an integer (2 when it cannot be parsed), resets all report fields,
    derives the hostname from the input URL and creates the
    WebConnectivityClient for the configured backend.

    Raises Exception when no input is given or when the input URL has no
    network location.
    """
    url_option = self.localOptions["url"]
    if url_option:
        self.input = url_option
    if not self.input:
        raise Exception("No input specified")

    try:
        retries = int(self.localOptions["retries"])
    except ValueError:
        retries = 2
    self.localOptions["retries"] = retries
    self.timeout = int(self.localOptions["timeout"])

    self.report["retries"] = retries
    self.report["client_resolver"] = self.resolverIp
    # None means "not determined yet" for each of these verdicts.
    undetermined = (
        "dns_consistency",
        "body_length_match",
        "headers_match",
        "status_code_match",
        "accessible",
        "blocking",
        "control_failure",
        "http_experiment_failure",
        "dns_experiment_failure",
    )
    for key in undetermined:
        self.report[key] = None
    self.report["tcp_connect"] = []
    self.report["control"] = {}

    self.hostname = urlparse(self.input).netloc
    if not self.hostname:
        raise Exception("Invalid input")

    # Default control result, replaced once the backend has answered.
    control = {}
    control["tcp_connect"] = {}
    control["dns"] = {"addrs": [], "failure": None}
    control["http_request"] = {
        "body_length": -1,
        "failure": None,
        "status_code": -1,
        "headers": {},
        "title": "",
    }
    self.control = control

    # A dict backend carries full settings, anything else is an address.
    backend = self.localOptions["backend"]
    if isinstance(backend, dict):
        wc_client = WebConnectivityClient(settings=backend)
    else:
        wc_client = WebConnectivityClient(backend)
    self.web_connectivity_client = wc_client
@defer.inlineCallbacks
def getReachableTestHelper(self, test_helper_name, test_helper_address,
                           test_helper_alternate):
    """
    Return the first usable address for a test helper.

    Candidates for the 'web-connectivity' helper come from
    ``self.sortAddressesByPriority`` and are probed in that order; a
    candidate is skipped (and logged) when it is unsupported or
    unreachable.  Other helpers get their primary address back,
    ASCII-encoded, with no probing.

    Because the body yields Deferreds and uses ``defer.returnValue`` it has
    to be wrapped by ``inlineCallbacks``; the caller receives a Deferred.

    :param test_helper_name: name of the test helper being resolved.
    :param test_helper_address: primary address of the test helper.
    :param test_helper_alternate: alternate addresses of the test helper.
    :returns: Deferred firing with the selected settings dict or with the
        encoded primary address.
    :raises e.NoReachableTestHelpers: when every web-connectivity candidate
        was skipped.
    """
    # For the moment we look for alternate addresses only of
    # web_connectivity test helpers.
    if test_helper_name == 'web-connectivity':
        for web_connectivity_settings in self.sortAddressesByPriority(
                test_helper_address, test_helper_alternate):
            web_connectivity_test_helper = WebConnectivityClient(
                settings=web_connectivity_settings)
            if not web_connectivity_test_helper.isSupported():
                log.err("Unsupported %s web_connectivity test_helper "
                        "%s" % (web_connectivity_settings['type'],
                                web_connectivity_settings['address']))
                continue
            reachable = yield web_connectivity_test_helper.isReachable()
            if not reachable:
                log.err("Unreachable %s web_connectivity test helper %s"
                        % (web_connectivity_settings['type'],
                           web_connectivity_settings['address']))
                continue
            # returnValue raises internally, so the loop stops here.
            defer.returnValue(web_connectivity_settings)
        raise e.NoReachableTestHelpers
    else:
        defer.returnValue(test_helper_address.encode('ascii'))
class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
    """
    Web connectivity

    For each input URL the test resolves the hostname, attempts a TCP
    connection to every public IPv4 address that was returned, fetches the
    page, and asks the web-connectivity backend (the "control") for its own
    view of the same URL.  Experiment and control are then compared to
    decide whether, and how, the URL appears to be blocked.
    """

    name = "Web connectivity"
    description = (
        "Identifies the reason for blocking of a given URL by "
        "performing DNS resolution of the hostname, doing a TCP "
        "connect to the resolved IPs and then fetching the page "
        "and comparing all these results with those of a control."
    )
    author = "Arturo Filastò"
    version = "0.1.0"

    contentDecoders = [("gzip", GzipDecoder)]

    usageOptions = UsageOptions

    inputFile = ["file", "f", None, "List of URLS to perform GET requests to"]

    requiredTestHelpers = {"backend": "web-connectivity", "dns-discovery": "dns-discovery"}
    requiredOptions = ["backend", "dns-discovery"]
    requiresRoot = False
    requiresTor = False
    followRedirects = True

    # Factor used to determine HTTP blockpage detection
    # the factor 0.7 comes from http://www3.cs.stonybrook.edu/~phillipa/papers/JLFG14.pdf
    factor = 0.7
    # Filled in by setUpClass; stays None when the lookup fails.
    resolverIp = None

    @classmethod
    @defer.inlineCallbacks
    def setUpClass(cls):
        """
        Look up the address behind the ``dns-discovery`` option.

        The dotted-quad address of the first answer is stored in
        ``cls.resolverIp``.  Any failure is logged and leaves ``resolverIp``
        unchanged.
        """
        try:
            answers = yield client.lookupAddress(cls.localOptions["dns-discovery"])
            assert len(answers) > 0
            assert len(answers[0]) > 0
            cls.resolverIp = answers[0][0].payload.dottedQuad()
        except Exception as exc:
            log.exception(exc)
            log.err("Failed to lookup the resolver IP address")

    def inputProcessor(self, filename):
        """
        This is a specialised inputProcessor that also supports taking as
        input a csv file.

        A file whose first line starts with ``url,`` is read as CSV and the
        first column of every following row is used.  Any other file is read
        line by line, skipping empty lines and lines starting with ``#``.
        Entries without an ``http://`` or ``https://`` prefix are wrapped as
        ``http://<entry>/``.

        :param filename: path of the input file.
        :returns: generator of URLs.
        """
        def csv_generator(fh):
            for row in csv.reader(fh):
                # csv.reader yields an empty list for blank lines; indexing
                # it would raise IndexError and abort the whole input.
                if not row:
                    continue
                yield row[0]

        def simple_file_generator(fh):
            for line in fh:
                stripped = line.strip()
                # Skip empty lines
                if not stripped:
                    continue
                # Skip comment lines
                if stripped.startswith("#"):
                    continue
                yield stripped

        with open(filename) as fh:
            line = fh.readline()
            # Detect the line of the citizenlab input file
            if line.startswith("url,"):
                generator = csv_generator(fh)
            else:
                # Not a CSV header: the first line is data, so rewind.
                fh.seek(0)
                generator = simple_file_generator(fh)
            for i in generator:
                if not i.startswith("http://") and not i.startswith("https://"):
                    i = "http://{}/".format(i)
                yield i

    def setUp(self):
        """
        Check for inputs.

        Uses the ``url`` option as input when set, coerces ``retries`` to an
        integer (2 when unparsable), resets every report field, extracts the
        hostname from the input URL and creates the WebConnectivityClient.

        Raises Exception when there is no input or the URL has no network
        location.
        """
        if self.localOptions["url"]:
            self.input = self.localOptions["url"]
        if not self.input:
            raise Exception("No input specified")

        try:
            self.localOptions["retries"] = int(self.localOptions["retries"])
        except ValueError:
            self.localOptions["retries"] = 2

        self.timeout = int(self.localOptions["timeout"])

        self.report["retries"] = self.localOptions["retries"]
        self.report["client_resolver"] = self.resolverIp
        self.report["dns_consistency"] = None
        self.report["body_length_match"] = None
        self.report["headers_match"] = None
        self.report["status_code_match"] = None

        self.report["accessible"] = None
        self.report["blocking"] = None

        self.report["control_failure"] = None
        self.report["http_experiment_failure"] = None
        self.report["dns_experiment_failure"] = None

        self.report["tcp_connect"] = []
        self.report["control"] = {}

        self.hostname = urlparse(self.input).netloc
        if not self.hostname:
            raise Exception("Invalid input")

        # Placeholder control result, replaced by control_request().
        self.control = {
            "tcp_connect": {},
            "dns": {"addrs": [], "failure": None},
            "http_request": {"body_length": -1, "failure": None, "status_code": -1, "headers": {}, "title": ""},
        }
        # The backend option is either a settings dict or a plain address.
        if isinstance(self.localOptions["backend"], dict):
            self.web_connectivity_client = WebConnectivityClient(settings=self.localOptions["backend"])
        else:
            self.web_connectivity_client = WebConnectivityClient(self.localOptions["backend"])

    def experiment_dns_query(self):
        """Resolve ``self.hostname`` and return the result of performALookup."""
        log.msg("* doing DNS query for {}".format(self.hostname))
        return self.performALookup(self.hostname)

    def experiment_tcp_connect(self, socket):
        """
        Attempt a TCP connection and record the outcome in the report.

        :param socket: endpoint in ``"ip:port"`` form.
        :returns: the Deferred of the connection attempt; both outcomes
            append an entry to ``self.report["tcp_connect"]``.
        """
        log.msg("* connecting to {}".format(socket))
        ip_address, port = socket.split(":")
        port = int(port)
        result = {"ip": ip_address, "port": port, "status": {"success": None, "failure": None, "blocked": None}}
        point = TCP4ClientEndpoint(reactor, ip_address, port)
        d = point.connect(TCPConnectFactory())

        @d.addCallback
        def cb(p):
            result["status"]["success"] = True
            result["status"]["blocked"] = False
            self.report["tcp_connect"].append(result)

        @d.addErrback
        def eb(failure):
            # "blocked" stays None here; compare_tcp_experiments() decides.
            result["status"]["success"] = False
            result["status"]["failure"] = failureToString(failure)
            self.report["tcp_connect"].append(result)

        return d

    @defer.inlineCallbacks
    def control_request(self, sockets):
        """
        Ask the backend for the control measurement and store it.

        :param sockets: list of ``"ip:port"`` strings sent as ``tcp_connect``.
        """
        log.msg("* performing control request with backend")
        self.control = yield self.web_connectivity_client.control(http_request=self.input, tcp_connect=sockets)
        self.report["control"] = self.control

    @defer.inlineCallbacks
    def experiment_http_get_request(self):
        """
        Fetch ``self.input``, retrying on failure.

        At most ``localOptions["retries"]`` retries follow the first
        attempt; when they are used up the last error is re-raised.

        :returns: Deferred firing with the result of ``doRequest``.
        """
        log.msg("* doing HTTP(s) request {}".format(self.input))
        retries = 0
        while True:
            try:
                result = yield self.doRequest(self.input, headers=REQUEST_HEADERS)
                break
            # A bare except would also trap GeneratorExit and
            # KeyboardInterrupt and keep looping.
            except Exception:
                # ">=" so that exactly the configured number of retries is
                # performed (">" allowed one extra request).
                if retries >= self.localOptions["retries"]:
                    log.debug("Finished all the allowed retries")
                    raise
                log.debug("Re-running HTTP request")
                retries += 1

        defer.returnValue(result)

    def compare_headers(self, experiment_http_response):
        """
        Compare the header names of the control and experiment responses.

        Names are compared case-insensitively.  The result is True when both
        sets of names are equal, or when the two responses share at least
        one header that is not listed in COMMON_SERVER_HEADERS.
        """
        control_headers_lower = {k.lower(): v for k, v in self.report["control"]["http_request"]["headers"].items()}
        experiment_headers_lower = {k.lower(): v for k, v in experiment_http_response.headers.getAllRawHeaders()}

        if set(control_headers_lower.keys()) == set(experiment_headers_lower.keys()):
            return True

        uncommon_ctrl_headers = set(control_headers_lower.keys()) - set(COMMON_SERVER_HEADERS)
        uncommon_exp_headers = set(experiment_headers_lower.keys()) - set(COMMON_SERVER_HEADERS)

        return len(uncommon_ctrl_headers.intersection(uncommon_exp_headers)) > 0

    def compare_body_lengths(self, experiment_http_response):
        """
        Compare control and experiment body lengths.

        The ratio of the shorter to the longer body is stored in
        ``self.report["body_proportion"]``; the result is True when that
        ratio is greater than ``self.factor``.
        """
        control_body_length = self.control["http_request"]["body_length"]
        experiment_body_length = len(experiment_http_response.body)

        if control_body_length == experiment_body_length:
            rel = float(1)
        elif control_body_length == 0 or experiment_body_length == 0:
            # Avoids dividing by zero below.
            rel = float(0)
        else:
            rel = float(control_body_length) / float(experiment_body_length)

        # Normalise so the proportion is always in [0, 1].
        if rel > 1:
            rel = 1 / rel

        self.report["body_proportion"] = rel
        return rel > float(self.factor)

    def compare_titles(self, experiment_http_response):
        """
        Compare the page titles of the experiment and the control.

        Only the first experiment title word of at least 5 characters is
        considered; it is compared, case-insensitively, with the control
        title word at the same position.

        :returns: True or False for that comparison, False when the control
            title has no word at that position, None when the experiment
            title has no word of at least 5 characters.
        """
        experiment_title = extractTitle(experiment_http_response.body).strip()
        control_title = self.control["http_request"]["title"].strip()
        control_words = control_title.split(" ")
        for idx, exp_word in enumerate(experiment_title.split(" ")):
            # We don't consider to match words that are shorter than 5
            # characters (5 is the average word length for english)
            if len(exp_word) < 5:
                continue
            try:
                return control_words[idx].lower() == exp_word.lower()
            except IndexError:
                return False
        # No word was long enough to compare: the outcome is undetermined.
        return None

    def compare_http_experiments(self, experiment_http_response):
        """
        Fill in the HTTP comparison fields of the report.

        ``status_code_match`` is left untouched when the control status code
        starts with ``5``.
        """
        self.report["body_length_match"] = self.compare_body_lengths(experiment_http_response)

        self.report["headers_match"] = self.compare_headers(experiment_http_response)

        if str(self.control["http_request"]["status_code"])[0] != "5":
            self.report["status_code_match"] = (
                self.control["http_request"]["status_code"] == experiment_http_response.code
            )

        self.report["title_match"] = self.compare_titles(experiment_http_response)

    def compare_dns_experiments(self, experiment_dns_answers):
        """
        Decide whether the experiment DNS answers agree with the control.

        The checks are applied in order: identical failures, identical
        address sets, any non-public experiment address (inconsistent), a
        shared address, and finally a shared ASN (ignoring ``AS0``).

        :param experiment_dns_answers: iterable of addresses from the probe.
        :returns: True when consistent, False otherwise.
        """
        if (
            self.control["dns"]["failure"] is not None
            and self.control["dns"]["failure"] == self.report["dns_experiment_failure"]
        ):
            self.report["dns_consistency"] = "consistent"
            return True

        control_addrs = set(self.control["dns"]["addrs"])
        experiment_addrs = set(experiment_dns_answers)

        if control_addrs == experiment_addrs:
            return True

        for experiment_addr in experiment_addrs:
            if is_public_ipv4_address(experiment_addr) is False:
                return False

        if len(control_addrs.intersection(experiment_addrs)) > 0:
            return True

        experiment_asns = set(map(lambda x: geoip.IPToLocation(x)["asn"], experiment_addrs))
        control_asns = set(map(lambda x: geoip.IPToLocation(x)["asn"], control_addrs))

        # Remove the instance of AS0 when we fail to find the ASN
        control_asns.discard("AS0")
        experiment_asns.discard("AS0")

        if len(control_asns.intersection(experiment_asns)) > 0:
            return True

        return False

    def compare_tcp_experiments(self):
        """
        Mark each recorded TCP attempt as blocked or not.

        An attempt is blocked when it failed for the probe while the control
        reports a status of True for the same ``ip:port``.

        :returns: False when at least one attempt is blocked, True otherwise.
        """
        success = True
        for idx, result in enumerate(self.report["tcp_connect"]):
            socket = "%s:%s" % (result["ip"], result["port"])
            control_status = self.control["tcp_connect"][socket]
            if result["status"]["success"] == False and control_status["status"] == True:
                self.report["tcp_connect"][idx]["status"]["blocked"] = True
                success = False
            else:
                self.report["tcp_connect"][idx]["status"]["blocked"] = False
        return success

    def determine_blocking(self, experiment_http_response, experiment_dns_answers):
        """
        Combine the DNS, TCP and HTTP comparisons into a blocking verdict.

        Also fills in ``dns_consistency``, the ``blocked`` flag of every TCP
        attempt and, when neither side had an HTTP failure, the HTTP
        comparison fields of the report.

        :returns: False when no blocking is detected, otherwise one of
            ``"tcp_ip"``, ``"http-diff"``, ``"http-failure"`` or ``"dns"``.
        """
        blocking = False

        # Only the first word of a failure string is compared.
        control_http_failure = self.control["http_request"]["failure"]
        if control_http_failure is not None:
            control_http_failure = control_http_failure.split(" ")[0]

        experiment_http_failure = self.report["http_experiment_failure"]
        if experiment_http_failure is not None:
            experiment_http_failure = experiment_http_failure.split(" ")[0]

        if experiment_http_failure is None and control_http_failure is None:
            self.compare_http_experiments(experiment_http_response)

        dns_consistent = self.compare_dns_experiments(experiment_dns_answers)
        if dns_consistent is True:
            self.report["dns_consistency"] = "consistent"
        else:
            self.report["dns_consistency"] = "inconsistent"
        tcp_connect = self.compare_tcp_experiments()

        # Stays None when the HTTP responses could not be compared.
        got_expected_web_page = None
        if experiment_http_failure is None and control_http_failure is None:
            got_expected_web_page = (
                self.report["body_length_match"] is True
                or self.report["headers_match"] is True
                or self.report["title_match"] is True
            ) and self.report["status_code_match"] is not False

        if dns_consistent == True and tcp_connect == False and experiment_http_failure is not None:
            blocking = "tcp_ip"

        elif dns_consistent == True and tcp_connect == True and got_expected_web_page == False:
            blocking = "http-diff"

        elif (
            dns_consistent == True
            and tcp_connect == True
            and experiment_http_failure is not None
            and control_http_failure is None
        ):
            if experiment_http_failure == "dns_lookup_error":
                blocking = "dns"
            else:
                blocking = "http-failure"

        elif dns_consistent == False and (got_expected_web_page == False or experiment_http_failure is not None):
            # This branch also covers the case where the DNS resolution is
            # injected, but the domain doesn't have a valid record anymore
            # or it resolves to an address that is only accessible from
            # within the country/network of the probe.  A separate branch
            # for that case used to follow but could never be reached.
            blocking = "dns"

        return blocking

    @defer.inlineCallbacks
    def test_web_connectivity(self):
        """
        Run the measurement for ``self.input`` and fill in the report.

        Steps: DNS query, TCP connect to every public IPv4 answer, HTTP
        request, control request, then (when the control succeeded) the
        blocking verdict.  ``accessible`` is True only when the DNS and
        HTTP experiments did not fail and no blocking was found.
        """
        log.msg("")
        log.msg("Starting test for {}".format(self.input))
        experiment_dns = self.experiment_dns_query()

        @experiment_dns.addErrback
        def dns_experiment_err(failure):
            self.report["dns_experiment_failure"] = failureToString(failure)
            # Continue with no answers instead of aborting the test.
            return []

        experiment_dns_answers = yield experiment_dns

        port = 80
        parsed_url = urlparse(self.input)
        if parsed_url.port:
            port = parsed_url.port
        elif parsed_url.scheme == "https":
            port = 443

        sockets = []
        for ip_address in experiment_dns_answers:
            if is_public_ipv4_address(ip_address) is True:
                sockets.append("{}:{}".format(ip_address, port))

        # STEALTH in here we should make changes to make the test more stealth
        dl = []
        for socket in sockets:
            dl.append(self.experiment_tcp_connect(socket))
        # Outcomes are recorded in the report by the callbacks themselves.
        yield defer.DeferredList(dl)

        experiment_http = self.experiment_http_get_request()

        @experiment_http.addErrback
        def http_experiment_err(failure):
            failure_string = failureToString(failure)
            log.err("Failed to perform HTTP request %s" % failure_string)
            self.report["http_experiment_failure"] = failure_string

        # None when the request failed (the errback returns nothing).
        experiment_http_response = yield experiment_http

        control_request = self.control_request(sockets)

        @control_request.addErrback
        def control_err(failure):
            failure_string = failureToString(failure)
            log.err("Failed to perform control lookup: %s" % failure_string)
            self.report["control_failure"] = failure_string

        yield control_request

        if self.report["control_failure"] is None:
            self.report["blocking"] = self.determine_blocking(experiment_http_response, experiment_dns_answers)

        log.msg("")
        log.msg("Result for %s" % self.input)
        log.msg("-----------" + "-" * len(self.input))

        if self.report["blocking"] is None:
            log.msg("* Could not determine status of blocking due to " "failing control request")
        elif self.report["blocking"] is False:
            log.msg("* No blocking detected")
        else:
            log.msg("* BLOCKING DETECTED due to %s" % (self.report["blocking"]))

        if (
            self.report["http_experiment_failure"] is None
            and self.report["dns_experiment_failure"] is None
            and self.report["blocking"] in (False, None)
        ):
            self.report["accessible"] = True
            log.msg("* Is accessible")
        else:
            log.msg("* Is NOT accessible")
            self.report["accessible"] = False
        log.msg("")

    def postProcessor(self, measurements):
        """
        Add this measurement's outcome to ``self.summary``.

        :param measurements: unused by this implementation.
        :returns: ``self.report``.
        """
        self.summary["accessible"] = self.summary.get("accessible", [])
        self.summary["not-accessible"] = self.summary.get("not-accessible", [])
        self.summary["blocked"] = self.summary.get("blocked", {})

        if self.report["blocking"] not in (False, None):
            self.summary["blocked"][self.report["blocking"]] = self.summary["blocked"].get(self.report["blocking"], [])
            self.summary["blocked"][self.report["blocking"]].append(self.input)

        if self.report["accessible"] is True:
            self.summary["accessible"].append(self.input)
        else:
            self.summary["not-accessible"].append(self.input)
        return self.report

    def displaySummary(self, summary):
        """
        Log the accessible, not accessible and possibly blocked URLs.

        :param summary: dict with the ``accessible``, ``not-accessible`` and
            ``blocked`` keys built by postProcessor.
        """
        if len(summary["accessible"]) > 0:
            log.msg("")
            log.msg("Accessible URLS")
            log.msg("---------------")
            for url in summary["accessible"]:
                log.msg("* {}".format(url))

        if len(summary["not-accessible"]) > 0:
            log.msg("")
            log.msg("Not accessible URLS")
            log.msg("-------------------")
            for url in summary["not-accessible"]:
                log.msg("* {}".format(url))

        if len(summary["blocked"]) > 0:
            for reason, urls in summary["blocked"].items():
                log.msg("")
                log.msg("URLS possibly blocked due to {}".format(reason))
                log.msg("-----------------------------" + "-" * len(reason))
                for url in urls:
                    log.msg("* {}".format(url))
@defer.inlineCallbacks
def test_web_connectivity_client_is_not_reachable(self):
    """
    A failing backend query must make isReachable() yield False.

    Runs under ``inlineCallbacks`` because the body yields a Deferred; a
    bare generator test method would never reach its assertion.
    """
    wcc = WebConnectivityClient('https://web-connectivity.th.ooni.io')
    # Stub the backend query with an already-failed Deferred.
    wcc.queryBackend = MagicMock()
    wcc.queryBackend.return_value = defer.fail(Exception())
    result = yield wcc.isReachable()
    self.assertEqual(result, False)
@defer.inlineCallbacks
def test_web_connectivity_client_is_reachable(self):
    """
    A successful backend query must make isReachable() yield True.

    Runs under ``inlineCallbacks`` because the body yields a Deferred; a
    bare generator test method would never reach its assertion.
    """
    wcc = WebConnectivityClient('https://web-connectivity.th.ooni.io')
    # Stub the backend query with an already-fired Deferred.
    wcc.queryBackend = MagicMock()
    wcc.queryBackend.return_value = defer.succeed({"status": "ok"})
    result = yield wcc.isReachable()
    self.assertEqual(result, True)
class WebConnectivityTest(httpt.HTTPTest, dnst.DNSTest):
    """
    Web connectivity

    For each input URL the test resolves the hostname, attempts a TCP
    connection to every public IPv4 address that was returned, fetches the
    page, and asks the web-connectivity backend (the "control") for its own
    view of the same URL.  Experiment and control are then compared to
    decide whether, and how, the URL appears to be blocked.
    """

    name = "Web connectivity"
    description = ("Identifies the reason for blocking of a given URL by "
                   "performing DNS resolution of the hostname, doing a TCP "
                   "connect to the resolved IPs and then fetching the page "
                   "and comparing all these results with those of a control.")
    author = "Arturo Filastò"
    version = "0.3.2"

    contentDecoders = [('gzip', GzipDecoder)]

    usageOptions = UsageOptions

    inputFile = [
        'file', 'f', None, 'List of URLS to perform GET requests to'
    ]

    requiredTestHelpers = {
        'backend': 'web-connectivity',
        'dns-discovery': 'dns-discovery'
    }
    requiredOptions = ['backend', 'dns-discovery']
    requiresRoot = False
    requiresTor = False
    followRedirects = True
    ignorePrivateRedirects = True

    # These are the options to be shown on the GUI
    simpleOptions = [
        {"name": "url", "type": "text"},
        {"name": "file", "type": "file/url"}
    ]

    # Factor used to determine HTTP blockpage detection
    # the factor 0.7 comes from http://www3.cs.stonybrook.edu/~phillipa/papers/JLFG14.pdf
    factor = 0.7
    # Filled in by setUpClass; stays None when the lookup fails.
    resolverIp = None

    @classmethod
    @defer.inlineCallbacks
    def setUpClass(cls):
        """
        Look up the address behind the ``dns-discovery`` option.

        The dotted-quad address of the first answer is stored in
        ``cls.resolverIp``.  Any failure is logged and leaves ``resolverIp``
        unchanged.
        """
        try:
            answers = yield client.lookupAddress(
                cls.localOptions['dns-discovery']
            )
            assert len(answers) > 0
            assert len(answers[0]) > 0
            cls.resolverIp = answers[0][0].payload.dottedQuad()
        except Exception as exc:
            log.exception(exc)
            log.err("Failed to lookup the resolver IP address")

    def inputProcessor(self, filename):
        """
        This is a specialised inputProcessor that also supports taking as
        input a csv file.

        A file whose first line starts with ``url,`` is read as CSV and the
        first column of every following row is used.  Any other file is read
        line by line, skipping empty lines and lines starting with ``#``.
        Unless the ``no-shuffle`` option is True the entries are loaded into
        a list and shuffled.  Entries without an ``http://`` or ``https://``
        prefix are wrapped as ``http://<entry>/``; for ``https://`` entries
        the ``http://`` variant is yielded first unless ``no-http`` is True.

        :param filename: path of the input file.
        :returns: generator of URLs.
        """
        def csv_generator(fh):
            for row in csv.reader(fh):
                # csv.reader yields an empty list for blank lines; indexing
                # it would raise IndexError and abort the whole input.
                if not row:
                    continue
                yield row[0]

        def simple_file_generator(fh):
            for line in fh:
                stripped = line.strip()
                # Skip empty lines
                if not stripped:
                    continue
                # Skip comment lines
                if stripped.startswith('#'):
                    continue
                yield stripped

        with open(filename) as fh:
            line = fh.readline()
            # Detect the line of the citizenlab input file
            if line.startswith("url,"):
                generator = csv_generator(fh)
            else:
                # Not a CSV header: the first line is data, so rewind.
                fh.seek(0)
                generator = simple_file_generator(fh)
            if self.localOptions['no-shuffle'] != True:
                input_list = list(generator)
                random.shuffle(input_list)
                generator = input_list
            for i in generator:
                if (not i.startswith("http://") and
                        not i.startswith("https://")):
                    i = "http://{}/".format(i)
                if i.startswith('https://') and self.localOptions['no-http'] != True:
                    # Replace the leading "https" with "http".
                    yield 'http'+i[5:]
                yield i

    def setUp(self):
        """
        Check for inputs.

        Uses the ``url`` option as input when set, coerces ``retries`` to an
        integer (2 when unparsable), resets every report field, extracts the
        hostname from the input URL and creates the WebConnectivityClient.

        Raises Exception when there is no input and AbsentHostname when the
        URL has no network location.
        """
        if self.localOptions['url']:
            self.input = self.localOptions['url']
        if not self.input:
            raise Exception("No input specified")

        try:
            self.localOptions['retries'] = int(self.localOptions['retries'])
        except ValueError:
            self.localOptions['retries'] = 2

        self.timeout = int(self.localOptions['timeout'])

        self.report['retries'] = self.localOptions['retries']
        self.report['client_resolver'] = self.resolverIp
        self.report['dns_consistency'] = None
        self.report['body_length_match'] = None
        self.report['headers_match'] = None
        self.report['status_code_match'] = None

        self.report['accessible'] = None
        self.report['blocking'] = None

        self.report['control_failure'] = None
        self.report['http_experiment_failure'] = None
        self.report['dns_experiment_failure'] = None

        self.report['tcp_connect'] = []
        self.report['control'] = {}

        self.hostname = urlparse(self.input).netloc
        if not self.hostname:
            raise AbsentHostname('No hostname', self.input)

        # Placeholder control result, replaced by control_request().
        self.control = {
            'tcp_connect': {},
            'dns': {
                'addrs': [],
                'failure': None,
            },
            'http_request': {
                'body_length': -1,
                'failure': None,
                'status_code': -1,
                'headers': {},
                'title': ''
            }
        }
        # The backend option is either a settings dict or a plain address.
        if isinstance(self.localOptions['backend'], dict):
            self.web_connectivity_client = WebConnectivityClient(
                settings=self.localOptions['backend']
            )
        else:
            self.web_connectivity_client = WebConnectivityClient(
                self.localOptions['backend']
            )

    def experiment_dns_query(self):
        """Resolve ``self.hostname`` and return the result of performALookup."""
        log.msg("* doing DNS query for {}".format(self.hostname))
        return self.performALookup(self.hostname)

    def experiment_tcp_connect(self, socket):
        """
        Attempt a TCP connection and record the outcome in the report.

        :param socket: endpoint in ``"ip:port"`` form.
        :returns: the Deferred of the connection attempt; both outcomes
            append an entry to ``self.report['tcp_connect']``.
        """
        log.msg("* connecting to {}".format(socket))
        ip_address, port = socket.split(":")
        port = int(port)
        result = {
            'ip': ip_address,
            'port': port,
            'status': {
                'success': None,
                'failure': None,
                'blocked': None
            }
        }
        point = TCP4ClientEndpoint(reactor, ip_address, port)
        d = point.connect(TCPConnectFactory())

        @d.addCallback
        def cb(p):
            result['status']['success'] = True
            result['status']['blocked'] = False
            self.report['tcp_connect'].append(result)

        @d.addErrback
        def eb(failure):
            # 'blocked' stays None here; compare_tcp_experiments() decides.
            result['status']['success'] = False
            result['status']['failure'] = failureToString(failure)
            self.report['tcp_connect'].append(result)

        return d

    @defer.inlineCallbacks
    def control_request(self, sockets):
        """
        Ask the backend for the control measurement and store it.

        :param sockets: list of ``"ip:port"`` strings sent as ``tcp_connect``.
        """
        log.msg("* performing control request with backend")
        self.control = yield self.web_connectivity_client.control(
            http_request=self.input,
            tcp_connect=sockets,
            http_request_headers=REQUEST_HEADERS
        )
        self.report['control'] = self.control

    @defer.inlineCallbacks
    def experiment_http_get_request(self):
        """
        Fetch ``self.input``, retrying on failure.

        At most ``localOptions['retries']`` retries follow the first
        attempt; when they are used up the last error is re-raised.

        :returns: Deferred firing with the result of ``doRequest``.
        """
        log.msg("* doing HTTP(s) request {}".format(self.input))
        retries = 0
        while True:
            try:
                result = yield self.doRequest(self.input,
                                              headers=REQUEST_HEADERS)
                break
            # A bare except would also trap GeneratorExit and
            # KeyboardInterrupt and keep looping.
            except Exception:
                # ">=" so that exactly the configured number of retries is
                # performed (">" allowed one extra request).
                if retries >= self.localOptions['retries']:
                    log.debug("Finished all the allowed retries")
                    raise
                log.debug("Re-running HTTP request")
                retries += 1

        defer.returnValue(result)

    def compare_headers(self, experiment_http_response):
        """
        Compare the header names of the control and experiment responses.

        Names are compared case-insensitively.  The result is True when both
        sets of names are equal, or when the two responses share at least
        one header that is not listed in COMMON_SERVER_HEADERS.
        """
        control_headers_lower = {k.lower(): v for k, v in
                                 self.report['control']['http_request']['headers'].items()
                                 }
        experiment_headers_lower = {k.lower(): v for k, v in
                                    experiment_http_response.headers.getAllRawHeaders()
                                    }

        if (set(control_headers_lower.keys()) ==
                set(experiment_headers_lower.keys())):
            return True

        uncommon_ctrl_headers = (set(control_headers_lower.keys()) -
                                 set(COMMON_SERVER_HEADERS))
        uncommon_exp_headers = (set(experiment_headers_lower.keys()) -
                                set(COMMON_SERVER_HEADERS))

        return len(uncommon_ctrl_headers.intersection(
            uncommon_exp_headers)) > 0

    def compare_body_lengths(self, experiment_http_response):
        """
        Compare control and experiment body lengths.

        The ratio of the shorter to the longer body is stored in
        ``self.report['body_proportion']``; the result is True when that
        ratio is greater than ``self.factor``.
        """
        control_body_length = self.control['http_request']['body_length']
        experiment_body_length = len(experiment_http_response.body)

        if control_body_length == experiment_body_length:
            rel = float(1)
        elif control_body_length == 0 or experiment_body_length == 0:
            # Avoids dividing by zero below.
            rel = float(0)
        else:
            rel = float(control_body_length) / float(experiment_body_length)

        # Normalise so the proportion is always in [0, 1].
        if rel > 1:
            rel = 1/rel

        self.report['body_proportion'] = rel
        return rel > float(self.factor)

    def compare_titles(self, experiment_http_response):
        """
        Compare the page titles of the experiment and the control.

        Only the first experiment title word of at least 5 characters is
        considered; it is compared, case-insensitively, with the control
        title word at the same position.

        :returns: True or False for that comparison, False when the control
            title has no word at that position, None when the experiment
            title has no word of at least 5 characters.
        """
        experiment_title = extractTitle(experiment_http_response.body).strip()
        control_title = self.control['http_request']['title'].strip()
        control_words = control_title.split(' ')
        for idx, exp_word in enumerate(experiment_title.split(' ')):
            # We don't consider to match words that are shorter than 5
            # characters (5 is the average word length for english)
            if len(exp_word) < 5:
                continue
            try:
                return control_words[idx].lower() == exp_word.lower()
            except IndexError:
                return False
        # No word was long enough to compare: the outcome is undetermined.
        return None

    def compare_http_experiments(self, experiment_http_response):
        """
        Fill in the HTTP comparison fields of the report.

        ``status_code_match`` is left untouched when the control status code
        starts with ``5``.
        """
        self.report['body_length_match'] = \
            self.compare_body_lengths(experiment_http_response)

        self.report['headers_match'] = \
            self.compare_headers(experiment_http_response)

        if str(self.control['http_request']['status_code'])[0] != '5':
            self.report['status_code_match'] = (
                self.control['http_request']['status_code'] ==
                experiment_http_response.code
            )

        self.report['title_match'] = self.compare_titles(experiment_http_response)

    def compare_dns_experiments(self, experiment_dns_answers):
        """
        Decide whether the experiment DNS answers agree with the control.

        The checks are applied in order: identical failures, identical
        address sets, any non-public experiment address (inconsistent), a
        shared address, and finally a shared ASN (ignoring ``AS0``).

        :param experiment_dns_answers: iterable of addresses from the probe.
        :returns: True when consistent, False otherwise.
        """
        if self.control['dns']['failure'] is not None and \
                self.control['dns']['failure'] == self.report['dns_experiment_failure']:
            self.report['dns_consistency'] = 'consistent'
            return True

        control_addrs = set(self.control['dns']['addrs'])
        experiment_addrs = set(experiment_dns_answers)

        if control_addrs == experiment_addrs:
            return True

        for experiment_addr in experiment_addrs:
            if is_public_ipv4_address(experiment_addr) is False:
                return False

        if len(control_addrs.intersection(experiment_addrs)) > 0:
            return True

        experiment_asns = set(map(lambda x: geoip.ip_to_location(x)['asn'],
                                  experiment_addrs))
        control_asns = set(map(lambda x: geoip.ip_to_location(x)['asn'],
                               control_addrs))

        # Remove the instance of AS0 when we fail to find the ASN
        control_asns.discard('AS0')
        experiment_asns.discard('AS0')

        if len(control_asns.intersection(experiment_asns)) > 0:
            return True

        return False

    def compare_tcp_experiments(self):
        """
        Mark each recorded TCP attempt as blocked or not.

        An attempt is blocked when it failed for the probe while the control
        reports a status of True for the same ``ip:port``.

        :returns: False when at least one attempt is blocked, True otherwise.
        """
        success = True
        for idx, result in enumerate(self.report['tcp_connect']):
            socket = "%s:%s" % (result['ip'], result['port'])
            control_status = self.control['tcp_connect'][socket]
            if result['status']['success'] == False and \
                    control_status['status'] == True:
                self.report['tcp_connect'][idx]['status']['blocked'] = True
                success = False
            else:
                self.report['tcp_connect'][idx]['status']['blocked'] = False
        return success

    def determine_blocking(self, experiment_http_response, experiment_dns_answers):
        """
        Combine the DNS, TCP and HTTP comparisons into a blocking verdict.

        Also fills in ``dns_consistency``, the ``blocked`` flag of every TCP
        attempt and, when neither side had an HTTP failure, the HTTP
        comparison fields of the report.

        :returns: False when no blocking is detected, otherwise one of
            ``'tcp_ip'``, ``'http-diff'``, ``'http-failure'`` or ``'dns'``.
        """
        blocking = False

        # Only the first word of a failure string is compared.
        control_http_failure = self.control['http_request']['failure']
        if control_http_failure is not None:
            control_http_failure = control_http_failure.split(" ")[0]

        experiment_http_failure = self.report['http_experiment_failure']
        if experiment_http_failure is not None:
            experiment_http_failure = experiment_http_failure.split(" ")[0]

        if (experiment_http_failure is None and control_http_failure is None):
            self.compare_http_experiments(experiment_http_response)

        dns_consistent = self.compare_dns_experiments(experiment_dns_answers)
        if dns_consistent is True:
            self.report['dns_consistency'] = 'consistent'
        else:
            self.report['dns_consistency'] = 'inconsistent'
        tcp_connect = self.compare_tcp_experiments()

        # Stays None when the HTTP responses could not be compared.
        got_expected_web_page = None
        if (experiment_http_failure is None and
                control_http_failure is None):
            got_expected_web_page = (
                (self.report['body_length_match'] is True or
                 self.report['headers_match'] is True or
                 self.report['title_match'] is True)
                and self.report['status_code_match'] is not False
            )

        if (dns_consistent == True and tcp_connect == False and
                experiment_http_failure is not None):
            blocking = 'tcp_ip'

        elif (dns_consistent == True and
              tcp_connect == True and
              got_expected_web_page == False):
            blocking = 'http-diff'

        elif (dns_consistent == True and
              tcp_connect == True and
              experiment_http_failure is not None and
              control_http_failure is None):
            if experiment_http_failure == 'dns_lookup_error':
                blocking = 'dns'
            else:
                blocking = 'http-failure'

        elif (dns_consistent == False and
              (got_expected_web_page == False or
               experiment_http_failure is not None)):
            # This branch also covers the case where the DNS resolution is
            # injected, but the domain doesn't have a valid record anymore
            # or it resolves to an address that is only accessible from
            # within the country/network of the probe.  A separate branch
            # for that case used to follow but could never be reached.
            blocking = 'dns'

        return blocking

    @defer.inlineCallbacks
    def test_web_connectivity(self):
        """
        Run the measurement for ``self.input`` and fill in the report.

        Steps: DNS query, TCP connect to every public IPv4 answer, HTTP
        request, control request, then (when the control succeeded) the
        blocking verdict.  ``accessible`` is True only when the DNS and
        HTTP experiments did not fail and no blocking was found.
        """
        log.msg("")
        log.msg("Starting test for {}".format(self.input))
        experiment_dns = self.experiment_dns_query()

        @experiment_dns.addErrback
        def dns_experiment_err(failure):
            self.report['dns_experiment_failure'] = failureToString(failure)
            # Continue with no answers instead of aborting the test.
            return []

        experiment_dns_answers = yield experiment_dns

        port = 80
        parsed_url = urlparse(self.input)
        if parsed_url.port:
            port = parsed_url.port
        elif parsed_url.scheme == 'https':
            port = 443

        sockets = []
        for ip_address in experiment_dns_answers:
            if is_public_ipv4_address(ip_address) is True:
                sockets.append("{}:{}".format(ip_address, port))

        # STEALTH in here we should make changes to make the test more stealth
        dl = []
        for socket in sockets:
            dl.append(self.experiment_tcp_connect(socket))
        # Outcomes are recorded in the report by the callbacks themselves.
        yield defer.DeferredList(dl)

        experiment_http = self.experiment_http_get_request()

        @experiment_http.addErrback
        def http_experiment_err(failure):
            failure_string = failureToString(failure)
            log.msg("Failed to perform HTTP request %s" % failure_string)
            self.report['http_experiment_failure'] = failure_string

        # None when the request failed (the errback returns nothing).
        experiment_http_response = yield experiment_http

        control_request = self.control_request(sockets)

        @control_request.addErrback
        def control_err(failure):
            failure_string = failureToString(failure)
            log.err("Failed to perform control lookup: %s" % failure_string)
            self.report['control_failure'] = failure_string

        yield control_request

        if self.report['control_failure'] is None:
            self.report['blocking'] = self.determine_blocking(experiment_http_response, experiment_dns_answers)

        log.msg("")
        log.msg("Result for %s" % self.input)
        log.msg("-----------" + "-"*len(self.input))

        if self.report['blocking'] is None:
            log.msg("* Could not determine status of blocking due to "
                    "failing control request")
        elif self.report['blocking'] is False:
            log.msg("* No blocking detected")
        else:
            log.msg("* BLOCKING DETECTED due to %s" % (self.report['blocking']))

        if (self.report['http_experiment_failure'] is None and
                self.report['dns_experiment_failure'] is None and
                self.report['blocking'] in (False, None)):
            self.report['accessible'] = True
            log.msg("* Is accessible")
        else:
            log.msg("* Is NOT accessible")
            self.report['accessible'] = False
        log.msg("")

    def postProcessor(self, measurements):
        """
        Add this measurement's outcome to ``self.summary``.

        :param measurements: unused by this implementation.
        :returns: ``self.report``.
        """
        self.summary['accessible'] = self.summary.get('accessible', [])
        self.summary['not-accessible'] = self.summary.get('not-accessible', [])
        self.summary['blocked'] = self.summary.get('blocked', {})

        if self.report['blocking'] not in (False, None):
            self.summary['blocked'][self.report['blocking']] = \
                self.summary['blocked'].get(self.report['blocking'], [])

            self.summary['blocked'][self.report['blocking']].append(
                self.input)

        if self.report['accessible'] is True:
            self.summary['accessible'].append(self.input)
        else:
            self.summary['not-accessible'].append(self.input)
        return self.report

    def displaySummary(self, summary):
        """
        Log the accessible, not accessible and possibly blocked URLs.

        :param summary: dict with the ``accessible``, ``not-accessible`` and
            ``blocked`` keys built by postProcessor.
        """
        if len(summary['accessible']) > 0:
            log.msg("")
            log.msg("Accessible URLS")
            log.msg("---------------")
            for url in summary['accessible']:
                log.msg("* {}".format(url))

        if len(summary['not-accessible']) > 0:
            log.msg("")
            log.msg("Not accessible URLS")
            log.msg("-------------------")
            for url in summary['not-accessible']:
                log.msg("* {}".format(url))

        if len(summary['blocked']) > 0:
            for reason, urls in summary['blocked'].items():
                log.msg("")
                log.msg("URLS possibly blocked due to {}".format(reason))
                log.msg("-----------------------------"+'-'*len(reason))
                for url in urls:
                    log.msg("* {}".format(url))