def test_score_signal():
    """Score the 2021-04-27 "signal" minican and pin the scores of two reports.

    Every measurement must carry test_name "signal" and produce a truthy
    score dict; two known report IDs are compared against exact scores.
    """
    day = date(2021, 4, 27)
    unscoped = {
        "blocking_global": 0.0,
        "blocking_country": 0.0,
        "blocking_isp": 0.0,
        "blocking_local": 0.0,
    }
    # report_id -> exact expected scores
    pinned = {
        "20210427T023145Z_signal_CN_24400_n1_ynto2TVYXtqxhtOo": {
            "analysis": {"signal_backend_failure": "generic_timeout_error"},
            "blocking_general": 1.0,
            **unscoped,
        },
        "20210427T000430Z_signal_AU_45671_n1_Zq1z77FuiG2IkqqC": {
            "blocking_general": 0.0,
            **unscoped,
        },
    }
    for _can_fn, measurement in minicans("signal", day, day, 100):
        assert measurement["test_name"] == "signal"
        scores = fp.score_measurement(measurement)
        assert scores
        report_id = measurement["report_id"]
        if report_id in pinned:
            assert scores == pinned[report_id]
def test_score_psiphon(cans):
    """Validate the layout of psiphon measurements since 2019-09-12 and score them."""
    # The earliest can is canned/2019-09-12/psiphon.0.tar.lz4
    expected_top_level = [
        "data_format_version",
        "measurement_start_time",
        "probe_asn",
        "probe_cc",
        "probe_ip",
        "report_id",
        "software_name",
        "software_version",
        "test_keys",
        "test_name",
        "test_runtime",
        "test_start_time",
        "test_version",
    ]
    for _can_fn, measurement in s3msmts("psiphon", start_date=date(2019, 9, 12)):
        report_id = measurement["report_id"]
        # Some msmts are missing resolver_ip, so it is left out of the comparison
        top_level = set(measurement) - {"resolver_ip"}
        assert (
            sorted(top_level) == expected_top_level
        ), "https://explorer.ooni.org/measurement/{}".format(report_id)

        test_keys = measurement["test_keys"]
        assert sorted(test_keys) == ["bootstrap_time", "failure"]
        # TODO: all msmts have empty test_keys->failure. No scoring is done.
        assert test_keys["failure"] == ""
        assert 0 < test_keys["bootstrap_time"] < 100
        # The result is not inspected: the call only has to complete.
        fp.score_measurement(measurement, [])
def test_score_psiphon(cans):
    """Check psiphon measurements from 2020-01-09/10 and pin one report's scores.

    The loop stops as soon as the pinned report has been verified.
    """
    pinned_rid = "20200109T111813Z_AS30722_RZeO9Ix6ET2LJzqGcinrDp1iqrhaGGDCHSwlOoybq2N9kZITQt"
    pinned_scores = {
        "accuracy": 1.0,
        "blocking_general": 0.0,
        "blocking_global": 0.0,
        "blocking_country": 0.0,
        "blocking_isp": 0.0,
        "blocking_local": 0.0,
        "extra": {"test_runtime": 15.25602748, "bootstrap_time": 5.532639553},
    }
    for _can_fn, measurement in s3msmts("psiphon", date(2020, 1, 9), date(2020, 1, 10)):
        assert measurement["test_name"] == "psiphon"
        report_id = measurement["report_id"]
        # test version 0.3.1 has different mkeys than before
        # Some msmts are missing resolver_ip, so it is not counted
        top_level = set(measurement) - {"resolver_ip"}
        assert len(top_level) in (13, 15)
        assert len(measurement["test_keys"]) in (3, 6, 7)
        assert 1 < measurement["test_keys"]["bootstrap_time"] < 500
        assert measurement["test_keys"]["failure"] is None, measurement
        scores = fp.score_measurement(measurement)
        if report_id != pinned_rid:
            continue
        assert scores == pinned_scores
        break
def test_score_dash(cans):
    """Score the dash cans of 2019-10-26..29 and check a set of known reports.

    Each expected report is removed once seen; the test fails if any of them
    never shows up in the cans.
    """
    # rid -> (blocking_general, accuracy, label); the label is not asserted
    expected = {
        "20191026T015105Z_AS4837_7vwBtbVmZZqwZhdTHnqHan0Nwa7bi7TeJ789htG3RB91C3eyU1": (0.1, 0.0, "blocking_general"),
        "20191026T022317Z_AS17380_ZJGnXdvHl4j1M4xTeskrGhC8SW1KT4buJEjxCsTagCGO2NZeAD": (0.1, 0.0, "json_parse_error"),
        "20191026T032159Z_AS20057_xLjBSrTyZjOn6C7pa5BPyUxyBhzWHbSooKQjUY9zcWADnkakIR": (0.1, 0.0, "eof_error"),
        "20191026T051350Z_AS44244_9yjPG1UbgIjtAFg9LiTUxVhq7hGuG3tG4yMnvt6gRJTaFdQme6": (
            0.1,
            0.0,
            "json_processing_error",
        ),
        "20191026T071332Z_AS7713_caK9GNyp9ZhN7zL9cg2dg0zGhs44CwHmxZtOyK7B6rBKRaGGMF": (0.1, 0.0, "http_request_failed"),
        "20191026T093003Z_AS4837_yHZ0f8Oxyhus9vBKAUa0tA2XMSObIO0frShG6YBieBzY9RiSBg": (0.1, 0.0, "connect_error"),
        "20191026T165434Z_AS0_qPbZHZF8VXUWgzlvqT9Jd7ARuHSl2Dq4tPcEq580rgYZGmV5Um": (0.1, 0.0, "generic_timeout_error"),
        "20191028T160112Z_AS1640_f4zyjjp5vFcwZkAKPrTokayPRdcXPfdEMRbdo1LmIaLZRile6P": (0.1, 0.0, "broken_pipe"),
        "20191029T094043Z_AS49048_qGQxBh6lv26TOfuWfhGcUtz2LZWwboXlfbh058CSF1fOmEUv6Z": (0.1, 0.0, "connection_refused"),
    }
    for day in range(26, 30):
        can = cans["dash_2019_10_{}".format(day)]
        for _msm_n, measurement in load_can(can):
            # input is not set or set to None
            assert measurement.get("input", None) is None
            report_id = measurement["report_id"]
            scores = fp.score_measurement(measurement, [])
            if report_id not in expected:
                continue
            want_blocking, want_accuracy, _label = expected[report_id]
            assert scores["blocking_general"] == want_blocking
            assert scores["accuracy"] == want_accuracy
            del expected[report_id]

    # Anything still listed was never encountered in the cans.
    assert len(expected) == 0, expected.keys()
def test_score_web_connectivity(cans):
    """Score web_connectivity measurements since 2019-11-01 against known verdicts.

    The sum of all "blocking_*" scores must be positive for reports listed as
    blocked and below 0.3 for reports listed as not blocked.
    """
    debug = 0
    blocked = (
        "20191029T180431Z_AS50289_5IKNXzKJUvzKQqnlzU5r91F9KiCl1LfRlEBllZVbDHcDQg5TEt",
        "20191029T180509Z_AS50289_CqU5a3scgi1JJ8cWEYEMSqLUzseS0uIbnWcnGSKKlW1BMbnLc5",
    )
    nonblocked = (
        "20191029T180447Z_AS50289_yWeX5dJzPeh9Pk3TddqG2eO3BvLGT2SOWmOK0lhR7aRV0XX1RC",
        "20191029T180452Z_AS50289_IIuYcQRCGA9S2cj5zFABEOvMbyXSKBExWywVgZkpe5l1uAqyT5",
        "20191029T180525Z_AS50289_UfjRU99n2edoDn9PeWnqyGxHVorOAxBFwZj3WPQ24sl2ii4gC2",
    )
    # In this msmt the probe follows a redirect and lands on a page with a
    # title in russian, while the probe gets title " - "
    # https://explorer.ooni.org/measurement/20191101T071829Z_AS0_sq5lk0Y4jhCECrgk2pAgMWlgOczBLDkIb2OE9QnHf1OEOmwOBz?input=http://www.pravda.ru
    # The probe uses:
    # (body_length_match or headers_match or title_match) and (status_code_match != false)
    for _can_fn, measurement in s3msmts("web_connectivity", start_date=date(2019, 11, 1)):
        report_id = measurement["report_id"]
        target = measurement["input"]
        scores = fp.score_measurement(measurement, [])
        blocking_total = sum(
            value for key, value in scores.items() if key.startswith("blocking_")
        )
        if report_id in blocked:
            assert blocking_total > 0
        elif report_id in nonblocked:
            assert blocking_total < 0.3
        elif debug and blocking_total > 0:
            # Debug aid: dump the first unexpected positive and stop.
            print("https://explorer.ooni.org/measurement/{}?input={}".format(report_id, target))
            print_msm(measurement)
            print(scores)
            assert 0
def test_whatsapp(cans):
    """Score the whatsapp can and pin the exact scores of two reports."""
    debug = False
    blocked_rid = "20190830T002828Z_AS209_fDHPMTveZ66kGmktmW8JiGDgqAJRivgmBkZjAVRmFbH92OIlTX"
    false_positive_rid = "20190829T002541Z_AS29119_kyaEYabRxQW6q41n4kPH9aX5cvFEXNheCj1fguSf4js3JydUbr"

    def expected(general):
        # Only blocking_general differs between the two pinned reports.
        return {
            "blocking_general": general,
            "blocking_global": 0.0,
            "blocking_country": 0.0,
            "blocking_isp": 0.0,
            "blocking_local": 0.0,
        }

    for _msm_n, measurement in load_can(cans["whatsapp"]):
        scores = fp.score_measurement(measurement, [])
        report_id = measurement["report_id"]

        if report_id == blocked_rid:
            assert scores == expected(0.8), measurement

        if report_id == false_positive_rid:
            # The probe is reporting a false positive: due to the empty client headers
            # it hits https://www.whatsapp.com/unsupportedbrowser
            print_msm(measurement)
            assert scores == expected(0.0), measurement

        # To inspect the test dataset for false positives run this:
        if debug and scores["blocking_general"] > 0:
            print_msm(measurement)
            print(scores)
            raise Exception("debug")
def test_score_http_requests():
    """Score up to 3000 http_requests measurements from 2016-12-29.

    Reports whose ID is listed in ``skip`` are ignored and do not count
    towards the 3000 limit. Each processed report ID is printed.
    """
    # Loop-invariant, so built once; the duplicated entry has been dropped.
    skip = (
        "XA4JPdyzcoVgo0tWp6xzcBmxxwBGW92uR8rYdxk4843IMeA3iPgBJZ0Y5cqoIMvN",
        "CkXZQnaB77inMBE161Mnh0VDPAJYRioSRzXVX8QMNdiFyfCdMMDod9X5MGmsbd20",
    )
    # Report IDs previously singled out while debugging (compare rid against
    # one of them and `continue` otherwise to score a single report):
    #   20161225T225955Z_AS200938_ffNnCYb1F8ih0MnomQro2ktalI7d8KnHGwQUXs0ZaqbQHTxBG1
    #   20161225T025526Z_AS200938_FAmaY6pHD0AoFH5DO9I9ppLP1TGnVMkrXszEUc0N7msaGcEUgt
    pinned_rid = "20200601T000014Z_AS8339_RC9uUMBtq5AkMLx6xDtTxEciPvd171jQaYx1i3dDbhH27PemEx"
    cnt = 0
    for _can_fn, msm in s3msmts("http_requests", date(2016, 12, 29), date(2016, 12, 29)):
        assert msm["test_name"] == "http_requests"
        rid = msm["report_id"]
        if rid in skip:
            continue

        cnt += 1
        if cnt > 3000:
            break

        print(rid)
        scores = fp.score_measurement(msm)
        # NOTE(review): the pinned report ID is dated 2020-06-01 while the
        # iterated range is 2016-12-29 - confirm this assertion is ever reached.
        if rid == pinned_rid:
            assert scores == {
                "blocking_general": 0.05714285714285714,
                "blocking_global": 0.0,
                "blocking_country": 0.0,
                "blocking_isp": 0.0,
                "blocking_local": 0.0,
            }
def test_score_measurement_confirmed():
    """A hand-built web_connectivity msmt with a "GdF Stop Page" body is expected to be confirmed."""
    blockpage_request = {"response": {"body": "GdF Stop Page"}}
    measurement = dict(
        input="foo",
        measurement_start_time="",
        probe_asn="1",
        report_id="123",
        test_name="web_connectivity",
        test_start_time="",
        probe_cc="IT",
        test_keys={"requests": [blockpage_request]},
    )
    expected = dict(
        accuracy=0.0,
        blocking_general=1.0,
        blocking_global=0.0,
        blocking_country=1.0,
        blocking_isp=0.0,
        blocking_local=0.0,
        confirmed=True,
    )
    assert fp.score_measurement(measurement) == expected
def test_score_measurement_simple():
    """Score a minimal web_connectivity msmt with empty test_keys and no matches.

    The expected result echoes the measurement metadata and nests the
    all-zero blocking scores under "scores".
    """
    metadata = {
        "input": "foo",
        "measurement_start_time": "",
        "probe_asn": "1",
        "report_id": "123",
        "test_name": "web_connectivity",
        "test_start_time": "",
        "probe_cc": "IE",
    }
    measurement = dict(metadata, test_keys={})
    zero_scores = dict.fromkeys(
        (
            "blocking_general",
            "blocking_global",
            "blocking_country",
            "blocking_isp",
            "blocking_local",
        ),
        0.0,
    )
    expected = dict(metadata, scores=zero_scores)

    result = fp.score_measurement(measurement, [])
    assert result == expected
def test_score_vanilla_tor(cans):
    """Score the tor cans of 2019-10-26..29 and check aggregate blocking statistics.

    One known report must score as blocked; the percentage of blocked
    measurements and the mean blocking_general score must fall in narrow ranges.
    """
    known_timeout = "20191029T012425Z_AS45194_So00Y296Ve6q1TvjOtKqsvH1ieiVF566PlcUUOw4Ia37HGPwPL"
    scored = 0
    flagged = 0
    score_sum = 0
    for day in range(26, 30):
        can = cans["tor_2019_10_{}".format(day)]
        for _msm_n, measurement in load_can(can):
            scores = fp.score_measurement(measurement)
            report_id = measurement["report_id"]
            scored += 1
            general = scores["blocking_general"]
            is_known_timeout = report_id == known_timeout
            if is_known_timeout:
                # timeout
                assert general > 0
            if is_known_timeout or general > 0:
                # To inspect a hit, print
                # "https://explorer.ooni.org/measurement/{}".format(report_id)
                # together with print_msm(measurement) and the scores.
                flagged += 1
                score_sum += general

    blocked_pct = flagged * 100 / scored
    assert 0.35 < blocked_pct < 0.36, blocked_pct
    mean_score = score_sum / scored
    assert 0.003 < mean_score < 0.004
def test_score_tcp_connect(cans):
    """Score the 2018-10-26 tcp_connect can; four inputs of one report must score 0.8."""
    # tcp_connect msmts are identified by (report_id / input)
    debug = 0
    pinned_rid = "20181026T000102Z_AS51570_2EslrKCu0NhDQiCIheVDvilWchWShK6GTC7Go6i31VQrGfXRLM"
    # Kept as a tuple so membership is tested with == only.
    pinned_inputs = (
        "109.105.109.165:22",  # generic_timeout_error
        "obfs4 83.212.101.3:50000",  # connection_refused_error
        "178.209.52.110:22",  # connect_error
        "obfs4 178.209.52.110:443",  # tcp_timed_out_error
    )
    can = cans["tcp_connect_2018_10_26"]
    for _msm_n, measurement in load_can(can):
        report_id = measurement["report_id"]
        target = measurement["input"]
        scores = fp.score_measurement(measurement)
        if report_id == pinned_rid:
            if target in pinned_inputs:
                assert scores["blocking_general"] == 0.8
        elif debug and scores["blocking_general"] > 0.7:
            # Debug aid: dump the first high-scoring unknown msmt and stop.
            print("https://explorer.ooni.org/measurement/{}".format(report_id))
            print_msm(measurement)
            print(scores)
            assert 0
def test_facebook_messenger_newer(cans):
    """Score the newer facebook_messenger can and check the share of blocked msmts.

    Two known reports must score >= 1.0 and are left out of the blocked
    count; the percentage of remaining blocked msmts over the total must
    fall between 7.656 and 7.657.
    """
    can = cans["facebook_messenger2"]  # from 2019-10-29
    known_blocked = (
        # TCP really blocked
        "20191029T101630Z_AS56040_bBOkNtg65fMfH0iOHiG8lMk4UmERxjfJL20ki33lKlyKjS0FkP",
        # DNS returns mostly 0.0.0.0 - but one connection succeeds
        "20191029T020948Z_AS50010_ZUPoP3hOdwazqZnzPurdWgfLvoMcDL1qyOHHFtEtISjNWMgkrX",
    )
    blocked_cnt = 0
    debug = False
    for last_index, measurement in load_can(can):
        scores = fp.score_measurement(measurement)
        report_id = measurement["report_id"]
        if report_id in known_blocked:
            assert scores["blocking_general"] >= 1.0
            continue
        if scores["blocking_general"] > 0:
            blocked_cnt += 1
            if debug:
                print_msm(measurement)
                print(scores)

    # The last index yielded by load_can, plus one, is used as the total.
    ratio = blocked_cnt / (last_index + 1) * 100
    assert ratio > 7.656
    assert ratio < 7.657
def test_facebook_messenger(cans):
    """Score the facebook_messenger can and pin one report known not to be blocked.

    Set ``debug = True`` to stop at the first other measurement that scores
    as blocked and dump it for inspection.
    """
    can = cans["facebook_messenger"]
    debug = False
    not_blocked_rid = "20190829T105137Z_AS6871_TJfyRlEkm6BaCfszHr06nC0c9UsWjWt8mCxRBw1jr0TeqcHTiC"
    for _msm_n, msm in load_can(can):
        scores = fp.score_measurement(msm)
        # An earlier version skipped every other report with `continue`
        # right here, which made the debug branch below unreachable.
        if msm["report_id"] == not_blocked_rid:
            # not blocked
            assert scores == {
                "blocking_general": 0.0,
                "blocking_global": 0.0,
                "blocking_country": 0.0,
                "blocking_isp": 0.0,
                "blocking_local": 0.0,
            }, msm
        elif debug and scores["blocking_general"] > 0:
            # TODO: add more
            # To inspect the test dataset for false positives run this:
            print_msm(msm)
            print(scores)
            raise Exception("debug")
def score_measurement_and_upsert_fastpath(msm, msmt_uid, do_update: bool) -> None:
    """Score a measurement and pass it, with derived flags, to db.upsert_summary.

    Derived values:
      - anomaly: "blocking_general" score (default 0.0) above 0.5
      - failure: "accuracy" score (default 1.0) below 0.5
      - confirmed: the "confirmed" score entry (default False)
      - software name/version: taken from the measurement, "unknown" if absent
      - platform: annotations["platform"] when annotations is a dict,
        otherwise "unset"
    """
    scores = score_measurement(msm)

    annotations = msm.get("annotations")
    if isinstance(annotations, dict):
        platform = annotations.get("platform", "unset")
    else:
        platform = "unset"

    is_anomaly = scores.get("blocking_general", 0.0) > 0.5
    is_failure = scores.get("accuracy", 1.0) < 0.5
    is_confirmed = scores.get("confirmed", False)

    db.upsert_summary(
        msm,
        scores,
        is_anomaly,
        is_confirmed,
        is_failure,
        msmt_uid,
        msm.get("software_name", "unknown"),
        msm.get("software_version", "unknown"),
        platform,
        do_update,
    )
def test_score_meek_fronted_requests_test(cans):
    """Score the meek cans of 2019-10-26..29 and check four known reports."""
    debug = 0
    # report_id -> (score key, expected value)
    pinned = {
        # response: None
        "20191026T110224Z_AS3352_2Iqv4PvPItJ2Z3D46wVRHzesBpdDJZ8xDKH7VKqNTebaiGopDY": ("blocking_general", 1.0),
        # One response: 404
        "20191026T000021Z_AS137_0KaXWBZgn8W6iMfKKhjHJPoPPovChlwxr8dDOh4LxTzHDOKLOq": ("blocking_general", 1.0),
        # 403 hitting cloudfront
        # Content-Type: text/html
        # Date: Sat, 26 Oct 2019 01:01:21 GMT
        # Server: CloudFront
        # Via: 1.1 60858c13889b9be849ae025edc06577d.cloudfront.net (CloudFront)
        # X-Amz-Cf-Pop: ARN53
        # X-Cache: Error from cloudfront
        "20191026T000034Z_AS42668_vpZnPVKEym0dRgYSxyeZulPvnLtxrh6HXzyMx5tE2f4x26CBwX": ("blocking_general", 1.0),
        # requests: is empty
        "20191026T001625Z_AS19108_G9uGTtyJCiOzeCm4jHsP6r8WRZ8cWx07wvcjwAVmrTshJ8WYwA": ("accuracy", 0),
    }
    for day in range(26, 30):
        can = cans["meek_2019_10_{}".format(day)]
        for _msm_n, measurement in load_can(can):
            report_id = measurement["report_id"]
            scores = fp.score_measurement(measurement)
            if report_id in pinned:
                score_key, wanted = pinned[report_id]
                assert scores[score_key] == wanted
            elif debug:
                # Debug aid: dump the first unpinned msmt and stop.
                print("https://explorer.ooni.org/measurement/{}".format(report_id))
                print_msm(measurement)
                print(scores)
                assert 0
def test_telegram(cans):
    """Score the telegram can and pin the exact scores of four reports."""
    unscoped = {
        "blocking_global": 0.0,
        "blocking_country": 0.0,
        "blocking_isp": 0.0,
        "blocking_local": 0.0,
    }
    # This report has no web failure and therefore no "msg" entry.
    no_failure_rid = "20190830T002837Z_AS209_3nMvNkLIqSZMLqRiaiQylAuHxu6qpK7rVJcAA9Dv2UpcNMhPH0"
    no_failure_scores = {
        "blocking_general": 1.5,
        **unscoped,
        "web_failure": None,
        "accessible_endpoints": 10,
        "unreachable_endpoints": 0,
        "http_success_cnt": 0,
        "http_failure_cnt": 0,
    }
    # report_id -> exact expected scores for reports with a web failure
    failing = {
        "20190829T205910Z_AS45184_0TVMQZLWjkfOdqA5b5nNF1XHrafTD4H01GnVTwvfzfiLyLc45r": {
            "blocking_general": 1.0,
            **unscoped,
            "web_failure": "connection_reset",
            "accessible_endpoints": 10,
            "unreachable_endpoints": 0,
            "http_success_cnt": 10,
            "http_failure_cnt": 0,
            "msg": "Telegam failure: connection_reset",
        },
        "20190829T210302Z_AS197207_28cN0a47WSIxF3SZlXvceoLCSk3rSkyeg0n07pKGAi7XYyEQXM": {
            "blocking_general": 3.0,
            **unscoped,
            "web_failure": "generic_timeout_error",
            "accessible_endpoints": 0,
            "unreachable_endpoints": 10,
            "http_success_cnt": 0,
            "http_failure_cnt": 10,
            "msg": "Telegam failure: generic_timeout_error",
        },
        "20190829T220118Z_AS16345_28eP4Hw7PQsLmb4eEPWitNvIZH8utHddaTbWZ9qFcaZudmHPfz": {
            "blocking_general": 3.0,
            **unscoped,
            "web_failure": "connect_error",
            "accessible_endpoints": 0,
            "unreachable_endpoints": 10,
            "http_success_cnt": 0,
            "http_failure_cnt": 10,
            "msg": "Telegam failure: connect_error",
        },
    }
    for _msm_n, measurement in load_can(cans["telegram"]):
        scores = fp.score_measurement(measurement, [])
        report_id = measurement["report_id"]
        if report_id == no_failure_rid:
            assert scores == no_failure_scores, measurement
        elif report_id in failing:
            assert scores == failing[report_id]
def test_score_torsf():
    """The "torsf_1" fixture is expected to score blocking_general 1.0."""
    expected = dict(
        blocking_country=0.0,
        blocking_general=1.0,
        blocking_global=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
    )
    measurement = loadj("torsf_1")
    assert fp.score_measurement(measurement) == expected
def test_score_meek2():
    """The "meek2" fixture is expected to score zero on every blocking key."""
    # msmt from legacy probes having a list as "input"
    measurement = loadj("meek2")
    blocking_keys = (
        "blocking_country",
        "blocking_general",
        "blocking_global",
        "blocking_isp",
        "blocking_local",
    )
    result = fp.score_measurement(measurement)
    assert result == dict.fromkeys(blocking_keys, 0.0)
def test_bug_test_keys_None():
    """The "test_keys_none" fixture is expected to score zero accuracy and zero blocking."""
    expected = dict(
        accuracy=0.0,
        blocking_general=0.0,
        blocking_global=0.0,
        blocking_country=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
    )
    result = fp.score_measurement(loadj("test_keys_none"))
    assert result == expected
def test_score_vanilla_tor_2018(cans):
    """Score the 2018-10-26 tor can; two known timeout reports must score as blocked."""
    real_timeouts = (
        "20181026T003600Z_AS4134_SIts9rD3mrpgIrxrBy6NY7LHJGsBm2dbV4Q8rOHnFEQVESMqB1",
        "20181026T154843Z_AS57963_GKCdB85BgIqr5frZ2Z8qOXVZgdpNGajLRXSidMeRVWg8Qvto3e",
    )
    for _msm_n, measurement in load_can(cans["tor_2018_10_26"]):
        scores = fp.score_measurement(measurement)
        if measurement["report_id"] not in real_timeouts:
            continue
        # Real timeout
        assert scores["blocking_general"] > 0
def test_score_tor():
    """The tor.json fixture is expected to score zero on every blocking key."""
    with open("fastpath/tests/data/tor.json") as fixture:
        measurement = ujson.load(fixture)

    blocking_keys = (
        "blocking_general",
        "blocking_global",
        "blocking_country",
        "blocking_isp",
        "blocking_local",
    )
    result = fp.score_measurement(measurement)
    assert result == dict.fromkeys(blocking_keys, 0.0)
def test_bug_test_keys_None():
    """The test_keys_none.json fixture is expected to score zero accuracy and zero blocking."""
    fixture_path = "fastpath/tests/data/test_keys_none.json"
    with open(fixture_path) as fixture:
        measurement = ujson.load(fixture)

    expected = dict(
        accuracy=0.0,
        blocking_general=0.0,
        blocking_global=0.0,
        blocking_country=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
    )
    assert fp.score_measurement(measurement) == expected
def test_score_http_requests():
    """The "http_requests_1" fixture (a failed msmt) is expected to score zero everywhere."""
    # failed
    measurement = loadj("http_requests_1")
    score_keys = (
        "accuracy",
        "blocking_country",
        "blocking_general",
        "blocking_global",
        "blocking_isp",
        "blocking_local",
    )
    result = fp.score_measurement(measurement)
    assert result == dict.fromkeys(score_keys, 0.0)
def test_score_riseupvpn():
    """The "riseupvpn" fixture is expected to score blocking_general 1.0 with its runtime in "extra"."""
    expected = dict(
        blocking_general=1.0,
        blocking_global=0.0,
        blocking_country=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
        extra={"test_runtime": 1.076507343},
    )
    result = fp.score_measurement(loadj("riseupvpn"))
    assert result == expected
def test_score_tor():
    """The "tor" fixture is expected to score zero blocking with its runtime in "extra"."""
    measurement = loadj("tor")
    expected = dict.fromkeys(
        (
            "blocking_general",
            "blocking_global",
            "blocking_country",
            "blocking_isp",
            "blocking_local",
        ),
        0.0,
    )
    expected["extra"] = {"test_runtime": 0.767114298}
    assert fp.score_measurement(measurement) == expected
def test_score_meek2():
    """The meek2.json fixture is expected to score zero on every blocking key."""
    # msmt from legacy probes having a list as "input"
    with open("fastpath/tests/data/meek2.json") as fixture:
        measurement = ujson.load(fixture)

    expected = dict(
        blocking_country=0.0,
        blocking_general=0.0,
        blocking_global=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
    )
    result = fp.score_measurement(measurement)
    assert result == expected
def test_facebook_messenger_bug(cans):
    """Score every msmt in the facebook_messenger can; one report must score all zeros."""
    target_rid = "20190829T000015Z_AS137_6FCvPkYvOAPUqKgO8QdllyWXTPXUbUAVV3cA43E6drE0KAe4iO"
    all_zero = dict.fromkeys(
        (
            "blocking_general",
            "blocking_global",
            "blocking_country",
            "blocking_isp",
            "blocking_local",
        ),
        0.0,
    )
    for _msm_n, measurement in load_can(cans["facebook_messenger"]):
        # Scoring runs for every measurement, not only the pinned one.
        scores = fp.score_measurement(measurement)
        if measurement["report_id"] == target_rid:
            assert scores == all_zero
def test_score_torsf2():
    """The "torsf_2" fixture is expected to score zero blocking with timings in "extra"."""
    timings = {"bootstrap_time": 78.980935917, "test_runtime": 79.196301917}
    expected = dict(
        blocking_country=0.0,
        blocking_general=0.0,
        blocking_global=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
        extra=timings,
    )
    measurement = loadj("torsf_2")
    assert fp.score_measurement(measurement) == expected
def test_score_http_requests():
    """The http_requests_1.json fixture (a failed msmt) is expected to score zero everywhere."""
    # failed
    with open("fastpath/tests/data/http_requests_1.json") as fixture:
        measurement = ujson.load(fixture)

    expected = dict(
        accuracy=0.0,
        blocking_country=0.0,
        blocking_general=0.0,
        blocking_global=0.0,
        blocking_isp=0.0,
        blocking_local=0.0,
    )
    result = fp.score_measurement(measurement)
    assert result == expected
def test_whatsapp_probe_bug(cans):
    """Score whatsapp msmts from 2020-01-01..10; blocking_general must be 0.0 or 1.0.

    When scores include an "analysis" entry, its "whatsapp_web_accessible"
    value must compare equal to True or False.
    """
    # https://github.com/ooni/probe-engine/issues/341
    debug = False
    first_day = date(2020, 1, 1)
    last_day = date(2020, 1, 10)
    for _can_fn, measurement in s3msmts("whatsapp", first_day, last_day):
        scores = fp.score_measurement(measurement)
        assert scores["blocking_general"] in (0.0, 1.0)

        if "analysis" in scores:
            web_accessible = scores["analysis"]["whatsapp_web_accessible"]
            # On failure the whole measurement is dumped as the message.
            assert web_accessible in (True, False), ujson.dumps(
                measurement, indent=1, sort_keys=True
            )

        if debug and scores["blocking_general"] > 0:
            print_msm(measurement)
            print(scores)
            raise Exception("debug")