# mmanika_HW3_Q3.py
import json
from tld import get_fld
from adblockparser import AdblockRules
from prettytable import PrettyTable
from urllib.parse import urlparse
def _request_options(resource_type, third_party, hostname):
    """Build the options dict handed to ``AdblockRules.should_block``."""
    # Resource-type keys that are forwarded to the rule matcher.
    known_types = ('image', 'xmlhttprequest', 'document', 'font', 'script', 'stylesheet', 'other')
    # The HAR captures label XHR requests "xhr"; the rule options use the
    # key "xmlhttprequest".
    if resource_type == "xhr":
        resource_type = "xmlhttprequest"
    # NOTE(review): adblockparser's "domain" option looks like it is meant to
    # be the domain of the page that issued the request; the request's own
    # hostname is passed here, exactly as before -- confirm which is intended.
    request_options = {"third-party": third_party, "domain": hostname}
    if resource_type in known_types:
        request_options[resource_type] = True
    return request_options


def main(harfile_path, domain, blockfile):
    """Count how many requests in a HAR capture an Adblock rule list would block.

    Args:
        harfile_path: Path to a HAR (JSON) file; each item of
            ``log.entries`` is treated as one HTTP request.
        domain: First-level domain of the visited site (e.g. ``"cnn.com"``).
            A request whose first-level domain differs from it is treated
            as third-party.
        blockfile: Path to a text file of Adblock filter rules, one per line.

    Returns:
        A dict with an always-empty ``"url_data"`` dict and a ``"stats"``
        dict holding ``"domain"``, ``"req"`` (requests seen), ``"succ"``
        (requests not blocked) and ``"block"`` (requests blocked).

    Raises:
        OSError: If either file cannot be opened.
        ValueError: If the HAR file is not valid JSON.
        KeyError: If the HAR lacks ``log.entries`` or an entry lacks
            ``request.url``.
    """
    # Context managers close the files even when reading or parsing fails.
    with open(harfile_path, "r", encoding="utf-8") as harfile:
        harfile_json = json.load(harfile)
    with open(blockfile, "r", encoding="utf-8") as bfile:
        block_list = bfile.readlines()
    rules = AdblockRules(block_list)

    stats = {
        "domain": domain,
        "req": 0,
        "succ": 0,
        "block": 0,
    }
    adblock_db = {"url_data": {}, "stats": stats}

    for entry in harfile_json['log']['entries']:
        url = entry['request']['url']
        urlparts = urlparse(url)
        print("Processing {} ...".format(url))
        # "_resourceType" is an underscore-prefixed (custom) HAR field and
        # may be absent. Reading it with .get() lets such entries still be
        # checked (without a resource-type option) instead of being
        # silently dropped from the block/succ counts.
        resource_type = entry.get("_resourceType")
        try:
            fld = get_fld(url, fail_silently=True)
            stats["req"] += 1
            request_options = _request_options(resource_type, fld != domain, urlparts.hostname)
            blocked = rules.should_block(url, request_options)
        except Exception as exc:
            # Best effort: one entry that cannot be evaluated must not abort
            # the run, but the failure is reported rather than hidden.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            print("Skipping {}: {!r}".format(url, exc))
            continue
        stats["block" if blocked else "succ"] += 1
    return adblock_db
if __name__ == '__main__':
    # The same rule file is applied to both captures.
    rules_path = "easylist.txt"
    cnn_stats = main("www.cnn.com.har", "cnn.com", rules_path)["stats"]
    macys_stats = main("www.macys.com.har", "macys.com", rules_path)["stats"]

    # One summary row per site: total requests seen vs. requests blocked.
    summary = PrettyTable()
    summary.field_names = ["Site", "# of total HTTP Requests", "# of HTTP Requests Blocked"]
    for site, site_stats in (("www.cnn.com", cnn_stats), ("www.macys.com", macys_stats)):
        summary.add_row([site, site_stats["req"], site_stats["block"]])
    print(summary)