Beispiel #1
0
    def test_update_visit(self):
        """Check that UPDATE_VISIT overwrites a stored visit's fields.

        A visit is written with ADD_VISIT and read back by id, then its
        ``duration`` and ``incomplete`` values are changed and written with
        UPDATE_VISIT.  After each write the values read from the database
        must equal the in-memory ones.
        """
        event = cm.BrowserEvent()
        event.event_type = cm.EVENT_NEW_VISIT

        visit = cm.VisitInfo()
        visit.url = "http://example.com"
        visit.start_time = strftime("%Y%m%d-%H%M%S")
        visit.out_db = self.test_db
        visit.duration, visit.incomplete = 0, 1

        def assert_stored_matches():
            # Re-read the visit by id and compare the two updatable fields.
            stored = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID,
                                      visit.visit_id)
            self.assertEqual(visit.duration, stored.duration)
            self.assertEqual(visit.incomplete, stored.incomplete)

        # ADD_VISIT returns the id that the later lookups and update use.
        visit.visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, event, visit)
        assert_stored_matches()

        visit.duration, visit.incomplete = 33, 0
        dbu.insert_to_db(dbu.DBCmd.UPDATE_VISIT, event, visit)
        assert_stored_matches()
    def test_update_visit(self):
        """Round-trip a visit through ADD_VISIT and then UPDATE_VISIT.

        Both after the initial insert and after the update, the ``duration``
        and ``incomplete`` attributes read back with VISIT_BY_ID have to
        match the values held by the local VisitInfo object.
        """
        checked_fields = ("duration", "incomplete")

        new_visit_ev = cm.BrowserEvent()
        new_visit_ev.event_type = cm.EVENT_NEW_VISIT

        visit_info = cm.VisitInfo()
        visit_info.url = "http://example.com"
        visit_info.start_time = strftime("%Y%m%d-%H%M%S")
        visit_info.out_db = self.test_db
        visit_info.duration = 0
        visit_info.incomplete = 1

        # Initial insert: keep the returned id for the lookups below.
        visit_info.visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT,
                                               new_visit_ev, visit_info)
        from_db = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID,
                                   visit_info.visit_id)
        for field in checked_fields:
            self.assertEqual(getattr(visit_info, field),
                             getattr(from_db, field))

        # Change both fields and push the update for the same visit id.
        visit_info.duration = 33
        visit_info.incomplete = 0
        dbu.insert_to_db(dbu.DBCmd.UPDATE_VISIT, new_visit_ev, visit_info)
        from_db = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID,
                                   visit_info.visit_id)
        for field in checked_fields:
            self.assertEqual(getattr(visit_info, field),
                             getattr(from_db, field))
def get_flash_evercookies(db1, db2, other_prof_dbs, seed_prof):
    seed_prof_cookie_db = j(seed_prof, "cookies.sqlite")
    lsos_by_visit = defaultdict(list)

    lsos_db1 = split_lsos(get_distinct_items(db1,
                                             ("content", "flash_cookies")))
    lsos_db2 = split_lsos(get_distinct_items(db2,
                                             ("content", "flash_cookies")))
    lsos_db3 = get_flash_cookies_from_dbs(other_prof_dbs)

    # lsos_db3 = get_distinct_items(db3, ("content", "flash_cookies"))

    print len(lsos_db2), "lsos in db 2", len(lsos_db3), "lsos in db 3"
    print len(lsos_db1 & lsos_db2), "common lsos in db 1 & 2"

    for lso_item in dbu.get_db_entry(db1, dbu.DBCmd.GET_FLASH_COOKIES, None):
        content = lso_item[7]
        v_id = lso_item[1]
        lso_id = lso_item[0]
        splitted = split_lso(content)
        for item in splitted:
            if (item and len(item) > 5 and item in lsos_db2 and
                    item not in lsos_db3):
                if "es|utmccn" in content:
                    print "*******", content, lso_id, splitted
                lsos_by_visit[(v_id, lso_id)].append({"item": item,
                                                      "path": lso_item[5],
                                                      "content": content,
                                                      "key": lso_item[6],
                                                      "domain": lso_item[3]})

    print len(lsos_by_visit), "lsos in db 1"
    return grep_in_visit_and_profile_data(seed_prof_cookie_db, (db1, db2),
                                          lsos_by_visit, dbu.DBTable.LSO,
                                          other_prof_dbs)
def grep_in_visit_and_profile_data(seed_cookie_db, visit_dbs, lsos_by_visit,
                                   exclude_table, other_prof_dbs):
    looked_items = Set()
    for (v_id, _), lso_dicts in lsos_by_visit.iteritems():
        for lso_dict in lso_dicts:
            match = lso_dict["item"]
            if match in looked_items:
                continue

            looked_items.add(match)
            # find the cookies in the original seeded profile that match the common LSO IDs.
            cookies = dbu.get_db_entry(seed_cookie_db, dbu.DBCmd.GREP_IN_PROFILE_DATA,
                                       (match, v_id, exclude_table))
            # since the cookies are removed from the profile before seeding,
            # the cookies in found in the subsequent visits must have been
            # respawned. By "seeding" we mean copying the LSOs from a profile
            # to another computer to allow sites to exploit LSOs but nothing
            # else (e.g. cookies). See Section 4 of the paper for a detailed
            # explanation of the method.
            # https://securehomes.esat.kuleuven.be/~gacar/persistent/the_web_never_forgets.pdf
            if len(cookies["cookie"]):
                vis1_cookies = dbu.get_db_entry(visit_dbs[0],
                                                dbu.DBCmd.GREP_IN_VISIT_COOKIES,
                                                match)
                vis2_cookies = dbu.get_db_entry(visit_dbs[1],
                                                dbu.DBCmd.GREP_IN_VISIT_COOKIES,
                                                match)
                if len(vis1_cookies) and len(vis2_cookies):
                    # the cookie should not be found in visit data from an
                    # unrelated profile.
                    other_vis_cookies = dbu.get_db_entry(other_prof_dbs[0],
                                                         dbu.DBCmd.GREP_IN_VISIT_COOKIES,
                                                         match)
                    if not len(other_vis_cookies):
                        prof_cookies_ul =\
                            get_html_from_moz_cookies(cookies["cookie"])
                        visit1_cookies_ul =\
                            get_html_from_visit_cookies(vis1_cookies)
                        visit2_cookies_ul =\
                            get_html_from_visit_cookies(vis2_cookies)
                        yield match, lso_dict["key"], lso_dict["content"],\
                            lso_dict["domain"], lso_dict["path"],\
                            prof_cookies_ul, visit1_cookies_ul,\
                            visit2_cookies_ul
                    else:
                        print "Found in other db", match
Beispiel #5
0
def grep_in_visit_and_profile_data(seed_cookie_db, visit_dbs, lsos_by_visit,
                                   exclude_table, other_prof_dbs):
    looked_items = Set()
    for (v_id, _), lso_dicts in lsos_by_visit.iteritems():
        for lso_dict in lso_dicts:
            match = lso_dict["item"]
            if match in looked_items:
                continue

            looked_items.add(match)
            # find the cookies in the original seeded profile that match the common LSO IDs.
            cookies = dbu.get_db_entry(seed_cookie_db,
                                       dbu.DBCmd.GREP_IN_PROFILE_DATA,
                                       (match, v_id, exclude_table))
            # since the cookies are removed from the profile before seeding,
            # the cookies in found in the subsequent visits must have been
            # respawned. By "seeding" we mean copying the LSOs from a profile
            # to another computer to allow sites to exploit LSOs but nothing
            # else (e.g. cookies). See Section 4 of the paper for a detailed
            # explanation of the method.
            # https://securehomes.esat.kuleuven.be/~gacar/persistent/the_web_never_forgets.pdf
            if len(cookies["cookie"]):
                vis1_cookies = dbu.get_db_entry(
                    visit_dbs[0], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match)
                vis2_cookies = dbu.get_db_entry(
                    visit_dbs[1], dbu.DBCmd.GREP_IN_VISIT_COOKIES, match)
                if len(vis1_cookies) and len(vis2_cookies):
                    # the cookie should not be found in visit data from an
                    # unrelated profile.
                    other_vis_cookies = dbu.get_db_entry(
                        other_prof_dbs[0], dbu.DBCmd.GREP_IN_VISIT_COOKIES,
                        match)
                    if not len(other_vis_cookies):
                        prof_cookies_ul =\
                            get_html_from_moz_cookies(cookies["cookie"])
                        visit1_cookies_ul =\
                            get_html_from_visit_cookies(vis1_cookies)
                        visit2_cookies_ul =\
                            get_html_from_visit_cookies(vis2_cookies)
                        yield match, lso_dict["key"], lso_dict["content"],\
                            lso_dict["domain"], lso_dict["path"],\
                            prof_cookies_ul, visit1_cookies_ul,\
                            visit2_cookies_ul
                    else:
                        print "Found in other db", match
 def check_localstorage_db_ops(self, ls_items):
     """Assert that exactly one expected localStorage row is stored.

     Reads the localStorage rows recorded for ``self.vi.visit_id`` from
     ``self.vi.out_db`` and compares scope, key, value and URL with the
     EXPECTED_LS_* / LS_TEST_URL constants.  ``ls_items`` is not used
     here: the insert that would consume it is disabled.
     """
     # dbu.insert_to_db(dbu.DBCmd.ADD_LOCALSTORAGE_ITEMS, ls_items, self.vi)
     query_result = dbu.get_db_entry(self.vi.out_db,
                                     dbu.DBCmd.LOCALSTORAGE_BY_VISIT_ID,
                                     self.vi.visit_id)
     stored_rows = query_result.fetchall()
     self.assertEqual(len(stored_rows), 1)
     for stored_row in stored_rows:
         # The first two columns are not checked.
         (_, _, page_url, origin, ls_key, ls_value) = stored_row
         self.assertEqual(origin, EXPECTED_LS_ORIGIN)
         self.assertEqual(ls_key, EXPECTED_LS_KEY)
         self.assertEqual(ls_value, EXPECTED_LS_VALUE)
         self.assertEqual(page_url, LS_TEST_URL)
def count_inclusion(db_file, domain):
    # % of sites that include a domain
    includers = set()
    db_rows = dbu.get_db_entry(db_file, dbu.DBCmd.GREP_IN_REQ_URLS, domain)
    for db_row in db_rows:
        rank = db_row[9]
        if rank not in includers:
            if get_tld(db_row[3]) == domain:
                includers.add(rank)  # rank
                # print rank, db_row[3]
    print len(includers), "includes", domain
    return includers
def count_inclusion(db_file, domain):
    # % of sites that include a domain
    includers = set()
    db_rows = dbu.get_db_entry(db_file,
                               dbu.DBCmd.GREP_IN_REQ_URLS, domain)
    for db_row in db_rows:
        rank = db_row[9]
        if rank not in includers:
            if get_tld(db_row[3]) == domain:
                includers.add(rank)  # rank
                # print rank, db_row[3]
    print len(includers), "includes", domain
    return includers
 def test_r_w_visit_to_db(self):
     """Write a visit with ADD_VISIT and read it back with VISIT_BY_ID.

     The URL and start time read from the database must equal the ones
     that were written.
     """
     event = cm.BrowserEvent()
     event.event_type = cm.EVENT_NEW_VISIT

     written = cm.VisitInfo()
     written.url = "http://example.com"
     written.start_time = strftime("%Y%m%d-%H%M%S")
     written.out_db = self.test_db

     new_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, event, written)
     stored = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID, new_id)

     self.assertEqual(written.url, stored.url)
     self.assertEqual(written.start_time, stored.start_time)
Beispiel #10
0
 def test_r_w_visit_to_db(self):
     """Check that a stored visit keeps its URL and start time.

     A VisitInfo is inserted with ADD_VISIT; the entry fetched by the
     returned id must carry the same ``url`` and ``start_time``.
     """
     new_visit_ev = cm.BrowserEvent()
     new_visit_ev.event_type = cm.EVENT_NEW_VISIT

     visit_info = cm.VisitInfo()
     visit_info.url = "http://example.com"
     visit_info.start_time = strftime("%Y%m%d-%H%M%S")
     visit_info.out_db = self.test_db

     visit_id = dbu.insert_to_db(dbu.DBCmd.ADD_VISIT, new_visit_ev,
                                 visit_info)
     from_db = dbu.get_db_entry(self.test_db, dbu.DBCmd.VISIT_BY_ID,
                                visit_id)
     for field in ("url", "start_time"):
         self.assertEqual(getattr(visit_info, field),
                          getattr(from_db, field))
Beispiel #11
0
 def test_lso_db_ops(self):
     """Store the LSO events parsed from the test file and verify the row.

     Exactly one flash cookie row is expected for ``self.vi.visit_id``;
     its columns 2..7 must match the visit URL, the test host and the
     TEST_LSO_* constants.
     """
     parsed_events = lso.parse_strace_logs(self.vi, test_lso=self.lso_file)
     dbu.insert_to_db(dbu.DBCmd.ADD_LSO_ITEMS, parsed_events, self.vi)

     cursor = dbu.get_db_entry(self.vi.out_db, dbu.DBCmd.GET_FLASH_COOKIES,
                               self.vi.visit_id)
     stored_rows = cursor.fetchall()
     self.assertEqual(len(stored_rows), 1)

     stored_row = stored_rows[0]
     (page_url, domain, filename,
      local_path, key, content) = stored_row[2:8]
     self.assertEqual(page_url, self.vi.url)
     self.assertEqual(domain, cm.ONLINE_TEST_HOST)
     self.assertEqual(filename, TEST_LSO_FILENAME)
     self.assertEqual(local_path, TEST_LSO_REL_PATH)
     self.assertEqual(key, TEST_LSO_KEYNAME)
     self.assertEqual(content, TEST_LSO_VALUE)
Beispiel #12
0
 def test_lso_db_ops(self):
     """Round-trip parsed LSO events through the database.

     The events parsed from ``self.lso_file`` are inserted with
     ADD_LSO_ITEMS.  GET_FLASH_COOKIES for the current visit must then
     return a single row whose page URL, domain, file name, local path,
     key and content equal the expected test values.
     """
     lso_events = lso.parse_strace_logs(self.vi, test_lso=self.lso_file)
     dbu.insert_to_db(dbu.DBCmd.ADD_LSO_ITEMS, lso_events, self.vi)

     lso_rows = dbu.get_db_entry(self.vi.out_db,
                                 dbu.DBCmd.GET_FLASH_COOKIES,
                                 self.vi.visit_id).fetchall()
     self.assertEqual(len(lso_rows), 1)

     # Only columns 2..7 of the row are checked.
     checked_columns = lso_rows[0][2:8]
     page_url, domain, filename, local_path, key, content = checked_columns
     self.assertEqual(page_url, self.vi.url)
     self.assertEqual(domain, cm.ONLINE_TEST_HOST)
     self.assertEqual(filename, TEST_LSO_FILENAME)
     self.assertEqual(local_path, TEST_LSO_REL_PATH)
     self.assertEqual(key, TEST_LSO_KEYNAME)
     self.assertEqual(content, TEST_LSO_VALUE)
 def test_r_w_canvas_to_db(self):
     """Write a canvas (toDataURL) event and read it back by id.

     The insert must return a positive event id.  The entry fetched with
     CANVAS_BY_ID must carry the same visit id, a data URL id of 1, an
     event time of 0 and the event fields that were written.
     """
     event = cm.BrowserEvent()
     event.event_type = cm.EVENT_TODATAURL
     event.url = "http://example.com"
     event.js_file = "http://example.com/fp.js"
     event.js_line = 5
     event.txt = "data:asdsads"

     visit = cm.VisitInfo()
     visit.visit_id = 1
     visit.out_db = self.test_db

     canvas_ev_id = dbu.insert_to_db(dbu.DBCmd.ADD_CANVAS, event, visit)
     self.assertGreater(canvas_ev_id, 0)

     stored = dbu.get_db_entry(self.test_db, dbu.DBCmd.CANVAS_BY_ID,
                               canvas_ev_id)
     visit_id, data_url_id, event_time, stored_event = stored
     self.assertEqual(visit.visit_id, visit_id)
     self.assertEqual(data_url_id, 1)
     self.assertEqual(stored_event.event_type, event.event_type)
     self.assertEqual(stored_event.url, event.url)
     self.assertEqual(stored_event.js_file, event.js_file)
     self.assertEqual(stored_event.js_line, event.js_line)
     self.assertEqual(event_time, 0)
Beispiel #14
0
 def test_r_w_canvas_to_db(self):
     """Check that a canvas event survives a database round trip.

     An EVENT_TODATAURL browser event is stored for visit 1 with
     ADD_CANVAS and fetched again with CANVAS_BY_ID; the stored visit id,
     data URL id, event time and event attributes are verified.
     """
     be = cm.BrowserEvent()
     be.event_type = cm.EVENT_TODATAURL
     be.url = "http://example.com"
     be.js_file = "http://example.com/fp.js"
     be.js_line = 5
     be.txt = "data:asdsads"

     vi = cm.VisitInfo()
     vi.visit_id = 1
     vi.out_db = self.test_db

     canvas_ev_id = dbu.insert_to_db(dbu.DBCmd.ADD_CANVAS, be, vi)
     self.assertGreater(canvas_ev_id, 0)

     (visit_id, data_url_id, event_time, be_db) = dbu.get_db_entry(
         self.test_db, dbu.DBCmd.CANVAS_BY_ID, canvas_ev_id)
     self.assertEqual(vi.visit_id, visit_id)
     self.assertEqual(data_url_id, 1)
     # The stored event must mirror the written one attribute by attribute.
     for attr in ("event_type", "url", "js_file", "js_line"):
         self.assertEqual(getattr(be_db, attr), getattr(be, attr))
     self.assertEqual(event_time, 0)
Beispiel #15
0
def get_flash_evercookies(db1, db2, other_prof_dbs, seed_prof):
    seed_prof_cookie_db = j(seed_prof, "cookies.sqlite")
    lsos_by_visit = defaultdict(list)

    lsos_db1 = split_lsos(get_distinct_items(db1,
                                             ("content", "flash_cookies")))
    lsos_db2 = split_lsos(get_distinct_items(db2,
                                             ("content", "flash_cookies")))
    lsos_db3 = get_flash_cookies_from_dbs(other_prof_dbs)

    # lsos_db3 = get_distinct_items(db3, ("content", "flash_cookies"))

    print len(lsos_db2), "lsos in db 2", len(lsos_db3), "lsos in db 3"
    print len(lsos_db1 & lsos_db2), "common lsos in db 1 & 2"

    for lso_item in dbu.get_db_entry(db1, dbu.DBCmd.GET_FLASH_COOKIES, None):
        content = lso_item[7]
        v_id = lso_item[1]
        lso_id = lso_item[0]
        splitted = split_lso(content)
        for item in splitted:
            if (item and len(item) > 5 and item in lsos_db2
                    and item not in lsos_db3):
                if "es|utmccn" in content:
                    print "*******", content, lso_id, splitted
                lsos_by_visit[(v_id, lso_id)].append({
                    "item": item,
                    "path": lso_item[5],
                    "content": content,
                    "key": lso_item[6],
                    "domain": lso_item[3]
                })

    print len(lsos_by_visit), "lsos in db 1"
    return grep_in_visit_and_profile_data(seed_prof_cookie_db, (db1, db2),
                                          lsos_by_visit, dbu.DBTable.LSO,
                                          other_prof_dbs)
def gen_crawl_report(db_file,
                     db_pass2=None,
                     db_other_profs=None,
                     prof_dir=None):
    """ visits_cnt, cookies, localstorage, flash cookies, cache, indexeddb,
    http reqs/resps
    canvas: list distinct FPers, linked to the sites that include this FPer
    evercookie: list potential evercookies by searching ID-like common strings
    among different vectors"""
    out_dir = os.path.dirname(db_file)
    crawl_name = os.path.basename(os.path.dirname(db_file))
    figs = []  # figures to be plotted, removed for now.
    respawned = []

    if db_pass2 and db_other_profs and prof_dir:
        respawned = ev.get_flash_evercookies(db_file, db_pass2, db_other_profs,
                                             prof_dir)

    start, end = dbu.get_db_entry(db_file, dbu.DBCmd.GET_VISIT_DATES, False)
    visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS, False)[0]
    completed_visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS,
                                            True)[0]
    cookies = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_COOKIES, 0)
    localstorage = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LOCALSTORAGE, 0)
    print "genreport len(localstorage)", len(localstorage)
    xsite_flash_cookies = get_xsite_flash_cookies(db_file)
    xsite_local_storage = get_xsite_local_storage(db_file)

    try:
        flash_cookie_count = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LSO, 0)
    except:
        flash_cookie_count = [""]

    canvas_meta_rows = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_META, 0)
    canvas_scr_domains = {}
    canvas_events_per_script = {}
    canvas_url_counts = {}
    canvas_domain_counts = {}
    canvas_script_urls = dbu.get_db_entry(db_file,
                                          dbu.DBCmd.GET_CANVAS_SCRIPTS, 0)
    false_positives = []
    for canvas_script_url_tup in canvas_script_urls:
        canvas_script_url = canvas_script_url_tup[0]
        canvas_events = dbu.get_db_entry(db_file,
                                         dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT,
                                         canvas_script_url)
        if not ca.is_canvas_false_positive(canvas_events):
            scr_evs = dbu.get_db_entry(db_file,
                                       dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT,
                                       canvas_script_url)
            canvas_events_per_script[canvas_script_url] = scr_evs
            url_cnts = dbu.get_db_entry(db_file,
                                        dbu.DBCmd.COUNT_SITES_BY_CANVAS_SCRIPT,
                                        canvas_script_url)
            canvas_url_counts[canvas_script_url] = url_cnts
            domain = cu.extract_domain(canvas_script_url)
            if domain in canvas_scr_domains:
                canvas_scr_domains[domain].append(canvas_script_url)
            else:
                canvas_scr_domains[domain] = [canvas_script_url]
        else:
            false_positives.append(canvas_script_url_tup)
            # print canvas_script_url_tup

    # Remove false positives
    for false_positive in false_positives:
        canvas_script_urls.remove(false_positive)
    # total_canvas_fp_count = sum()
    all_canvasfp_ranks = {}
    all_canvasfp_ranks_urls = {}
    for canvas_scr_domain, canvas_scr_urls in canvas_scr_domains.iteritems():
        script_ranks_and_urls =\
            dbu.get_db_entry(db_file,
                             dbu.DBCmd.GET_RANK_AND_URLS_BY_CANVAS_SCRIPTS,
                             canvas_scr_urls)
        canvas_domain_counts[canvas_scr_domain] = len(script_ranks_and_urls)
        all_canvasfp_ranks[canvas_scr_domain] = map(lambda x: x[0],
                                                    script_ranks_and_urls)
        all_canvasfp_ranks_urls[canvas_scr_domain] = script_ranks_and_urls

    # print all_canvasfp_ranks
    # fu.write_to_file(j(out_dir, "%s-canvas.json" % crawl_name),
    #                 json.dumps(all_canvasfp_ranks))

    total_canvas_fp_count = sum(canvas_domain_counts.itervalues())

    # print "Total canvas FP count", total_canvas_fp_count
    rank_set = set()
    for _, v in all_canvasfp_ranks.iteritems():
        for rank in v:
            rank_set.add(rank)

    # print "Total canvas FP count - uniq", len(rank_set)

    nameSpace = {
        'title': "Crawl Report",
        'visits_cnt': visits_cnt,
        'completed_visits_cnt': completed_visits_cnt,
        'cookies': cookies[0],
        'localstorage': localstorage[0],
        'flash_cookie_count': flash_cookie_count[0],
        'canvas_meta_rows': canvas_meta_rows,
        'start': start,
        'end': end,
        'canvas_domain_counts': canvas_domain_counts,
        'canvas_url_counts': canvas_url_counts,
        'canvas_events_per_script': canvas_events_per_script,
        'canvas_scr_domains': canvas_scr_domains,
        'total_canvas_fp_count': total_canvas_fp_count,
        'canvas_script_urls': canvas_script_urls,
        'get_tld': cu.extract_domain,
        'xsite_flash_cookies': xsite_flash_cookies,
        'xsite_local_storages': xsite_local_storage,
        'respawned': respawned,
        'figs': figs,
        'canvasfp_ranks_urls': all_canvasfp_ranks_urls,
        # '3rdp_cookies': 3rdp_cookies,
    }
    report_template = Template(template_str, searchList=[nameSpace])
    fu.write_to_file(j(out_dir, "%s-report.html" % crawl_name),
                     str(report_template))
def get_xsite_local_storage(db_file):
    """Return the result of the GET_XSITE_LOCALSTORAGE query on db_file."""
    return dbu.get_db_entry(db_file, dbu.DBCmd.GET_XSITE_LOCALSTORAGE,
                            False)
def get_xsite_flash_cookies(db_file):
    ec_candidates = dbu.get_db_entry(db_file,
                                     dbu.DBCmd.GET_XSITE_FLASH_COOKIES, False)
    for ec_candidate in ec_candidates:
        print list(ec_candidate)
    return ec_candidates
Beispiel #19
0
def gen_crawl_report(db_file, db_pass2=None, db_other_profs=None,
                     prof_dir=None):
    """ visits_cnt, cookies, localstorage, flash cookies, cache, indexeddb,
    http reqs/resps
    canvas: list distinct FPers, linked to the sites that include this FPer
    evercookie: list potential evercookies by searching ID-like common strings
    among different vectors"""
    out_dir = os.path.dirname(db_file)
    crawl_name = os.path.basename(os.path.dirname(db_file))
    figs = []  # figures to be plotted, removed for now.
    respawned = []

    if db_pass2 and db_other_profs and prof_dir:
        respawned = ev.get_flash_evercookies(db_file, db_pass2,
                                             db_other_profs, prof_dir)

    start, end = dbu.get_db_entry(db_file, dbu.DBCmd.GET_VISIT_DATES, False)
    visits_cnt = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_VISITS, False)[0]
    completed_visits_cnt = dbu.get_db_entry(db_file,
                                            dbu.DBCmd.COUNT_VISITS, True)[0]
    cookies = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_COOKIES, 0)
    localstorage = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LOCALSTORAGE, 0)
    print "genreport len(localstorage)", len(localstorage)
    xsite_flash_cookies = get_xsite_flash_cookies(db_file)
    xsite_local_storage = get_xsite_local_storage(db_file)

    try:
        flash_cookie_count = dbu.get_db_entry(db_file, dbu.DBCmd.COUNT_LSO, 0)
    except:
        flash_cookie_count = [""]

    canvas_meta_rows = dbu.get_db_entry(db_file, dbu.DBCmd.GET_CANVAS_META, 0)
    canvas_scr_domains = {}
    canvas_events_per_script = {}
    canvas_url_counts = {}
    canvas_domain_counts = {}
    canvas_script_urls = dbu.get_db_entry(db_file,
                                          dbu.DBCmd.GET_CANVAS_SCRIPTS, 0)
    false_positives = []
    for canvas_script_url_tup in canvas_script_urls:
        canvas_script_url = canvas_script_url_tup[0]
        canvas_events = dbu.get_db_entry(db_file,
                                         dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT,
                                         canvas_script_url)
        if not ca.is_canvas_false_positive(canvas_events):
            scr_evs = dbu.get_db_entry(db_file,
                                       dbu.DBCmd.GET_CANVAS_EVENTS_BY_SCRIPT,
                                       canvas_script_url)
            canvas_events_per_script[canvas_script_url] = scr_evs
            url_cnts = dbu.get_db_entry(db_file,
                                        dbu.DBCmd.COUNT_SITES_BY_CANVAS_SCRIPT,
                                        canvas_script_url)
            canvas_url_counts[canvas_script_url] = url_cnts
            domain = cu.extract_domain(canvas_script_url)
            if domain in canvas_scr_domains:
                canvas_scr_domains[domain].append(canvas_script_url)
            else:
                canvas_scr_domains[domain] = [canvas_script_url]
        else:
            false_positives.append(canvas_script_url_tup)
            # print canvas_script_url_tup

    # Remove false positives
    for false_positive in false_positives:
        canvas_script_urls.remove(false_positive)
    # total_canvas_fp_count = sum()
    all_canvasfp_ranks = {}
    all_canvasfp_ranks_urls = {}
    for canvas_scr_domain, canvas_scr_urls in canvas_scr_domains.iteritems():
        script_ranks_and_urls =\
            dbu.get_db_entry(db_file,
                             dbu.DBCmd.GET_RANK_AND_URLS_BY_CANVAS_SCRIPTS,
                             canvas_scr_urls)
        canvas_domain_counts[canvas_scr_domain] = len(script_ranks_and_urls)
        all_canvasfp_ranks[canvas_scr_domain] = map(lambda x: x[0],
                                                    script_ranks_and_urls)
        all_canvasfp_ranks_urls[canvas_scr_domain] = script_ranks_and_urls

    # print all_canvasfp_ranks
    # fu.write_to_file(j(out_dir, "%s-canvas.json" % crawl_name),
    #                 json.dumps(all_canvasfp_ranks))

    total_canvas_fp_count = sum(canvas_domain_counts.itervalues())

    # print "Total canvas FP count", total_canvas_fp_count
    rank_set = set()
    for _, v in all_canvasfp_ranks.iteritems():
        for rank in v:
            rank_set.add(rank)

    # print "Total canvas FP count - uniq", len(rank_set)

    nameSpace = {'title': "Crawl Report",
                 'visits_cnt': visits_cnt,
                 'completed_visits_cnt': completed_visits_cnt,
                 'cookies': cookies[0],
                 'localstorage': localstorage[0],
                 'flash_cookie_count': flash_cookie_count[0],
                 'canvas_meta_rows': canvas_meta_rows,
                 'start': start,
                 'end': end,
                 'canvas_domain_counts': canvas_domain_counts,
                 'canvas_url_counts': canvas_url_counts,
                 'canvas_events_per_script': canvas_events_per_script,
                 'canvas_scr_domains': canvas_scr_domains,
                 'total_canvas_fp_count': total_canvas_fp_count,
                 'canvas_script_urls': canvas_script_urls,
                 'get_tld': cu.extract_domain,
                 'xsite_flash_cookies': xsite_flash_cookies,
                 'xsite_local_storages': xsite_local_storage,
                 'respawned': respawned,
                 'figs': figs,
                 'canvasfp_ranks_urls': all_canvasfp_ranks_urls,
                 # '3rdp_cookies': 3rdp_cookies,
                 }
    report_template = Template(template_str, searchList=[nameSpace])
    fu.write_to_file(j(out_dir, "%s-report.html" % crawl_name),
                     str(report_template))
Beispiel #20
0
def get_xsite_local_storage(db_file):
    """Query db_file with GET_XSITE_LOCALSTORAGE and return the result."""
    xsite_ls_rows = dbu.get_db_entry(db_file,
                                     dbu.DBCmd.GET_XSITE_LOCALSTORAGE,
                                     False)
    return xsite_ls_rows
Beispiel #21
0
def get_distinct_items(db, item_type):
    """Return a Set of the first column of the GET_DISTINCT_FROM_DB rows.

    ``item_type`` is passed through to the query unchanged.
    """
    rows = dbu.get_db_entry(db, dbu.DBCmd.GET_DISTINCT_FROM_DB, item_type)
    return Set(row[0] for row in rows)
def get_distinct_items(db, item_type):
    """Collect the first element of every GET_DISTINCT_FROM_DB row.

    The query is run on ``db`` with ``item_type`` as its argument; the
    values are returned as a ``Set``.
    """
    distinct = Set()
    for row in dbu.get_db_entry(db, dbu.DBCmd.GET_DISTINCT_FROM_DB,
                                item_type):
        distinct.add(row[0])
    return distinct
Beispiel #23
0
def get_xsite_flash_cookies(db_file):
    ec_candidates = dbu.get_db_entry(db_file,
                                     dbu.DBCmd.GET_XSITE_FLASH_COOKIES, False)
    for ec_candidate in ec_candidates:
        print list(ec_candidate)
    return ec_candidates