Example #1
    def test_add_symlink(self):
        test_link = self.new_temp_file('test_link')  # symbolic link
        src_file = self.new_temp_file('linktest.txt')
        fu.write_to_file(src_file, "link test")

        fu.add_symlink(test_link, src_file)
        self.failUnless(os.path.lexists(test_link))
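All of these examples call into the project's file-utility module as fu, which is not shown on this page. A minimal sketch of what the helpers used above might look like (the names and call signatures come from the examples; the bodies are assumptions):

# Hypothetical sketch of the "fu" file utilities used throughout these examples.
# Only the names and signatures are taken from the calls above; the bodies are assumed.
import os


def write_to_file(filename, data):
    """Overwrite filename with data."""
    with open(filename, 'wb') as f:
        f.write(data)


def read_file(filename):
    """Return the entire content of filename."""
    with open(filename, 'rb') as f:
        return f.read()


def add_symlink(link_path, target_path):
    """Create a symbolic link at link_path pointing to target_path."""
    os.symlink(target_path, link_path)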
Example #2
def generate_index_file(path):
    table_str = '<table><tr><th>Rank</th><th>Domain</th><th># fonts requested</th></tr>'
    fonts_dict = {}
    i = 0
    for json_file in fu.gen_find_files("*.json", path):
        i = i + 1
        wl_log.info("%s - %s" % (i, json_file))
        domaInfo = load_domainfo_from_json_file(json_file)
        if domaInfo.num_font_loads > FONT_LOAD_THRESHOLD or domaInfo.fp_detected:
            fonts_dict[domaInfo.log_filename] = domaInfo.num_font_loads
            
    sorted_font_dict = sorted(fonts_dict.iteritems(), key=operator.itemgetter(1), reverse=True)
    
    for filename, num_font_loaded in sorted_font_dict:
        #if num_font_loaded > FONT_LOAD_THRESHOLD:
        rank,domain = get_rank_domain_from_filename(filename)
        output_filename = os.path.basename(filename)[:-4] + ".html"
        table_str += '<tr><td>'+  rank + '</td><td><a href="' + output_filename + '">' + domain \
                + '</a></td><td>' + str(num_font_loaded) +  '</td></tr>' 
        
    table_str += '</table>'
    
    html_str = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8' />\
            <meta http-equiv='Content-Type' content='text/html; charset=utf-8' /> </head><body>" + table_str + "</body></html>"
    index_filename = os.path.join(path, "index.html")
    fu.write_to_file(index_filename, html_str.encode('utf-8'))
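fu.gen_find_files is only called here, never defined; judging by the name and the "*.json" pattern argument it is presumably a generator that walks a directory tree and yields matching paths, roughly:

# Assumed shape of fu.gen_find_files; the actual implementation is not in the excerpt.
import fnmatch
import os


def gen_find_files(filepat, top):
    """Yield paths under top whose basename matches the glob pattern filepat."""
    for dirpath, dirnames, filenames in os.walk(top):
        for name in fnmatch.filter(filenames, filepat):
            yield os.path.join(dirpath, name)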
Example #3
def close_index_html(index_file):
    # wl_log.info('Will close %s' % index_file)
    # TODO: add a check so we don't close the same file twice
    if not os.path.isfile(index_file):
        fu.write_to_file(index_file, '')  # create an empty file

    index_src = fu.read_file(index_file)
    if index_src.startswith('<html'):
        wl_log.info('Index file %s already closed' % index_file)
        return

    scripts_src = """<script type="text/javascript" language="javascript" src="http://homes.esat.kuleuven.be/~gacar/jscss/jquery-1.9.1.min.js"></script>
    
    <style type="text/css" title="currentStyle">
        @import "../../js/css/demo_page.css";
        @import "../../js/css/demo_table.css";
    </style>
    <script type="text/javascript" language="javascript" src="http://homes.esat.kuleuven.be/~gacar/jscss/jquery.dataTables.min.js"></script>
    <script type="text/javascript" charset="utf-8">
        $(document).ready(function() {
            $('#results').dataTable( {
            "aaSorting": [[ 2, "desc" ]]
            } );
        } );
    </script>"""

    html_str = "<html><head><meta http-equiv='Content-Type' content='text/html; charset=utf-8' />\
            <meta http-equiv='Content-Type' content='text/html; charset=utf-8' />" + scripts_src + "</head>\n<body><table id ='results'>\
            \n<thead><tr><th>Rank</th><th>Domain</th><th>Fonts</th><th>OffsetWidth</th><th>OffsetHeight</th><th>FP found</th></tr></thead>" + index_src + '</table></body></html>'

    fu.write_to_file(index_file, html_str)
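close_index_html is effectively idempotent: once the file starts with '<html' it is left untouched. A short usage sketch under that assumption (job_dir and the row string are hypothetical):

# Hypothetical usage: rows are written to index.html during the crawl,
# then the file is wrapped into a complete page once at the end.
index_file = os.path.join(job_dir, 'index.html')  # job_dir is assumed to exist
fu.write_to_file(index_file, '<tr><td>1</td><td>example.com</td><td>12</td></tr>')
close_index_html(index_file)  # wraps the rows in <html>...<table id='results'>...
close_index_html(index_file)  # no-op: the file already starts with '<html'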
Example #4
File: mitm.py  Project: thijsh/fpdetective
def store_swfs(msg, crawl_id, dir_path='/tmp', prefix='?'):
    
    referer = msg.request.headers['Referer'][0] if msg.request.headers['Referer'] else ""
    
    if msg.response and msg.response.content:
        print msg.request.get_url()
        if (msg.response.content[:3] in SWF_MAGIC_NUMBERS):  # too wide, but the decompiler will discard non-SWF files
            
            swf_hash = ut.hash_text(msg.response.content)
            swf_url = msg.request.get_url()
            
            db_conn = dbu.mysql_init_db()
            db_cursor = db_conn.cursor(dbu.mdb.cursors.DictCursor)
            rows = swu.get_swf_obj_from_db('hash', swf_hash, db_cursor)
            
            if not rows:
                swf_filename = os.path.join(dir_path, "%s-%s" % (prefix, msg.request.path.split('/')[-1]))
                swf_filename = swf_filename[:MAX_FILENAME_LEN]
                if not swf_filename.endswith('.swf'):
                    swf_filename += '.swf'
                    
                wl_log.info("SWF saved %s referrer: %s" % (os.path.basename(swf_filename), referer))
                
                fu.write_to_file(swf_filename, msg.response.content)
                vector = swu.get_occurence_vector_from_swf(swf_filename, os.path.join(dir_path, prefix))
                duplicate_swf = 0
            else:
                wl_log.info("A swf with same hash exists in DB: %s %s" % (swf_hash, swf_url))
                vector = swu.str_to_vector(rows[0]['occ_vector'])
                swf_filename = rows[0]['local_path']
                duplicate_swf = 1
            
            rank, domain = prefix.rsplit('/')[-1].split('-', 1)
            swf_info = swu.SwfInfo()
            
            swf_info.rank = rank # this might be fake
            swf_info.domain = domain
            swf_info.local_path = swf_filename
            swf_info.occ_vector = vector
            swf_info.hash = swf_hash
            swf_info.url = swf_url
            swf_info.referer = referer        
            swf_info.duplicate = duplicate_swf  # 1 for repeated SWFs (ones whose hash we have already seen)
            swf_info.feat_vector = []
            swf_info.page_url = ''
            swf_info.occ_string = ' '.join(swu.human_readable_occ_vector(vector))
            swf_info.crawl_id = crawl_id
            
            swu.add_swf_to_db(swf_info, db_conn)
            db_conn.commit()
            db_cursor.close()
            db_conn.close()

        elif '.swf' in msg.request.path:
            wl_log.warning(".swf in path but content seems non-swf %s %s" % (msg.request.path, msg.response.content[:100]))
        else:
            pass
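SWF_MAGIC_NUMBERS and MAX_FILENAME_LEN are not defined in this excerpt. Flash files start with one of three 3-byte signatures (FWS for uncompressed, CWS for zlib-compressed, ZWS for LZMA-compressed SWF), so the constant presumably looks something like the following; the filename cap is a guess:

# Assumed module-level constants; not part of the excerpt above.
SWF_MAGIC_NUMBERS = ('FWS', 'CWS', 'ZWS')  # uncompressed, zlib- and LZMA-compressed SWF signatures
MAX_FILENAME_LEN = 250  # hypothetical limit to keep generated filenames within filesystem bounds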
Example #5
    def test_job_folder_should_be_writable(self):
        out_dir = self.create_job_folder()
        self.assert_(os.path.isdir(out_dir), 'Cannot create job folder')
        out_file = os.path.join(out_dir, 'some.log')

        file_content = '123456789'
        fu.write_to_file(out_file, file_content)
        self.assert_(os.path.isfile(out_file), 'Cannot create file in job folder')
        self.assert_(file_content == fu.read_file(out_file),
                     'File content does not match what was written')
Example #6
    def test_hash_file(self):
        filename = self.new_temp_file('hash_test.txt')
        random_str = ut.rand_str(1000)
        fu.write_to_file(filename, random_str)
        self.assertEqual(fu.hash_file(filename, 'sha1'),
                         ut.hash_text(random_str, 'sha1'),
                         'SHA1 hashes don\'t match')
        self.assertEqual(fu.hash_file(filename), ut.hash_text(random_str),
                         'Hashes with default algo don\'t match')
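fu.hash_file and ut.hash_text are likewise not shown; the test only passes if both default to the same algorithm. A sketch assuming hashlib with sha1 as the shared default (the real default is not visible in the excerpt):

# Hypothetical implementations consistent with the test above.
import hashlib


def hash_text(text, algo='sha1'):
    """Return the hex digest of text under the given algorithm."""
    return hashlib.new(algo, text).hexdigest()


def hash_file(filename, algo='sha1'):
    """Return the hex digest of the file's content under the given algorithm."""
    with open(filename, 'rb') as f:
        return hashlib.new(algo, f.read()).hexdigest()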
Example #7
def generate_results_page(domaInfo):
    """Generate results page for the given domain information."""

    back_link = '<div><a href="index.html">Index</a></div>'
    rank_str = "<h2>%s - %s -  <a href='%s' target='_blank'>%s</a></h2>" % (
        str(domaInfo.rank), domaInfo.url, domaInfo.url, EXT_LINK_IMG)

    fonts_list = ' &bull; '.join('<span style="font-family:%s">%s</span>' %
                                 (font_name, font_name)
                                 for font_name in domaInfo.fonts_loaded)

    font_div = "<div class='fonts'><p><b>%s</b> fonts loaded, <b>%s</b> num_offsetWidth_calls, <b>%s</b> num_offsetHeight_calls</p>\
        \n<div class='font_list'>%s</div></div>" % (
        len(domaInfo.fonts_loaded), domaInfo.num_offsetWidth_calls,
        domaInfo.num_offsetHeight_calls, fonts_list)

    unique_urls = set(domaInfo.responses + domaInfo.requests)
    unique_http_urls = [
        url for url in unique_urls if re.match(r"https?:\/\/[^.]+\.[^.]", url)
    ]  # filter out data urls

    unique_domains = set(
        pub_suffix.get_public_suffix(url) for url in unique_http_urls
        if url.startswith('http'))
    unique_domains = [
        mark_if_fp(address) for address in sorted(unique_domains)
    ]
    domain_list = "<ul class='domains'>\n<li>%s</li></ul>" % (
        "</li>\n<li>".join(unique_domains))

    unique_urls = ["<a href='%s' target='_blank'>%s</a> - %s" %\
                    (address, EXT_LINK_IMG, mark_if_fp(address)) for address in sorted(unique_http_urls)]
    url_list = "<ul class='urls'>\n<li>%s</li></ul>" % (
        "</li>\n<li>".join(unique_urls))

    domains_div = "<div class='domains'><p> Number of different domains loaded: <b>"\
     + str(len(unique_domains)) + "</b></p><div class='domains_list'> " + domain_list + "</div></div>"

    urls_div = "<div class='urls'><p> Number of different URLs loaded: <b>"\
     + str(len(unique_urls)) + "</b></p>\n<div class='urls_list'> " + url_list + "</div></div>"

    font_orig_str = "<p>Fonts per origin</p><ul>"
    for orig, fonts in domaInfo.fonts_by_origins.iteritems():
        font_orig_str += "<li>%s: %s %s</li>" % (
            json_field_name_to_origin(orig), len(fonts), fonts)
    font_orig_str += "</ul>"

    html_str = "<html>\n<head>\n<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />\
    \n<meta http-equiv='Content-Type' content='text/html; charset=utf-8' />\
    \n<style>span.red{color:red; font-weight:bold;}\
    \n</style>\n</head>\n<body>" + back_link + rank_str + font_div + font_orig_str + domains_div + urls_div + "\n</body>\n</html>"

    output_filename = domaInfo.log_filename[:-4] + ".html"

    fu.write_to_file(output_filename, html_str)
Example #8
    def test_close_index_html(self):
        index_filename = 'files/html/results/index.html'
        index_filename = self.abs_test_file_name(index_filename)

        # self.new_temp_file(index_filename)  # to remove it after the test finishes
        table_rows = """<tr><td>1</td><td><a href="/home/user/fpbase/run/jobs/20130420-010404/1-google-com.html">http://google.com/</a></td><td>10</td><td>1</td></tr>
        <tr><td>118</td><td><a href="/home/user/fpbase/run/jobs/20130420-010404/118-google-com-ar.html">http://google.com.ar/</a></td><td>3</td><td>51</td></tr>
        <tr><td>27</td><td><a href="/home/user/fpbase/run/jobs/20130420-010404/27-google-co-uk.html">http://google.co.uk/</a></td><td>1</td><td>11</td></tr>"""

        fu.write_to_file(index_filename, table_rows)

        lp.close_index_html(index_filename)
        index_src = fu.read_file(index_filename)
        self.assertTrue('<table' in index_src, 'No table in index.html')
        self.assertTrue('<thead' in index_src, 'No thead in index.html')
        self.assertTrue('</html>' in index_src, 'No closing html tag in index.html')
Example #9
    def test_write_to_file(self):
        filename = self.new_temp_file('write_test.txt')
        random_str = ut.rand_str(100)
        fu.write_to_file(filename, random_str)
        self.assertEqual(random_str, fu.read_file(filename))