Example #1
def setUp(self):
    self.dirs_to_remove = []
    self.db_conn = dbu.mysql_init_db('fp_detective_test')
    self.domainInfo = lp.DomainInfo()  # create a new DomainInfo obj for tests

    # populate the DomainInfo with fixture data
    self.domainInfo.rank = 1
    self.domainInfo.log_filename = '/var/log/syslog'
    self.domainInfo.url = 'http://google.com'
    self.domainInfo.fonts_loaded = ['Arial', 'Tahoma', 'Georgia', '微软雅黑']
    self.domainInfo.fonts_by_origins = {'http://google.com': ['arial', 'Tahoma'],
                                        'http://yahoo.com': ['Georgia']}
    self.domainInfo.requests = ['http://google.com', 'http://yahoo.com']
    self.domainInfo.responses = ['http://abc.com', 'http://xyz.com']
    self.domainInfo.num_font_loads = 50
    self.domainInfo.num_offsetWidth_calls = 15
    self.domainInfo.num_offsetHeight_calls = 15
    self.domainInfo.fp_detected = [fpr.FINGERPRINTER_REGEX.items()[:2]]
    self.domainInfo.crawl_id = 64654
    self.domainInfo.fpd_logs = ['userAgent', 'appCodeName']
    self.domainInfo.fc_dbg_font_loads = ['Arial', 'Tahoma', 'Georgia', 'someotherfont', '微软雅黑']
    self.domainInfo.log_complete = 1

    # build a crawl job backed by a headless agent; its job dir is removed in cleanup
    ha = ag.HeadlessAgent()
    self.crawl_job = ag.CrawlJob(ha)
    self.dirs_to_remove.append(self.crawl_job.job_dir)
    self.crawl_job.urls = ['http://google.com', 'http://yahoo.com']
    self.crawl_job.desc
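Since setUp records self.crawl_job.job_dir in self.dirs_to_remove, a matching tearDown would delete those directories and close the test database connection. A minimal sketch, assuming that cleanup behavior (the tearDown body is not shown in the original):

def tearDown(self):
    # hypothetical cleanup mirroring the fixtures created in setUp
    import shutil
    for dir_path in self.dirs_to_remove:
        shutil.rmtree(dir_path, ignore_errors=True)
    self.db_conn.close()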
Example #2
def test_add_index_html_line(self):
    self.new_temp_file('index.html')
    di = lp.DomainInfo()
    di.log_filename = '/tmp/as.log'
    lp.add_index_html_line(di)  # should append a table row for this domain
    ind_file = lp.get_index_filename_for_domain_info(di)
    ind_src = fu.read_file(ind_file)
    self.assertTrue('tr' in ind_src, "Cannot find tr in index.html")
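The assertion implies that add_index_html_line appends an HTML table row to the index.html sitting beside the domain's log file. A minimal sketch under that assumption (the row's columns are hypothetical, not from the original):

import os

def add_index_html_line(di):
    # assumed behavior: append one <tr> per domain to the index.html
    # next to di.log_filename
    index_file = os.path.join(os.path.dirname(di.log_filename), 'index.html')
    row = '<tr><td>%s</td><td>%s</td></tr>\n' % (di.rank, di.url)
    with open(index_file, 'a') as f:
        f.write(row)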
Example #3
import functools
import os

from libmproxy import flow  # mitmproxy's old dump-reading API is assumed here
# lp, dbu, wl_log, MITM_LOG_EXTENSION and REMOVE_DMP_FILES come from the
# surrounding module


def parse_mitm_dump(basename, worker, crawl_id):
    dumpfile = basename + '.dmp'
    wl_log.info("Will parse mitm dump %s for crawl: %s" % (dumpfile, crawl_id))
    requests = []
    responses = []
    if os.path.isfile(dumpfile):
        fr = flow.FlowReader(open(dumpfile, 'rb'))  # dumps are binary; open as 'rb'
        try:
            for msg in fr.stream():
                requests.append(msg.request.get_url())
                # responses.append(msg.response.get_url())
                # the worker func should take care of db insertion, logging, etc.
                worker(msg, crawl_id)
        except flow.FlowReadError as exc:
            wl_log.critical("Error reading mitm dump %s" % exc)
    else:
        wl_log.critical("Cannot find mitm dump %s" % dumpfile)

    doma_info = lp.DomainInfo()
    doma_info.requests = requests
    doma_info.responses = responses
    doma_info.crawl_id = crawl_id
    doma_info.url = ""
    doma_info.fc_dbg_font_loads = []
    doma_info.fp_detected = lp.get_fp_from_reqs(requests)
    doma_info.log_complete = 1
    # dump filenames are expected to look like "<rank>-<domain>.dmp";
    # check the basename, not the full path, so dashes in directories don't match
    base = os.path.basename(dumpfile)
    print base.split('-')[0]  # debug: rank prefix parsed from the filename
    doma_info.rank = int(base.split('-')[0]) if '-' in base else 0
    db_conn = dbu.mysql_init_db()
    site_info_id = dbu.add_site_info_to_db(doma_info, db_conn)

    # parse the crawl log that accompanies the dump
    log_file = basename + '.txt'
    if not os.path.isfile(log_file):
        log_file = basename + '.' + MITM_LOG_EXTENSION

    insert_js_fun = functools.partial(lp.insert_js_info_to_db,
                                      site_info_id=site_info_id,
                                      db_conn=db_conn)
    lp.parse_crawl_log(log_file, insert_js_fun,
                       crawl_id)  # parse log, insert js info to db

    db_conn.commit()
    db_conn.close()
    wl_log.info("Parsed %s OK" % dumpfile)
    if REMOVE_DMP_FILES:
        os.remove(dumpfile)
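A call site would supply a worker that persists each flow as it streams by. A hypothetical invocation, assuming a dump at /tmp/crawls/1234-google.com.dmp (the worker below is illustrative, not from the original):

def log_flow_worker(msg, crawl_id):
    # illustrative worker: just log the request URL for this crawl
    wl_log.info("crawl %s saw request %s" % (crawl_id, msg.request.get_url()))

parse_mitm_dump('/tmp/crawls/1234-google.com', log_flow_worker, 1234)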
Example #4
def test_get_index_filename_for_domain_info(self):
    di = lp.DomainInfo()
    di.log_filename = '/tmp/as.log'
    self.assertEqual(lp.get_index_filename_for_domain_info(di),
                     '/tmp/index.html')
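The test pins down the expected behavior: the index file lives in the same directory as the domain's log. A minimal implementation consistent with that assertion (a sketch, not necessarily the project's actual code):

import os

def get_index_filename_for_domain_info(domain_info):
    # index.html sits next to the domain's log file
    return os.path.join(os.path.dirname(domain_info.log_filename), 'index.html')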