def setUp(self):
    self.dirs_to_remove = []
    self.db_conn = dbu.mysql_init_db('fp_detective_test')
    self.domainInfo = lp.DomainInfo()  # create a new DomainInfo obj for tests
    self.domainInfo.rank = 1
    self.domainInfo.log_filename = '/var/log/syslog'
    self.domainInfo.url = 'http://google.com'
    self.domainInfo.fonts_loaded = ['Arial', 'Tahoma', 'Georgia', '微软雅黑']
    self.domainInfo.fonts_by_origins = {'http://google.com': ['arial', 'Tahoma'],
                                        'http://yahoo.com': ['Georgia']}
    self.domainInfo.requests = ['http://google.com', 'http://yahoo.com']
    self.domainInfo.responses = ['http://abc.com', 'http://xyz.com']
    self.domainInfo.num_font_loads = 50
    self.domainInfo.num_offsetWidth_calls = 15
    self.domainInfo.num_offsetHeight_calls = 15
    self.domainInfo.fp_detected = [fpr.FINGERPRINTER_REGEX.items()[:2]]
    self.domainInfo.crawl_id = 64654
    self.domainInfo.fpd_logs = ['userAgent', 'appCodeName']
    self.domainInfo.fc_dbg_font_loads = ['Arial', 'Tahoma', 'Georgia',
                                         'someotherfont', '微软雅黑']
    self.domainInfo.log_complete = 1

    ha = ag.HeadlessAgent()
    self.crawl_job = ag.CrawlJob(ha)
    self.dirs_to_remove.append(self.crawl_job.job_dir)
    self.crawl_job.urls = ['http://google.com', 'http://yahoo.com']
    self.crawl_job.desc
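# setUp opens a test DB connection and registers job dirs in self.dirs_to_remove,
# which implies a matching cleanup step. A minimal tearDown sketch (hypothetical,
# not from the original source; assumes the dirs can simply be deleted and that
# `shutil` is imported):
def tearDown(self):
    self.db_conn.close()  # close the test DB connection opened in setUp
    for dir_path in self.dirs_to_remove:
        shutil.rmtree(dir_path, ignore_errors=True)  # remove registered crawl job dirs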
def test_add_index_html_line(self):
    self.new_temp_file('index.html')
    di = lp.DomainInfo()
    di.log_filename = '/tmp/as.log'
    lp.add_index_html_line(di)
    ind_file = lp.get_index_filename_for_domain_info(di)
    ind_src = fu.read_file(ind_file)
    self.assertTrue('tr' in ind_src, "Cannot find tr in index.html")
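# new_temp_file is assumed to be a helper on the test class that creates a file
# and registers it for cleanup; a hypothetical sketch (not the original
# implementation), assuming a self.files_to_remove list used by tearDown:
def new_temp_file(self, filename):
    path = os.path.join('/tmp', filename)
    open(path, 'a').close()  # create the (empty) file if it does not exist
    self.files_to_remove.append(path)  # hypothetical cleanup list
    return path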
def parse_mitm_dump(basename, worker, crawl_id):
    dumpfile = basename + '.dmp'
    wl_log.info("Will parse mitm dump %s for crawl: %s" % (dumpfile, crawl_id))
    requests = []
    responses = []
    if os.path.isfile(dumpfile):
        fr = flow.FlowReader(open(dumpfile))
        try:
            for msg in fr.stream():
                requests.append(msg.request.get_url())
                # responses.append(msg.response.get_url())
                worker(msg, crawl_id)  # worker should take care of db insertion, logging etc.
        except flow.FlowReadError:
            pass  # wl_log.critical("Error reading mitm dump %s" % exc)
    else:
        wl_log.critical("Cannot find mitm dump %s" % dumpfile)

    doma_info = lp.DomainInfo()
    doma_info.requests = requests
    doma_info.responses = responses
    doma_info.crawl_id = crawl_id
    doma_info.url = ""
    doma_info.fc_dbg_font_loads = []
    doma_info.fp_detected = lp.get_fp_from_reqs(requests)
    doma_info.log_complete = 1
    # dump filenames are expected to start with the site's rank, e.g. 1-google.com.dmp
    doma_info.rank = int(os.path.basename(dumpfile).split('-')[0]) if '-' in dumpfile else 0

    db_conn = dbu.mysql_init_db()
    site_info_id = dbu.add_site_info_to_db(doma_info, db_conn)

    # parse the crawl log and insert js info into the db
    log_file = basename + '.txt'
    if not os.path.isfile(log_file):
        log_file = basename + '.' + MITM_LOG_EXTENSION
    insert_js_fun = functools.partial(lp.insert_js_info_to_db,
                                      site_info_id=site_info_id, db_conn=db_conn)
    lp.parse_crawl_log(log_file, insert_js_fun, crawl_id)
    db_conn.commit()
    db_conn.close()
    wl_log.info("Parsed %s OK" % dumpfile)
    if REMOVE_DMP_FILES:
        os.remove(dumpfile)
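# parse_mitm_dump delegates per-message processing to the `worker` callback.
# A minimal sketch of a compatible worker and call site (hypothetical names;
# only the (msg, crawl_id) signature and the msg.request.get_url() accessor
# are taken from the code above):
def log_request_worker(msg, crawl_id):
    # log each request URL seen in the dump for this crawl
    wl_log.info("crawl %s request: %s" % (crawl_id, msg.request.get_url()))

# parse_mitm_dump('/tmp/crawls/1-google.com', log_request_worker, 64654)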
def test_get_index_filename_for_domain_info(self):
    di = lp.DomainInfo()
    di.log_filename = '/tmp/as.log'
    self.assertEqual(lp.get_index_filename_for_domain_info(di),
                     '/tmp/index.html')