def test_add_site_info_to_db(self):
    """Insert a mock DomainInfo row and verify each persisted column round-trips.

    Inserts ``self.domainInfo`` via ``dbu.add_site_info_to_db``, reads the row
    back by id, and asserts every stored column matches the source object.
    """
    # insert mock obj to db
    site_info_id = dbu.add_site_info_to_db(self.domainInfo, self.db_conn)
    # retrieve inserted obj
    site_info_row = dbu.get_site_info_from_db(self.db_conn, by='id', value=site_info_id)[0]
    # list-valued fields are flattened to delimited strings on insert,
    # so compare against the same join the insert path uses
    self.assert_db_val_equal(site_info_row, 'http_requests', ' '.join(self.domainInfo.requests))
    self.assert_db_val_equal(site_info_row, 'http_responses', ' '.join(self.domainInfo.responses))
    self.assert_db_val_equal(site_info_row, 'crawl_id', self.domainInfo.crawl_id)
    self.assert_db_val_equal(site_info_row, 'url', self.domainInfo.url)
    self.assert_db_val_equal(site_info_row, 'fc_dbg_font_loads', ','.join(self.domainInfo.fc_dbg_font_loads))
    self.assert_db_val_equal(site_info_row, 'num_fc_dbg_font_loads', len(self.domainInfo.fc_dbg_font_loads))
    self.assert_db_val_equal(site_info_row, 'rank', self.domainInfo.rank)
    self.assert_db_val_equal(site_info_row, 'log_complete', self.domainInfo.log_complete)
    # consistency fix: this variant did not cover the fp_detected column,
    # unlike its sibling test in this file — add the same assertion
    self.assert_db_val_equal(site_info_row, 'fp_detected', ' '.join([str(fp) for fp in self.domainInfo.fp_detected]))
def test_add_site_info_to_db(self):
    """Round-trip a mock DomainInfo through the DB and check every stored column."""
    info = self.domainInfo
    # insert mock obj to db
    row_id = dbu.add_site_info_to_db(info, self.db_conn)
    # retrieve inserted obj
    row = dbu.get_site_info_from_db(self.db_conn, by='id', value=row_id)[0]
    # (column name, expected value) pairs — list fields are stored as
    # delimited strings, mirroring the insert path's joins
    expected_columns = [
        ('http_requests', ' '.join(info.requests)),
        ('http_responses', ' '.join(info.responses)),
        ('crawl_id', info.crawl_id),
        ('url', info.url),
        ('fc_dbg_font_loads', ','.join(info.fc_dbg_font_loads)),
        ('num_fc_dbg_font_loads', len(info.fc_dbg_font_loads)),
        ('rank', info.rank),
        ('log_complete', info.log_complete),
        ('fp_detected', ' '.join([str(fp) for fp in info.fp_detected])),
    ]
    for column, value in expected_columns:
        self.assert_db_val_equal(row, column, value)
def parse_mitm_dump(basename, worker, crawl_id):
    """Parse a mitmproxy dump, store the site info in the DB, and feed each
    message to *worker*.

    basename -- dump/log path without extension ('<basename>.dmp' is read)
    worker   -- callable(msg, crawl_id); takes care of db insertion, logging etc.
    crawl_id -- id of the crawl this dump belongs to
    """
    dumpfile = basename + '.dmp'
    wl_log.info("Will parse mitm dump %s for crawl: %s" % (dumpfile, crawl_id))
    requests = []
    responses = []
    if os.path.isfile(dumpfile):
        # fix: open via a context manager so the handle is closed even when
        # FlowReadError aborts the stream (the original leaked it)
        with open(dumpfile) as dump_fd:
            fr = flow.FlowReader(dump_fd)
            try:
                for msg in fr.stream():
                    requests.append(msg.request.get_url())
                    # responses.append(msg.response.get_url())
                    worker(msg, crawl_id)  # worker handles db insertion, logging etc.
            except flow.FlowReadError:
                # best-effort: truncated/corrupt dumps are expected; keep
                # whatever was parsed so far
                pass  # wl_log.critical("Error reading mitm dump %s" % exc)
    else:
        wl_log.critical("Cannot find mitm dump %s" % dumpfile)
    doma_info = lp.DomainInfo()
    doma_info.requests = requests
    doma_info.responses = responses
    doma_info.crawl_id = crawl_id
    doma_info.url = ""
    doma_info.fc_dbg_font_loads = []
    doma_info.fp_detected = lp.get_fp_from_reqs(requests)
    doma_info.log_complete = 1
    # fix: the rank prefix lives in the *basename* ("<rank>-<domain>.dmp");
    # the old guard tested '-' in the full path, so a dash in a directory
    # component with a non-numeric basename raised ValueError. The leftover
    # debug print statement was removed as well.
    dump_basename = os.path.basename(dumpfile)
    doma_info.rank = int(dump_basename.split('-')[0]) if '-' in dump_basename else 0
    db_conn = dbu.mysql_init_db()
    try:
        site_info_id = dbu.add_site_info_to_db(doma_info, db_conn)
        # parse the crawl log, inserting js info tied to this site row
        log_file = basename + '.txt'
        if not os.path.isfile(log_file):
            log_file = basename + '.' + MITM_LOG_EXTENSION
        insert_js_fun = functools.partial(lp.insert_js_info_to_db, site_info_id=site_info_id, db_conn=db_conn)
        lp.parse_crawl_log(log_file, insert_js_fun, crawl_id)  # parse log, insert js info to db
        db_conn.commit()
    finally:
        db_conn.close()  # fix: don't leak the connection if parsing/insert raises
    wl_log.info("Parsed %s OK" % (dumpfile))
    if REMOVE_DMP_FILES:
        os.remove(dumpfile)
def parse_mitm_dump(basename, worker, crawl_id):
    """Parse a mitmproxy dump file and persist its site info.

    Reads '<basename>.dmp', collects request URLs, hands every message to
    *worker* (callable(msg, crawl_id) that handles db insertion, logging
    etc.), then inserts a DomainInfo row and the parsed crawl-log js info.
    """
    dumpfile = basename + '.dmp'
    wl_log.info("Will parse mitm dump %s for crawl: %s" % (dumpfile, crawl_id))
    requests = []
    responses = []
    if os.path.isfile(dumpfile):
        # fix: context-manage the dump handle; the original open() was never
        # closed, leaking the descriptor on FlowReadError
        with open(dumpfile) as dump_fd:
            reader = flow.FlowReader(dump_fd)
            try:
                for msg in reader.stream():
                    requests.append(msg.request.get_url())
                    # responses.append(msg.response.get_url())
                    worker(msg, crawl_id)  # worker handles db insertion, logging etc.
            except flow.FlowReadError:
                # deliberate best-effort: keep what was parsed from a
                # truncated dump
                pass  # wl_log.critical("Error reading mitm dump %s" % exc)
    else:
        wl_log.critical("Cannot find mitm dump %s" % dumpfile)
    doma_info = lp.DomainInfo()
    doma_info.requests = requests
    doma_info.responses = responses
    doma_info.crawl_id = crawl_id
    doma_info.url = ""
    doma_info.fc_dbg_font_loads = []
    doma_info.fp_detected = lp.get_fp_from_reqs(requests)
    doma_info.log_complete = 1
    # fix: guard on the basename, not the full path — dumps are named
    # "<rank>-<domain>.dmp", and a dash in a parent directory used to send a
    # rank-less basename into int(), raising ValueError. Leftover debug
    # print statement dropped.
    dump_basename = os.path.basename(dumpfile)
    doma_info.rank = int(dump_basename.split('-')[0]) if '-' in dump_basename else 0
    db_conn = dbu.mysql_init_db()
    try:
        site_info_id = dbu.add_site_info_to_db(doma_info, db_conn)
        # parse the crawl log; fall back to the mitm log extension
        log_file = basename + '.txt'
        if not os.path.isfile(log_file):
            log_file = basename + '.' + MITM_LOG_EXTENSION
        insert_js_fun = functools.partial(lp.insert_js_info_to_db, site_info_id=site_info_id, db_conn=db_conn)
        lp.parse_crawl_log(log_file, insert_js_fun, crawl_id)  # parse log, insert js info to db
        db_conn.commit()
    finally:
        db_conn.close()  # fix: close the connection even when an insert/parse step raises
    wl_log.info("Parsed %s OK" % (dumpfile))
    if REMOVE_DMP_FILES:
        os.remove(dumpfile)
def insert_domain_info_to_db(domaInfo):
    """Persist a DomainInfo object: its site row plus the associated js info.

    Opens a fresh MySQL connection, inserts the site info and the js info
    keyed to the new site row, and commits.
    """
    db_conn = dbu.mysql_init_db()
    try:
        site_info_id = dbu.add_site_info_to_db(domaInfo, db_conn)
        dbu.add_js_info_to_db(domaInfo, db_conn, site_info_id)
        db_conn.commit()
    finally:
        # fix: the original leaked the connection when either insert raised
        db_conn.close()