def tags2():
    """Store, per client IP, how many log URIs match the keyword list.

    Every log row returned by ``DBUtil.select_all_logs()`` is checked with
    ``uri_contain_keyword(log.cs_uri, keywords)``; rows for which the helper
    returns ``1`` are counted per ``c_ip``.  The resulting count is written to
    ``Features.tags2`` for every row with that ``c_ip``.

    Progress (number of logs, number of matching IPs, every 10th update) is
    printed to stdout.  Returns ``None``.
    """
    # Same keyword set as the original; note it does not contain "http:" /
    # "https:", unlike the list used by simple_features in this file.
    keywords = [
        "javascript", "expression", "applet", "meta", "xml", "blink", "link",
        "style", "script", "object", "iframe", "frame", "frameset", "ilayer",
        "layer", "bgsound", "title", "base", "vbscript", "embed", "`", "--",
        ";", "or", "select", "union", "insert", "update", "replace",
        "truncate", "delete", "sp", "xp", "system(", "eval(", "ftp:", "ptp:",
        "data:", "../", "..\\", "jsessionid", "login.jsp?userid",
        "login.jsp?password",
    ]

    logs = DBUtil.select_all_logs()
    print(len(logs))

    # c_ip -> number of log rows whose URI matched a keyword.
    ip_dict = {}
    for log in logs:
        if uri_contain_keyword(log.cs_uri, keywords) == 1:
            ip_dict[log.c_ip] = ip_dict.get(log.c_ip, 0) + 1
    print(len(ip_dict))

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, (ip, count) in enumerate(ip_dict.items(), 1):
                if index % 10 == 0:
                    print(index)
                cur.execute("UPDATE Features SET tags2=%s WHERE c_ip=%s", (count, ip))
            # One commit for the whole batch of updates.
            mysql_conn.commit()
        finally:
            # Release the cursor even if an UPDATE or the commit fails.
            cur.close()
    finally:
        mysql_conn.close()
def simple_features():
    """Compute per-log features and upsert them into the Features table.

    For every log row from ``DBUtil.select_all_logs()`` this writes:

    * ``visitDuration`` - ``int(log.time_taken_ms)``
    * ``bytesTransfer`` - ``int(log.sc_bytes) * int(log.cs_bytes)``
    * ``seriousError``  - ``1`` when ``int(log.sc_status) >= 400``, else ``0``
    * ``tags``          - result of ``uri_contain_keyword(log.cs_uri, keywords)``

    Rows are keyed by ``log.id``; existing rows are updated through
    ``ON DUPLICATE KEY UPDATE``.  Progress is printed every 1000 logs.
    Returns ``None``.

    NOTE(review): the visible part of this file defines ``simple_features``
    more than once; Python keeps only the last definition executed, so this
    variant is shadowed unless the later ones are removed or renamed.
    """
    # Built once: the list does not depend on the log being processed.
    keywords = [
        "javascript", "expression", "applet", "meta", "xml", "blink", "link",
        "style", "script", "object", "iframe", "frame", "frameset", "ilayer",
        "layer", "bgsound", "title", "base", "vbscript", "embed", "`", "--",
        ";", "or", "select", "union", "insert", "update", "replace",
        "truncate", "delete", "sp", "xp", "system(", "eval(", "http:",
        "https:", "ftp:", "ptp:", "data:", "../", "..\\", "jsessionid",
        "login.jsp?userid", "login.jsp?password",
    ]

    log_list = DBUtil.select_all_logs()
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 1000 == 0:
                    print(index)

                visitDuration = int(log.time_taken_ms)
                # NOTE(review): this is a product, while another
                # simple_features variant in this file uses the sum of the two
                # byte counts - confirm which one is intended.
                bytesTransfer = int(log.sc_bytes) * int(log.cs_bytes)
                seriousError = 1 if int(log.sc_status) >= 400 else 0
                tags = uri_contain_keyword(log.cs_uri, keywords)

                cur.execute(
                    "INSERT INTO Features (id, c_ip, visitDuration, bytesTransfer, seriousError, tags) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), bytesTransfer=VALUES(bytesTransfer), seriousError=VALUES(seriousError), tags=VALUES(tags)",
                    (log.id, log.c_ip, visitDuration, bytesTransfer, seriousError, tags))
            # One commit for the whole batch of upserts.
            mysql_conn.commit()
        finally:
            # Release the cursor even if an INSERT or the commit fails.
            cur.close()
    finally:
        mysql_conn.close()
def simple_features():
    """Compute per-log features and upsert them into the Features table.

    For every log row from ``DBUtil.select_all_logs()`` this writes:

    * ``visitDuration`` - ``int(log.time_taken_ms)``
    * ``bytesTransfer`` - ``int(log.sc_bytes) * int(log.cs_bytes)``
    * ``seriousError``  - ``1`` when ``int(log.sc_status) >= 400``, else ``0``
    * ``tags``          - result of ``access_tag(log.cs_uri)``

    Rows are keyed by ``log.id``; existing rows are updated through
    ``ON DUPLICATE KEY UPDATE``.  Progress is printed every 1000 logs.
    Returns ``None``.

    NOTE(review): the visible part of this file defines ``simple_features``
    more than once; Python keeps only the last definition executed, so this
    variant is shadowed unless the later one is removed or renamed.
    """
    log_list = DBUtil.select_all_logs()
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 1000 == 0:
                    print(index)

                visitDuration = int(log.time_taken_ms)
                # NOTE(review): this is a product, while another
                # simple_features variant in this file uses the sum of the two
                # byte counts - confirm which one is intended.
                bytesTransfer = int(log.sc_bytes) * int(log.cs_bytes)
                seriousError = 1 if int(log.sc_status) >= 400 else 0
                tags = access_tag(log.cs_uri)

                cur.execute(
                    "INSERT INTO Features (id, c_ip, visitDuration, bytesTransfer, seriousError, tags) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), bytesTransfer=VALUES(bytesTransfer), seriousError=VALUES(seriousError), tags=VALUES(tags)",
                    (log.id, log.c_ip, visitDuration, bytesTransfer, seriousError, tags))
            # One commit for the whole batch of upserts.
            mysql_conn.commit()
        finally:
            # Release the cursor even if an INSERT or the commit fails.
            cur.close()
    finally:
        mysql_conn.close()
def simple_features():
    """Compute per-log features and upsert them into the Features table.

    For every log row from ``DBUtil.select_all_logs()`` this writes:

    * ``visitDuration`` - ``int(log.time_taken_ms)``
    * ``numIP``         - ``count_dict[log.c_ip]`` from
      ``DBUtil.select_log_count_by_ip()``, or ``0`` when ``c_ip`` is empty
    * ``bytesTransfer`` - ``int(log.sc_bytes) + int(log.cs_bytes)``
    * ``urlLength``     - length of ``cs_uri`` from its first space to the
      end (the space itself is counted); ``0`` when there is no space

    Rows are keyed by ``log.id``; existing rows are updated through
    ``ON DUPLICATE KEY UPDATE``.  Progress is printed every 1000 logs.
    Returns ``None``.

    Raises:
        KeyError: if a non-empty ``log.c_ip`` is missing from ``count_dict``.

    NOTE(review): the visible part of this file defines ``simple_features``
    several times; this is the last visible definition and therefore the one
    that is bound to the name after import.
    """
    log_list = DBUtil.select_all_logs()
    count_dict = DBUtil.select_log_count_by_ip()
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 1000 == 0:
                    print(index)

                visitDuration = int(log.time_taken_ms)
                bytesTransfer = int(log.sc_bytes) + int(log.cs_bytes)

                # NOTE(review): the slice starts AT the space, so the space is
                # part of the measured length - confirm this is intended.
                space_index = log.cs_uri.find(" ")
                if space_index != -1:
                    urlLength = len(log.cs_uri[space_index:])
                else:
                    urlLength = 0

                # An empty c_ip is never looked up in count_dict.
                numIP = 0 if log.c_ip == "" else count_dict[log.c_ip]

                cur.execute(
                    "INSERT INTO Features (id, c_ip, visitDuration, numIP, bytesTransfer, urlLength) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), numIP=VALUES(numIP), bytesTransfer=VALUES(bytesTransfer), urlLength=VALUES(urlLength)",
                    (log.id, log.c_ip, visitDuration, numIP, bytesTransfer, urlLength))
            # One commit for the whole batch of upserts.
            mysql_conn.commit()
        finally:
            # Release the cursor even if an INSERT or the commit fails.
            cur.close()
    finally:
        mysql_conn.close()
def unsafeChar():
    """Store the per-IP sum of ``scanSafeChar`` scores in Features.unsafechar.

    Pass 1 scores every log URI with ``scanSafeChar(log.cs_uri)``, remembers
    the score per log id (grouped by ``c_ip``) and accumulates the sum of
    scores per ``c_ip``.

    Pass 2 updates ``Features.unsafechar`` with the IP-level sum, but only for
    those log ids whose own score is greater than 0.

    Progress is printed every 1000 logs in pass 1 and every 10 updates in
    pass 2.  Returns ``None``.
    """
    log_list = DBUtil.select_all_logs()
    total_log_num = len(log_list)

    scores_by_ip = {}   # c_ip -> {log id -> score of that log}
    total_by_ip = {}    # c_ip -> sum of scores over all logs of that IP

    for index, log in enumerate(log_list, 1):
        if index % 1000 == 0:
            print(str(index) + " of " + str(total_log_num))
        score = scanSafeChar(log.cs_uri)
        scores_by_ip.setdefault(log.c_ip, {})[log.id] = score
        total_by_ip[log.c_ip] = total_by_ip.get(log.c_ip, 0) + score

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            updated = 0
            for c_ip, scores in scores_by_ip.items():
                agg_value = total_by_ip.get(c_ip)
                for log_id, score in scores.items():
                    # Logs with a zero (or negative) score are left untouched.
                    if score > 0:
                        updated += 1
                        if updated % 10 == 0:
                            print(str(updated) + " of " + str(total_log_num))
                        cur.execute("UPDATE Features SET unsafechar=%s WHERE id=%s", (agg_value, log_id))
            # One commit for the whole batch of updates.
            mysql_conn.commit()
        finally:
            # Release the cursor even if an UPDATE or the commit fails.
            cur.close()
    finally:
        mysql_conn.close()
def numLogs2PerIP():
    """Store, per log, how many Cleaned_Log rows of the same IP are close in time.

    For every log row from ``DBUtil.select_all_logs()`` this counts the rows
    in ``Cleaned_Log`` that have the same ``c_ip`` and a ``log_time`` within
    ``threshold`` seconds before or after the log's own ``log_time`` (SQL
    ``BETWEEN``, so both bounds are inclusive).  The counts are collected
    first and then upserted into ``Features.numLogs2PerIP`` keyed by log id.

    One SELECT is issued per log row.  Progress is printed every 100 logs.
    Returns ``None``.
    """
    threshold = 1.5  # half-width of the time window, in seconds
    # The window size does not depend on the log, so build the delta once.
    window = datetime.timedelta(0, threshold)

    log_list = DBUtil.select_all_logs()
    counts = []  # (log id, number of same-IP rows inside the window)

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 100 == 0:
                    print(index)
                lower_bound = log.log_time - window
                upper_bound = log.log_time + window
                cur.execute('SELECT COUNT(*) FROM Cleaned_Log WHERE c_ip=%s AND (log_time BETWEEN %s AND %s)', (log.c_ip, lower_bound, upper_bound))
                for row in cur.fetchall():
                    counts.append((log.id, row[0]))

            for log_id, count in counts:
                cur.execute(
                    "INSERT INTO Features (id, numLogs2PerIP) VALUES (%s, %s) ON DUPLICATE KEY UPDATE numLogs2PerIP=VALUES(numLogs2PerIP)",
                    (log_id, count))
            # One commit for the whole batch of upserts.
            mysql_conn.commit()
        finally:
            # Release the cursor even if a query or the commit fails.
            cur.close()
    finally:
        mysql_conn.close()