def tags2():
    """Store, per client IP, how many logs have a URI flagged by keyword.

    Every log returned by ``DBUtil.select_all_logs()`` is passed, together
    with the keyword list below, to ``uri_contain_keyword``.  Logs for which
    that helper returns 1 are counted per ``c_ip`` and the count is written
    to the ``tags2`` column of ``Features``.

    All updates are committed together once the loop has finished, and the
    cursor and connection are closed even when an update raises.
    """
    keywords = ["javascript", "expression", "applet", "meta", "xml", "blink",
                "link", "style", "script", "object", "iframe", "frame",
                "frameset", "ilayer", "layer", "bgsound", "title", "base",
                "vbscript", "embed", "`", "--", ";", "or", "select", "union",
                "insert", "update", "replace", "truncate", "delete", "sp",
                "xp", "system(", "eval(", "ftp:", "ptp:", "data:", "../",
                "..\\", "jsessionid", "login.jsp?userid",
                "login.jsp?password"]

    logs = DBUtil.select_all_logs()
    print(len(logs))

    # c_ip -> number of logs whose URI was flagged
    ip_dict = {}
    for log in logs:
        if uri_contain_keyword(log.cs_uri, keywords) == 1:
            ip_dict[log.c_ip] = ip_dict.get(log.c_ip, 0) + 1
    print(len(ip_dict))

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, (ip, count) in enumerate(ip_dict.items(), 1):
                if index % 10 == 0:
                    print(index)  # progress indicator
                cur.execute("UPDATE Features SET tags2=%s WHERE c_ip=%s",
                            (count, ip))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def normalize_feature():
    """Z-score normalize all feature vectors and store the result.

    Reads the vectors from ``DBUtil.select_all_features()``, standardizes
    every column with that column's own mean and standard deviation, and
    inserts the first eight components of each vector into
    ``Normalized_Features`` with ids 1, 2, 3, ... in input order.

    Columns whose standard deviation is 0 are mapped to 0.0 instead of
    NaN/inf.  Nothing is written when there are no feature vectors.  The
    inserts are committed together, and the cursor and connection are
    closed even when an insert raises.
    """
    raw_vectors = DBUtil.select_all_features()
    if len(raw_vectors) == 0:
        return

    feature_vectors = numpy.asarray(raw_vectors, dtype=float)
    # Both statistics are taken per column (axis=0) so that each feature is
    # centred on its own mean rather than on the mean of the whole matrix.
    avg = numpy.mean(feature_vectors, axis=0)
    std = numpy.std(feature_vectors, axis=0)
    # A constant column has std == 0; dividing by 1 leaves it at 0.0.
    std = numpy.where(std == 0, 1.0, std)
    normalized_vectors = ((feature_vectors - avg) / std).tolist()

    insert_sql = (
        "INSERT INTO Normalized_Features (id, visitDuration, bytesTransfer, "
        "numLogs2PerIP, numErrorPerIP, numLogsSecure, seriousError, tags, "
        "tags2) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, vector in enumerate(normalized_vectors, 1):
                if index % 100 == 0:
                    print(index)  # progress indicator
                cur.execute(
                    insert_sql,
                    (index, vector[0], vector[1], vector[2], vector[3],
                     vector[4], vector[5], vector[6], vector[7]))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def simple_features():
    """Derive the basic per-log features and upsert them into ``Features``.

    For every log from ``DBUtil.select_all_logs()`` this stores:

    * ``visitDuration`` - ``time_taken_ms`` as an int
    * ``bytesTransfer`` - ``sc_bytes`` multiplied by ``cs_bytes``
    * ``seriousError``  - 1 when ``sc_status`` is 400 or higher, else 0
    * ``tags``          - result of ``uri_contain_keyword`` on ``cs_uri``

    Rows are keyed by the log id; an existing row has these four columns
    overwritten.  The upserts are committed together, and the cursor and
    connection are closed even when a statement raises.
    """
    # Built once: the keyword list does not depend on the log being scanned.
    keywords = ["javascript", "expression", "applet", "meta", "xml", "blink",
                "link", "style", "script", "object", "iframe", "frame",
                "frameset", "ilayer", "layer", "bgsound", "title", "base",
                "vbscript", "embed", "`", "--", ";", "or", "select", "union",
                "insert", "update", "replace", "truncate", "delete", "sp",
                "xp", "system(", "eval(", "http:", "https:", "ftp:", "ptp:",
                "data:", "../", "..\\", "jsessionid", "login.jsp?userid",
                "login.jsp?password"]
    upsert_sql = (
        "INSERT INTO Features (id, c_ip, visitDuration, bytesTransfer, "
        "seriousError, tags) VALUES (%s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), "
        "bytesTransfer=VALUES(bytesTransfer), "
        "seriousError=VALUES(seriousError), tags=VALUES(tags)")

    log_list = DBUtil.select_all_logs()
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 1000 == 0:
                    print(index)  # progress indicator
                visitDuration = int(log.time_taken_ms)
                # NOTE(review): this is a product, not a sum, of the two
                # byte counts - confirm that is the intended feature.
                bytesTransfer = int(log.sc_bytes) * int(log.cs_bytes)
                seriousError = 1 if int(log.sc_status) >= 400 else 0
                tags = uri_contain_keyword(log.cs_uri, keywords)
                cur.execute(upsert_sql,
                            (log.id, log.c_ip, visitDuration, bytesTransfer,
                             seriousError, tags))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def simple_features():
    """Derive the basic per-log features and upsert them into ``Features``.

    For every log from ``DBUtil.select_all_logs()`` this stores:

    * ``visitDuration`` - ``time_taken_ms`` as an int
    * ``bytesTransfer`` - ``sc_bytes`` multiplied by ``cs_bytes``
    * ``seriousError``  - 1 when ``sc_status`` is 400 or higher, else 0
    * ``tags``          - result of ``access_tag`` on ``cs_uri``

    Rows are keyed by the log id; an existing row has these four columns
    overwritten.  The upserts are committed together, and the cursor and
    connection are closed even when a statement raises.
    """
    upsert_sql = (
        "INSERT INTO Features (id, c_ip, visitDuration, bytesTransfer, "
        "seriousError, tags) VALUES (%s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), "
        "bytesTransfer=VALUES(bytesTransfer), "
        "seriousError=VALUES(seriousError), tags=VALUES(tags)")

    log_list = DBUtil.select_all_logs()
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 1000 == 0:
                    print(index)  # progress indicator
                visitDuration = int(log.time_taken_ms)
                # NOTE(review): this is a product, not a sum, of the two
                # byte counts - confirm that is the intended feature.
                bytesTransfer = int(log.sc_bytes) * int(log.cs_bytes)
                seriousError = 1 if int(log.sc_status) >= 400 else 0
                tags = access_tag(log.cs_uri)
                cur.execute(upsert_sql,
                            (log.id, log.c_ip, visitDuration, bytesTransfer,
                             seriousError, tags))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def normalize_feature():
    """Min-max normalize all feature vectors and store the result.

    Reads the vectors from ``DBUtil.select_all_features()``, rescales every
    column to ``(value - column_min) / (column_max - column_min)``, and
    inserts the first eight components of each vector into
    ``Normalized_Features`` with ids 1, 2, 3, ... in input order.

    The data is converted to float first so the division is never an
    integer division.  Columns where max equals min are mapped to 0.0
    instead of NaN.  Nothing is written when there are no feature vectors.
    The inserts are committed together, and the cursor and connection are
    closed even when an insert raises.
    """
    raw_vectors = DBUtil.select_all_features()
    size = len(raw_vectors)
    if size == 0:
        return

    feature_vectors = numpy.asarray(raw_vectors, dtype=float)
    col_min = numpy.min(feature_vectors, axis=0)
    col_max = numpy.max(feature_vectors, axis=0)
    span = col_max - col_min
    # A constant column has span == 0; dividing by 1 leaves it at 0.0.
    span = numpy.where(span == 0, 1.0, span)
    normalized_vectors = ((feature_vectors - col_min) / span).tolist()

    # Column order: visitDuration, numIP, bytesTransfer, urlLength,
    # numLogs2PerIP, numErrorPerIP, numLogsSecure, unsafeChar
    insert_sql = (
        "INSERT INTO Normalized_Features (id, visitDuration, numIP, "
        "bytesTransfer, urlLength, numLogs2PerIP, numErrorPerIP, "
        "numLogsSecure, unsafeChar) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, vector in enumerate(normalized_vectors, 1):
                if index % 100 == 0:
                    print(str(index) + " of " + str(size))
                cur.execute(
                    insert_sql,
                    (index, vector[0], vector[1], vector[2], vector[3],
                     vector[4], vector[5], vector[6], vector[7]))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def simple_features():
    """Derive the basic per-log features and upsert them into ``Features``.

    For every log from ``DBUtil.select_all_logs()`` this stores:

    * ``visitDuration`` - ``time_taken_ms`` as an int
    * ``numIP``         - the entry of ``DBUtil.select_log_count_by_ip()``
      for the log's ``c_ip``, or 0 when ``c_ip`` is the empty string
    * ``bytesTransfer`` - ``sc_bytes`` plus ``cs_bytes``
    * ``urlLength``     - number of characters of ``cs_uri`` from its first
      space (inclusive) to the end, or 0 when it contains no space

    Rows are keyed by the log id; an existing row has these four columns
    overwritten.  The upserts are committed together, and the cursor and
    connection are closed even when a statement raises.
    """
    upsert_sql = (
        "INSERT INTO Features (id, c_ip, visitDuration, numIP, "
        "bytesTransfer, urlLength) VALUES (%s, %s, %s, %s, %s, %s) "
        "ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), "
        "numIP=VALUES(numIP), bytesTransfer=VALUES(bytesTransfer), "
        "urlLength=VALUES(urlLength)")

    log_list = DBUtil.select_all_logs()
    count_dict = DBUtil.select_log_count_by_ip()
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, log in enumerate(log_list, 1):
                if index % 1000 == 0:
                    print(index)  # progress indicator
                visitDuration = int(log.time_taken_ms)
                bytesTransfer = int(log.sc_bytes) + int(log.cs_bytes)

                space_index = log.cs_uri.find(" ")
                if space_index != -1:
                    urlLength = len(log.cs_uri[space_index:])
                else:
                    urlLength = 0

                numIP = 0 if log.c_ip == "" else count_dict[log.c_ip]

                cur.execute(upsert_sql,
                            (log.id, log.c_ip, visitDuration, numIP,
                             bytesTransfer, urlLength))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def mysql_connect_test():
    """Smoke-test the MySQL connection by printing one row of ``LOG``."""
    connection = DBUtil.get_mysql_conn()
    cursor = connection.cursor()

    cursor.execute("SELECT * FROM LOG LIMIT 1")
    # The cursor is iterated directly to walk the rows of the result set.
    for record in cursor:
        print(record)

    cursor.close()
    connection.close()
def unsafeChar():
    """Write the per-IP unsafe-character total onto the flagged logs.

    ``scanSafeChar`` is evaluated on the ``cs_uri`` of every log from
    ``DBUtil.select_all_logs()``.  The values are summed per ``c_ip``; then,
    for every log whose own value is greater than 0, the ``unsafechar``
    column of its ``Features`` row is set to the total of its IP.

    The updates are committed together, and the cursor and connection are
    closed even when an update raises.
    """
    log_list = DBUtil.select_all_logs()
    total_log_num = len(log_list)

    # c_ip -> {log id: value returned by scanSafeChar for that log}
    values_by_ip = {}
    # c_ip -> sum of the values of all logs of that ip
    unsafechar_value_dict = {}
    for index, log in enumerate(log_list, 1):
        if index % 1000 == 0:
            print(str(index) + " of " + str(total_log_num))
        unsafechar_value = scanSafeChar(log.cs_uri)
        values_by_ip.setdefault(log.c_ip, {})[log.id] = unsafechar_value
        unsafechar_value_dict[log.c_ip] = (
            unsafechar_value_dict.get(log.c_ip, 0) + unsafechar_value)

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            index = 0
            for c_ip, values_by_log in values_by_ip.items():
                agg_value = unsafechar_value_dict.get(c_ip)
                for log_id, unsafechar_value in values_by_log.items():
                    # NOTE(review): assumes only logs with a positive value
                    # are counted and updated - confirm against the intended
                    # nesting of this condition.
                    if unsafechar_value > 0:
                        index = index + 1
                        if index % 10 == 0:
                            print(str(index) + " of " + str(total_log_num))
                        cur.execute(
                            "UPDATE Features SET unsafechar=%s WHERE id=%s",
                            (agg_value, log_id))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def insert_all_into_db(event_list):
    """Insert every event of *event_list* into the ``Log`` table.

    Each event is inserted and committed on its own.  When an insert fails
    the error, its traceback and the event are printed, the transaction is
    rolled back, and processing continues with the next event.

    The cursor and connection are always closed before returning.

    Args:
        event_list: iterable of event objects exposing the attributes named
            in the INSERT statement below (``log_time``, ``s_sitename``, ...).

    Returns:
        The number of events that could not be inserted.
    """
    insert_sql = (
        "INSERT INTO Log (log_time, s_sitename, s_computername, s_ip, "
        "cs_method, cs_uri_stem, cs_uri_query, s_port, cs_username, c_ip, "
        "cs_version, cs_user_agent, cs_cookie, cs_referer, cs_host, "
        "sc_status, sc_substatus, sc_win32_status, sc_bytes, cs_bytes, "
        "time_taken_ms) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, "
        "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")

    error_count = 0
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for event in event_list:
                try:
                    cur.execute(
                        insert_sql,
                        (event.log_time, event.s_sitename,
                         event.s_computername, event.s_ip, event.cs_method,
                         event.cs_uri_stem, event.cs_uri_query, event.s_port,
                         event.cs_username, event.c_ip, event.cs_version,
                         event.cs_user_agent, event.cs_cookie,
                         event.cs_referer, event.cs_host, event.sc_status,
                         event.sc_substatus, event.sc_win32_status,
                         event.sc_bytes, event.cs_bytes,
                         event.time_taken_ms))
                    mysql_conn.commit()
                except Exception as e:
                    # Best effort: report the bad event and keep going.
                    info = sys.exc_info()
                    print("%s : %s" % (info[0], info[1]))
                    print(e)
                    print(traceback.format_exc())
                    print(event)
                    mysql_conn.rollback()
                    error_count = error_count + 1
        finally:
            cur.close()
    finally:
        mysql_conn.close()
    return error_count
def numErrorPerIP():
    """Store, per client IP, the number of error logs in ``Features``.

    Counts the rows of ``Cleaned_Log`` with ``sc_status >= 400`` or a
    non-zero ``sc_substatus`` for every non-empty ``c_ip`` and writes that
    count to the ``numErrorPerIP`` column of all ``Features`` rows of the IP.

    The updates are committed together, and the cursor and connection are
    closed even when a statement raises.
    """
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            cur.execute(
                "SELECT * FROM (SELECT c_ip, COUNT(*) FROM Cleaned_Log "
                "WHERE sc_status>=400 or sc_substatus!=0 GROUP BY c_ip) "
                "AS TEMP_TABLE WHERE TEMP_TABLE.c_ip!=''")
            # Fetch everything first: the cursor is reused for the updates.
            rows = cur.fetchall()
            print(len(rows))

            for index, (c_ip, error_count) in enumerate(rows, 1):
                if index % 10 == 0:
                    print(index)  # progress indicator
                cur.execute(
                    "UPDATE Features SET numErrorPerIP=%s WHERE c_ip=%s",
                    (error_count, c_ip))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def numLogs2PerIP(threshold=1.5):
    """Store, per log, how many logs of the same IP lie close to it in time.

    For every log from ``DBUtil.select_all_logs()`` one query counts the
    rows of ``Cleaned_Log`` with the same ``c_ip`` whose ``log_time`` lies
    between ``log.log_time - threshold`` and ``log.log_time + threshold``
    (BETWEEN, so both bounds are inclusive).  The counts are then upserted
    into the ``numLogs2PerIP`` column of ``Features``, keyed by log id.

    The upserts are committed together, and the cursor and connection are
    closed even when a statement raises.

    Args:
        threshold: half-width of the time window in seconds.
    """
    window = datetime.timedelta(0, threshold)
    log_list = DBUtil.select_all_logs()

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            # (log id, count) pairs, collected before any write is issued
            counts = []
            for index, log in enumerate(log_list, 1):
                if index % 100 == 0:
                    print(index)  # progress indicator
                cur.execute(
                    'SELECT COUNT(*) FROM Cleaned_Log WHERE c_ip=%s AND (log_time BETWEEN %s AND %s)',
                    (log.c_ip, log.log_time - window, log.log_time + window))
                for row in cur.fetchall():
                    counts.append((log.id, row[0]))

            for log_id, count in counts:
                cur.execute(
                    "INSERT INTO Features (id, numLogs2PerIP) VALUES (%s, %s) "
                    "ON DUPLICATE KEY UPDATE "
                    "numLogs2PerIP=VALUES(numLogs2PerIP)",
                    (log_id, count))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def insert_session_into_table(session_threshold):
    """Split each user's logs into sessions and store them in ``Sessions``.

    A user is one row of the ``users`` table (``c_ip``, ``cs_user_agent``).
    The user's ``LOG`` rows are read in ``log_time`` order and fed to a
    ``Session``; whenever ``Session.is_in_this_session`` rejects an event the
    current session is finished and a new one is started with that event.
    The session that is still open after a user's last log is stored as
    well, provided it holds at least one event.

    Sessions are numbered 1, 2, 3, ... and every event is inserted and
    committed on its own; a failing insert is reported, rolled back and
    skipped.  The cursor and connection are always closed before returning.

    Args:
        session_threshold: passed unchanged to every ``Session`` created.
    """
    insert_sql = (
        "INSERT INTO Sessions (session_id, log_time, s_sitename, "
        "s_computername, s_ip, cs_method, cs_uri_stem, cs_uri_query, s_port, "
        "cs_username, c_ip, cs_version, cs_user_agent, cs_cookie, "
        "cs_referer, cs_host, sc_status, sc_substatus, sc_win32_status, "
        "sc_bytes, cs_bytes, time_taken_ms) VALUES (%s, %s, %s, %s, %s, %s, "
        "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            cur.execute("SELECT * FROM users")
            user_list = [User(c_ip=row[0], cs_user_agent=row[1])
                         for row in cur.fetchall()]
            print("users:" + str(len(user_list)))

            session_list = []
            for user_count, user in enumerate(user_list, 1):
                cur.execute(
                    "SELECT * FROM LOG WHERE c_ip=%s and cs_user_agent=%s ORDER BY log_time ASC",
                    (user.c_ip, user.cs_user_agent))
                rows = cur.fetchall()
                session = Session(user.c_ip, user.cs_user_agent,
                                  session_threshold)
                if user_count % 10 == 0:
                    print(user_count)  # progress indicator
                for row in rows:
                    event = Event(
                        date=None, time=None, s_sitename=row[1],
                        s_computername=row[2], s_ip=row[3],
                        cs_method=row[4], cs_uri_stem=row[5],
                        cs_uri_query=row[6], s_port=row[7],
                        cs_username=row[8], c_ip=row[9],
                        cs_version=row[10], cs_user_agent=row[11],
                        cs_cookie=row[12], cs_referer=row[13],
                        cs_host=row[14], sc_status=row[15],
                        sc_substatus=row[16], sc_win32_status=row[17],
                        sc_bytes=row[18], cs_bytes=row[19],
                        time_taken_ms=row[20], datetime_obj=row[0])
                    if session.is_in_this_session(event):
                        session.add_event(event)
                    else:
                        session_list.append(session)
                        # start new session
                        session = Session(user.c_ip, user.cs_user_agent,
                                          session_threshold)
                        session.add_event(event)
                # The session still open after the last log would otherwise
                # be dropped; keep it unless the user had no events at all.
                if session.event_list:
                    session_list.append(session)
            print("sessions: " + str(len(session_list)))

            for session_id, session in enumerate(session_list, 1):
                for event in session.event_list:
                    try:
                        cur.execute(
                            insert_sql,
                            (session_id, event.log_time, event.s_sitename,
                             event.s_computername, event.s_ip,
                             event.cs_method, event.cs_uri_stem,
                             event.cs_uri_query, event.s_port,
                             event.cs_username, event.c_ip,
                             event.cs_version, event.cs_user_agent,
                             event.cs_cookie, event.cs_referer,
                             event.cs_host, event.sc_status,
                             event.sc_substatus, event.sc_win32_status,
                             event.sc_bytes, event.cs_bytes,
                             event.time_taken_ms))
                        mysql_conn.commit()
                    except Exception as e:
                        # Best effort: report the bad event and keep going.
                        info = sys.exc_info()
                        print("%s : %s" % (info[0], info[1]))
                        print(e)
                        print(traceback.format_exc())
                        mysql_conn.rollback()
        finally:
            cur.close()
    finally:
        mysql_conn.close()
def numLogs2PerIP(threshold):
    """Store, per log, how many other logs of its IP lie within *threshold*.

    All rows of ``Cleaned_Log`` are read in ``log_time`` order and grouped
    by ``c_ip``.  Within each group, every pair of logs whose time
    difference is at most *threshold* seconds adds one to the count of both
    logs.  The counts are upserted into the ``numLogs2PerIP`` column of
    ``Features``, keyed by log id.

    The upserts are committed together, and the cursor and connection are
    closed even when a statement raises.

    Args:
        threshold: maximum time difference in seconds for two logs of the
            same IP to count towards each other.
    """
    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            cur.execute("SELECT * FROM Cleaned_Log ORDER BY log_time")
            rows = cur.fetchall()
            total_rows = len(rows)
            print(total_rows)

            # Inverted index: c_ip -> logs of that ip, in log_time order.
            log_dict = {}
            for index, row in enumerate(rows, 1):
                if index % 10 == 0:
                    print(str(index) + " of " + str(total_rows))
                log = Log(date=None, time=None, id=row[0],
                          datetime_obj=row[1], s_sitename=row[2],
                          s_computername=row[3], s_ip=row[4],
                          cs_method=row[5], cs_uri=row[6], s_port=row[7],
                          cs_username=row[8], c_ip=row[9],
                          cs_version=row[10], cs_user_agent=row[11],
                          cs_cookie=row[12], cs_referer=row[13],
                          cs_host=row[14], sc_status=row[15],
                          sc_substatus=row[16], sc_win32_status=row[17],
                          sc_bytes=row[18], cs_bytes=row[19],
                          time_taken_ms=row[20])
                log_dict.setdefault(log.c_ip, []).append(log)
            total_ips = len(log_dict)
            print(total_ips)

            time_interval = datetime.timedelta(0, threshold)
            for index, log_list in enumerate(log_dict.values(), 1):
                if index % 10 == 0:
                    print(str(index) + " of " + str(total_ips))

                # log id -> number of neighbouring logs found so far
                count_dict = dict.fromkeys([log.id for log in log_list], 0)
                for i in range(len(log_list)):
                    target_log = log_list[i]
                    for j in range(i + 1, len(log_list)):
                        scan_log = log_list[j]
                        # Logs are time-ordered, so the first one outside
                        # the window ends the scan for this target.
                        if scan_log.log_time - target_log.log_time > time_interval:
                            break
                        count_dict[target_log.id] += 1
                        count_dict[scan_log.id] += 1

                for log_id, count in count_dict.items():
                    cur.execute(
                        "INSERT INTO Features (id, numLogs2PerIP) "
                        "VALUES (%s, %s) ON DUPLICATE KEY UPDATE "
                        "numLogs2PerIP=VALUES(numLogs2PerIP)",
                        (log_id, count))
            mysql_conn.commit()
        finally:
            cur.close()
    finally:
        mysql_conn.close()