예제 #1
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def tags2():
    """Store, per client IP, the number of logs flagged by the keyword scan.

    Every log returned by ``DBUtil.select_all_logs()`` has its ``cs_uri``
    passed to ``uri_contain_keyword`` together with the keyword list below.
    Logs for which that helper returns ``1`` are tallied by ``c_ip`` and each
    tally is written to the ``tags2`` column of ``Features`` for that IP.
    Every UPDATE is committed individually.
    """
    entries = DBUtil.select_all_logs()

    keywords = [
        "javascript", "expression", "applet", "meta", "xml", "blink", "link",
        "style", "script", "object", "iframe", "frame", "frameset", "ilayer",
        "layer", "bgsound", "title", "base", "vbscript", "embed", "`", "--",
        ";", "or", "select", "union", "insert", "update", "replace",
        "truncate", "delete", "sp", "xp", "system(", "eval(", "ftp:", "ptp:",
        "data:", "../", "..\\", "jsessionid", "login.jsp?userid",
        "login.jsp?password",
    ]

    print(len(entries))

    # c_ip -> number of flagged logs seen for that IP.
    hits_by_ip = {}
    for entry in entries:
        if uri_contain_keyword(entry.cs_uri, keywords) != 1:
            continue
        hits_by_ip[entry.c_ip] = hits_by_ip.get(entry.c_ip, 0) + 1

    print(len(hits_by_ip))

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    for done, (client_ip, hits) in enumerate(hits_by_ip.items(), 1):
        # Progress output every 10 IPs.
        if done % 10 == 0:
            print(done)

        cur.execute("UPDATE Features SET tags2=%s WHERE c_ip=%s", (hits, client_ip))
        mysql_conn.commit()

    cur.close()
    mysql_conn.close()
예제 #2
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def normalize_feature():
    """Z-score normalize every feature column and store it in Normalized_Features.

    Loads all feature vectors through ``DBUtil.select_all_features()``,
    standardizes each column as ``(value - column mean) / column std`` and
    inserts one row per vector, using a 1-based running index as ``id``.
    Each INSERT is committed individually. Nothing is written when there are
    no feature vectors.
    """
    feature_vectors = DBUtil.select_all_features()
    if len(feature_vectors) == 0:
        return
    feature_vectors = numpy.asarray(feature_vectors)

    # Both statistics must be per column (axis=0). A mean taken over the whole
    # matrix would centre every feature on the same global value.
    avg = numpy.mean(feature_vectors, axis=0)
    std = numpy.std(feature_vectors, axis=0)
    # A constant column has std == 0; divide by 1 instead so it normalizes to
    # 0 rather than NaN/inf.
    std = numpy.where(std == 0, 1, std)

    normalized_vectors = ((feature_vectors - avg) / std).tolist()

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()
    try:
        for index, normalized_vector in enumerate(normalized_vectors, 1):
            # Progress output every 100 rows.
            if index % 100 == 0:
                print(index)

            cur.execute(
                "INSERT INTO Normalized_Features (id, visitDuration, bytesTransfer, numLogs2PerIP, numErrorPerIP, numLogsSecure, seriousError, tags, tags2) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (index, normalized_vector[0], normalized_vector[1], normalized_vector[2], normalized_vector[3],
                 normalized_vector[4], normalized_vector[5], normalized_vector[6], normalized_vector[7]))
            mysql_conn.commit()
    finally:
        # Release the cursor and connection even if an INSERT fails.
        cur.close()
        mysql_conn.close()
예제 #3
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def simple_features():
    """Compute the per-log basic features and upsert them into Features.

    For every log returned by ``DBUtil.select_all_logs()`` this derives:

    * ``visitDuration`` - ``time_taken_ms`` as an int,
    * ``bytesTransfer`` - ``sc_bytes`` multiplied by ``cs_bytes``,
    * ``seriousError``  - 1 when ``sc_status`` is 400 or above, else 0,
    * ``tags``          - the result of ``uri_contain_keyword`` on ``cs_uri``.

    Rows are keyed by the log id; an existing row has these four columns
    overwritten. Each statement is committed individually.
    """
    # Built once: the keyword list does not depend on the log being processed.
    keywords = [
        "javascript", "expression", "applet", "meta", "xml", "blink", "link",
        "style", "script", "object", "iframe", "frame", "frameset", "ilayer",
        "layer", "bgsound", "title", "base", "vbscript", "embed", "`", "--",
        ";", "or", "select", "union", "insert", "update", "replace",
        "truncate", "delete", "sp", "xp", "system(", "eval(", "http:",
        "https:", "ftp:", "ptp:", "data:", "../", "..\\", "jsessionid",
        "login.jsp?userid", "login.jsp?password",
    ]

    log_list = DBUtil.select_all_logs()

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()
    try:
        for index, log in enumerate(log_list, 1):
            # Progress output every 1000 logs.
            if index % 1000 == 0:
                print(index)

            visitDuration = int(log.time_taken_ms)
            # NOTE(review): this is the product of the two byte counts, not
            # their sum - confirm that is the intended feature.
            bytesTransfer = int(log.sc_bytes) * int(log.cs_bytes)
            seriousError = 1 if int(log.sc_status) >= 400 else 0
            tags = uri_contain_keyword(log.cs_uri, keywords)

            cur.execute(
                "INSERT INTO Features (id, c_ip, visitDuration, bytesTransfer, seriousError, tags) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), bytesTransfer=VALUES(bytesTransfer), seriousError=VALUES(seriousError), tags=VALUES(tags)",
                (log.id, log.c_ip, visitDuration, bytesTransfer, seriousError, tags))
            mysql_conn.commit()
    finally:
        # Release the cursor and connection even if a conversion or INSERT fails.
        cur.close()
        mysql_conn.close()
예제 #4
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def simple_features():
    """Upsert duration, byte, error and tag features for every log.

    Each log from ``DBUtil.select_all_logs()`` yields one ``Features`` row
    keyed by the log id: ``time_taken_ms`` as an int, ``sc_bytes`` times
    ``cs_bytes``, a 0/1 flag for ``sc_status >= 400`` and the value returned
    by ``access_tag`` for the URI. Existing rows have those columns
    overwritten; every statement is committed on its own.
    """
    entries = DBUtil.select_all_logs()

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    for position, entry in enumerate(entries, 1):
        # Progress output every 1000 logs.
        if position % 1000 == 0:
            print(position)

        duration = int(entry.time_taken_ms)
        byte_product = int(entry.sc_bytes) * int(entry.cs_bytes)
        if int(entry.sc_status) >= 400:
            error_flag = 1
        else:
            error_flag = 0
        tag_value = access_tag(entry.cs_uri)

        params = (entry.id, entry.c_ip, duration, byte_product, error_flag, tag_value)
        cur.execute(
            "INSERT INTO Features (id, c_ip, visitDuration, bytesTransfer, seriousError, tags) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), bytesTransfer=VALUES(bytesTransfer), seriousError=VALUES(seriousError), tags=VALUES(tags)",
            params)
        mysql_conn.commit()

    cur.close()
    mysql_conn.close()
예제 #5
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def normalize_feature():
    """Min-max scale every feature column and store it in Normalized_Features.

    Loads all feature vectors through ``DBUtil.select_all_features()``, maps
    each column to ``(value - column min) / (column max - column min)`` and
    inserts one row per vector, using a 1-based running index as ``id``.
    Each INSERT is committed individually. Nothing is written when there are
    no feature vectors.
    """
    feature_vectors = DBUtil.select_all_features()
    size = len(feature_vectors)
    if size == 0:
        # numpy.min/numpy.max raise on an empty array; there is nothing to do.
        return

    # Work in floating point: under Python 2, NumPy's divide on two integer
    # arrays behaves like floor division, which would collapse the scaled
    # values to 0 or 1.
    feature_vectors = numpy.asarray(feature_vectors, dtype=float)

    col_min = numpy.min(feature_vectors, axis=0)
    col_range = numpy.max(feature_vectors, axis=0) - col_min
    # A constant column has a zero range; divide by 1 instead so it scales to
    # 0 rather than NaN.
    col_range = numpy.where(col_range == 0, 1, col_range)

    normalized_vectors = ((feature_vectors - col_min) / col_range).tolist()

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()
    try:
        for index, normalized_vector in enumerate(normalized_vectors, 1):
            # Progress output every 100 rows.
            if index % 100 == 0:
                print(str(index) + " of " + str(size))

            # Column order: visitDuration, numIP, bytesTransfer, urlLength,
            # numLogs2PerIP, numErrorPerIP, numLogsSecure, unsafeChar
            cur.execute(
                "INSERT INTO Normalized_Features (id, visitDuration, numIP, bytesTransfer, urlLength, numLogs2PerIP, numErrorPerIP, numLogsSecure, unsafeChar) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
                (index, normalized_vector[0], normalized_vector[1], normalized_vector[2], normalized_vector[3],
                 normalized_vector[4], normalized_vector[5], normalized_vector[6], normalized_vector[7]))
            mysql_conn.commit()
    finally:
        # Release the cursor and connection even if an INSERT fails.
        cur.close()
        mysql_conn.close()
예제 #6
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def simple_features():
    """Upsert duration, per-IP count, byte and URL-length features per log.

    For each log from ``DBUtil.select_all_logs()``:

    * ``visitDuration`` is ``time_taken_ms`` as an int,
    * ``bytesTransfer`` is ``sc_bytes`` plus ``cs_bytes``,
    * ``urlLength`` is the length of ``cs_uri`` from its first space onwards
      (the space included), or 0 when it contains no space,
    * ``numIP`` is looked up in ``DBUtil.select_log_count_by_ip()`` by
      ``c_ip``, or 0 when ``c_ip`` is the empty string.

    Rows are keyed by log id; existing rows have these columns overwritten.
    Every statement is committed on its own.
    """
    entries = DBUtil.select_all_logs()
    per_ip_counts = DBUtil.select_log_count_by_ip()

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    for position, entry in enumerate(entries, 1):
        # Progress output every 1000 logs.
        if position % 1000 == 0:
            print(position)

        duration = int(entry.time_taken_ms)
        byte_total = int(entry.sc_bytes) + int(entry.cs_bytes)

        first_space = entry.cs_uri.find(" ")
        tail_length = len(entry.cs_uri[first_space:]) if first_space != -1 else 0

        ip_count = 0 if entry.c_ip == "" else per_ip_counts[entry.c_ip]

        params = (entry.id, entry.c_ip, duration, ip_count, byte_total, tail_length)
        cur.execute(
            "INSERT INTO Features (id, c_ip, visitDuration, numIP, bytesTransfer, urlLength) VALUES (%s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE visitDuration=VALUES(visitDuration), numIP=VALUES(numIP), bytesTransfer=VALUES(bytesTransfer), urlLength=VALUES(urlLength)",
            params)
        mysql_conn.commit()

    cur.close()
    mysql_conn.close()
예제 #7
0
파일: LogFileUtil.py 프로젝트: Hui-Li/SLA
def mysql_connect_test():
    """Smoke-test the MySQL connection by printing one row of the LOG table."""
    connection = DBUtil.get_mysql_conn()
    cursor = connection.cursor()

    cursor.execute("SELECT * FROM LOG LIMIT 1")
    # The query is limited to a single row, so this prints at most once.
    for record in cursor:
        print(record)

    cursor.close()
    connection.close()
예제 #8
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def unsafeChar():
    """Write the per-IP aggregated ``scanSafeChar`` score to flagged logs.

    Pass 1 scores the ``cs_uri`` of every log with ``scanSafeChar``, remembers
    the score per log id (grouped by ``c_ip``) and sums the scores per IP.
    Pass 2 updates ``Features.unsafechar`` for every log whose own score is
    greater than 0, storing the summed score of that log's IP. Each UPDATE is
    committed individually.
    """
    entries = DBUtil.select_all_logs()
    total_log_num = len(entries)

    scores_by_ip = {}   # c_ip -> {log id -> score of that log}
    ip_totals = {}      # c_ip -> sum of the scores of all its logs

    for position, entry in enumerate(entries, 1):
        # Progress output every 1000 logs.
        if position % 1000 == 0:
            print(str(position) + " of " + str(total_log_num))

        score = scanSafeChar(entry.cs_uri)

        scores_by_ip.setdefault(entry.c_ip, {})[entry.id] = score
        ip_totals[entry.c_ip] = ip_totals.get(entry.c_ip, 0) + score

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    # Counts only the rows that are actually updated.
    updated = 0

    for client_ip, per_log_scores in scores_by_ip.items():
        ip_total = ip_totals.get(client_ip)
        for log_id, score in per_log_scores.items():
            if not score > 0:
                continue

            updated = updated + 1
            if updated % 10 == 0:
                print(str(updated) + " of " + str(total_log_num))

            cur.execute("UPDATE Features SET unsafechar=%s WHERE id=%s", (ip_total, log_id))
            mysql_conn.commit()

    cur.close()
    mysql_conn.close()
예제 #9
0
파일: LogFileUtil.py 프로젝트: Hui-Li/SLA
def insert_all_into_db(event_list):
    """Insert every event into the Log table, one commit per event.

    The insert is best-effort: when a row fails, the error, its traceback and
    the offending event are printed, the transaction is rolled back and
    processing continues with the next event.

    Args:
        event_list: iterable of event objects exposing the attributes that are
            bound to the INSERT below (``log_time``, ``s_sitename``, ...,
            ``time_taken_ms``).

    Returns:
        The number of events that could not be inserted.
    """
    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    error_count = 0
    try:
        for event in event_list:
            try:
                cur.execute("INSERT INTO Log (log_time, s_sitename, s_computername, s_ip, cs_method, cs_uri_stem, cs_uri_query, s_port, cs_username, c_ip, cs_version, cs_user_agent, cs_cookie, cs_referer, cs_host, sc_status, sc_substatus, sc_win32_status, sc_bytes, cs_bytes, time_taken_ms) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                            (event.log_time, event.s_sitename, event.s_computername, event.s_ip, event.cs_method, event.cs_uri_stem, event.cs_uri_query, event.s_port, event.cs_username, event.c_ip, event.cs_version, event.cs_user_agent, event.cs_cookie, event.cs_referer, event.cs_host, event.sc_status, event.sc_substatus, event.sc_win32_status, event.sc_bytes, event.cs_bytes, event.time_taken_ms))
                mysql_conn.commit()
            except Exception as e:
                # Deliberately broad: one bad row must not abort the import.
                info = sys.exc_info()
                print("%s : %s" % (info[0], info[1]))
                print(e)
                print(traceback.format_exc())
                print(event)
                mysql_conn.rollback()
                error_count = error_count + 1
    finally:
        # The cursor and connection were previously never released.
        cur.close()
        mysql_conn.close()

    return error_count
예제 #10
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def numErrorPerIP():
    """Store, per client IP, the number of error logs in Features.numErrorPerIP.

    A Cleaned_Log row counts as an error when ``sc_status >= 400`` or
    ``sc_substatus != 0``. Rows with an empty ``c_ip`` are excluded by the
    query. Every UPDATE is committed individually.
    """
    connection = DBUtil.get_mysql_conn()
    cursor = connection.cursor()
    cursor.execute(
        "SELECT * FROM (SELECT c_ip, COUNT(*) FROM Cleaned_Log WHERE sc_status>=400 or sc_substatus!=0 GROUP BY c_ip) AS TEMP_TABLE WHERE TEMP_TABLE.c_ip!=''")

    # Each row is (c_ip, error count).
    error_rows = cursor.fetchall()
    print(len(error_rows))

    for position, error_row in enumerate(error_rows, 1):
        # Progress output every 10 IPs.
        if position % 10 == 0:
            print(position)

        client_ip = error_row[0]
        error_total = error_row[1]
        cursor.execute("UPDATE Features SET numErrorPerIP=%s WHERE c_ip=%s", (error_total, client_ip))
        connection.commit()

    cursor.close()
    connection.close()
예제 #11
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def numLogs2PerIP():
    """Count Cleaned_Log rows near each log in time and upsert the counts.

    For every log from ``DBUtil.select_all_logs()`` one COUNT(*) query is run
    over Cleaned_Log rows with the same ``c_ip`` whose ``log_time`` lies
    between 1.5 seconds before and 1.5 seconds after the log's own
    ``log_time``. All counts are collected first and then written to
    ``Features.numLogs2PerIP`` keyed by log id, one commit per row.
    """
    half_window = datetime.timedelta(seconds=1.5)

    entries = DBUtil.select_all_logs()

    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    # Collected as [log id, count] pairs and written after all queries ran.
    pending = []

    for position, entry in enumerate(entries, 1):
        # Progress output every 100 logs.
        if position % 100 == 0:
            print(position)

        window_start = entry.log_time - half_window
        window_end = entry.log_time + half_window

        cur.execute('SELECT COUNT(*) FROM Cleaned_Log WHERE c_ip=%s AND (log_time BETWEEN %s AND %s)',
                    (entry.c_ip, window_start, window_end))

        pending.extend([entry.id, count_row[0]] for count_row in cur.fetchall())

    for log_id, nearby in pending:
        cur.execute(
            "INSERT INTO Features (id, numLogs2PerIP) VALUES (%s, %s) ON DUPLICATE KEY UPDATE numLogs2PerIP=VALUES(numLogs2PerIP)",
            (log_id, nearby))
        mysql_conn.commit()

    cur.close()
    mysql_conn.close()
예제 #12
0
파일: LogFileUtil.py 프로젝트: Hui-Li/SLA
def insert_session_into_table(session_threshold):
    """Split every user's logs into sessions and insert them into Sessions.

    A user is one row of the ``users`` table (``c_ip``, ``cs_user_agent``).
    The user's LOG rows are read in ascending ``log_time`` order and added to
    the current ``Session`` for as long as ``Session.is_in_this_session``
    accepts them; the first rejected event closes that session and opens a new
    one. Sessions are numbered from 1 in the order they were closed and every
    event is inserted with its session's number, one commit per event.

    Inserting is best-effort: a failing row is reported, rolled back and
    skipped.

    Args:
        session_threshold: passed unchanged to every ``Session`` constructor.
    """
    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()
    try:
        cur.execute("SELECT * FROM users")
        user_list = [User(c_ip=row[0], cs_user_agent=row[1]) for row in cur.fetchall()]

        print("users:" + str(len(user_list)))

        session_list = []

        for user_count, user in enumerate(user_list, 1):
            cur.execute("SELECT * FROM LOG WHERE c_ip=%s and cs_user_agent=%s ORDER BY log_time ASC", (user.c_ip, user.cs_user_agent))
            rows = cur.fetchall()

            session = Session(user.c_ip, user.cs_user_agent, session_threshold)
            # Progress output every 10 users.
            if user_count % 10 == 0:
                print(user_count)

            for row in rows:
                event = Event(date=None, time=None, s_sitename=row[1], s_computername=row[2], s_ip=row[3], cs_method=row[4], cs_uri_stem=row[5], cs_uri_query=row[6], s_port=row[7],
                              cs_username=row[8], c_ip=row[9], cs_version=row[10], cs_user_agent=row[11], cs_cookie=row[12], cs_referer=row[13], cs_host=row[14], sc_status=row[15], sc_substatus=row[16],
                              sc_win32_status=row[17], sc_bytes=row[18], cs_bytes=row[19], time_taken_ms=row[20], datetime_obj=row[0])

                if session.is_in_this_session(event):
                    session.add_event(event)
                else:
                    session_list.append(session)

                    # start new session
                    session = Session(user.c_ip, user.cs_user_agent, session_threshold)
                    session.add_event(event)

            # The session still open after the user's last event was
            # previously dropped, so its events never reached the Sessions
            # table. Keep it unless the user had no events at all.
            if session.event_list:
                session_list.append(session)

        print("sessions: " + str(len(session_list)))

        for session_id, session in enumerate(session_list, 1):
            for event in session.event_list:
                try:
                    cur.execute("INSERT INTO Sessions (session_id, log_time, s_sitename, s_computername, s_ip, cs_method, cs_uri_stem, cs_uri_query, s_port, cs_username, c_ip, cs_version, cs_user_agent, cs_cookie, cs_referer, cs_host, sc_status, sc_substatus, sc_win32_status, sc_bytes, cs_bytes, time_taken_ms) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
                                (session_id, event.log_time, event.s_sitename, event.s_computername, event.s_ip, event.cs_method, event.cs_uri_stem, event.cs_uri_query, event.s_port, event.cs_username, event.c_ip, event.cs_version, event.cs_user_agent, event.cs_cookie, event.cs_referer, event.cs_host, event.sc_status, event.sc_substatus, event.sc_win32_status, event.sc_bytes, event.cs_bytes, event.time_taken_ms))
                    mysql_conn.commit()
                except Exception as e:
                    # Deliberately broad: one bad row must not abort the run.
                    info = sys.exc_info()
                    print("%s : %s" % (info[0], info[1]))
                    print(e)
                    print(traceback.format_exc())
                    mysql_conn.rollback()
    finally:
        # The cursor and connection were previously never released.
        cur.close()
        mysql_conn.close()
예제 #13
0
파일: FeatureUtil.py 프로젝트: Hui-Li/SLA
def numLogs2PerIP(threshold):
    """Count, per log, the same-IP logs within ``threshold`` seconds of it.

    All Cleaned_Log rows are loaded in ``log_time`` order and grouped by
    ``c_ip``. Within a group every pair of logs whose time difference is at
    most ``threshold`` seconds adds 1 to the count of both logs. The counts
    are upserted into ``Features.numLogs2PerIP`` keyed by log id, one commit
    per row.

    Args:
        threshold: maximum time difference, in seconds, for two logs of the
            same IP to count towards each other.
    """
    mysql_conn = DBUtil.get_mysql_conn()
    cur = mysql_conn.cursor()

    cur.execute("SELECT * FROM Cleaned_Log ORDER BY log_time")
    rows = cur.fetchall()

    # Inverted index: c_ip -> [[log ids], [Log objects]], both in time order.
    logs_by_ip = {}
    print(len(rows))
    for position, row in enumerate(rows, 1):
        # Progress output every 10 rows.
        if position % 10 == 0:
            print(str(position) + " of " + str(len(rows)))

        log = Log(
            date=None, time=None,
            id=row[0], datetime_obj=row[1],
            s_sitename=row[2], s_computername=row[3], s_ip=row[4],
            cs_method=row[5], cs_uri=row[6], s_port=row[7],
            cs_username=row[8], c_ip=row[9], cs_version=row[10],
            cs_user_agent=row[11], cs_cookie=row[12], cs_referer=row[13],
            cs_host=row[14], sc_status=row[15], sc_substatus=row[16],
            sc_win32_status=row[17], sc_bytes=row[18], cs_bytes=row[19],
            time_taken_ms=row[20])

        bucket = logs_by_ip.setdefault(log.c_ip, [[], []])
        bucket[0].append(log.id)
        bucket[1].append(log)

    print(len(logs_by_ip))

    time_interval = datetime.timedelta(seconds=threshold)

    for ip_position, (ip, bucket) in enumerate(logs_by_ip.items(), 1):
        # Progress output every 10 IPs.
        if ip_position % 10 == 0:
            print(str(ip_position) + " of " + str(len(logs_by_ip)))

        id_list, ip_logs = bucket
        # log id -> number of same-IP logs within the interval.
        neighbour_counts = dict.fromkeys(id_list, 0)
        log_total = len(ip_logs)

        for first, target_log in enumerate(ip_logs):
            for second in range(first + 1, log_total):
                scan_log = ip_logs[second]
                # Logs are in time order, so once one is too far away all
                # later ones are as well.
                if scan_log.log_time - target_log.log_time > time_interval:
                    break
                neighbour_counts[target_log.id] += 1
                neighbour_counts[scan_log.id] += 1

        for log_id, neighbours in neighbour_counts.items():
            cur.execute(
                "INSERT INTO Features (id, numLogs2PerIP) VALUES (%s, %s) ON DUPLICATE KEY UPDATE numLogs2PerIP=VALUES(numLogs2PerIP)",
                (log_id, neighbours))
            mysql_conn.commit()

    cur.close()
    mysql_conn.close()