def normalize_feature():
    """Z-score normalize every stored feature vector and persist the result.

    Reads all feature vectors via ``DBUtil.select_all_features()``, subtracts
    the per-column mean, divides by the per-column standard deviation, and
    inserts one row per vector into ``Normalized_Features`` with ids starting
    at 1. Progress is printed every 100 rows.

    Returns nothing; does nothing when there are no feature vectors.

    NOTE(review): another function with the same name appears later in this
    file; if both live in one module the later definition wins -- confirm
    which one is intended to be active.
    """
    feature_vectors = numpy.asarray(DBUtil.select_all_features())
    if feature_vectors.size == 0:
        # Nothing to normalize; avoids nan statistics on an empty array.
        return

    # Both statistics must be per column (axis=0). A mean taken over the
    # whole array would subtract one global scalar from every feature.
    avg = numpy.mean(feature_vectors, axis=0)
    std = numpy.std(feature_vectors, axis=0)
    # A constant column has zero deviation; divide by 1 instead so it maps
    # to 0.0 rather than nan/inf, which cannot be stored meaningfully.
    std = numpy.where(std == 0, 1.0, std)
    normalized_vectors = ((feature_vectors - avg) / std).tolist()

    insert_sql = (
        "INSERT INTO Normalized_Features (id, visitDuration, bytesTransfer, "
        "numLogs2PerIP, numErrorPerIP, numLogsSecure, seriousError, tags, "
        "tags2) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, normalized_vector in enumerate(normalized_vectors, 1):
                if index % 100 == 0:
                    print(index)
                # Explicit indexing of the first eight components keeps the
                # IndexError on short rows that the original code raised.
                values = tuple(normalized_vector[i] for i in range(8))
                cur.execute(insert_sql, (index,) + values)
                # Committed per row, as before, so rows written ahead of a
                # failure stay persisted.
                mysql_conn.commit()
        finally:
            cur.close()
    finally:
        # Release the connection even when an insert fails.
        mysql_conn.close()
def normalize_feature():
    """Min-max normalize every stored feature vector and persist the result.

    Reads all feature vectors via ``DBUtil.select_all_features()``, rescales
    each column as ``(value - column_min) / (column_max - column_min)``, and
    inserts one row per vector into ``Normalized_Features`` with ids starting
    at 1. Progress is printed every 100 rows as ``"<index> of <total>"``.

    Returns nothing; does nothing when there are no feature vectors.
    """
    rows = DBUtil.select_all_features()
    size = len(rows)
    if size == 0:
        # numpy.min / numpy.max raise ValueError on an empty array.
        return

    # Cast to float so all-integer input is not floor-divided by
    # numpy.divide (integer division under Python 2).
    feature_vectors = numpy.asarray(rows, dtype=float)
    col_min = numpy.min(feature_vectors, axis=0)
    col_max = numpy.max(feature_vectors, axis=0)
    span = col_max - col_min
    # A constant column has zero range; divide by 1 instead so it maps to
    # 0.0 rather than a 0/0 nan.
    span = numpy.where(span == 0, 1.0, span)
    normalized_vectors = numpy.divide(feature_vectors - col_min, span).tolist()

    insert_sql = (
        "INSERT INTO Normalized_Features (id, visitDuration, numIP, "
        "bytesTransfer, urlLength, numLogs2PerIP, numErrorPerIP, "
        "numLogsSecure, unsafeChar) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )

    mysql_conn = DBUtil.get_mysql_conn()
    try:
        cur = mysql_conn.cursor()
        try:
            for index, normalized_vector in enumerate(normalized_vectors, 1):
                if index % 100 == 0:
                    print(str(index) + " of " + str(size))
                # Vector component order, per the original author's note:
                # visitDuration, numIP, bytesTransfer, urlLength,
                # numLogs2PerIP, numErrorPerIP, numLogsSecure, unsafeChar
                values = tuple(normalized_vector[i] for i in range(8))
                cur.execute(insert_sql, (index,) + values)
                # Committed per row, as before, so rows written ahead of a
                # failure stay persisted.
                mysql_conn.commit()
        finally:
            cur.close()
    finally:
        # Release the connection even when an insert fails.
        mysql_conn.close()