Example #1
0
     # Optimize the RBF-kernel gamma by maximizing the variance of the Gram
     # matrix over the candidate values in ocsvm_gammas (a common heuristic
     # for choosing gamma for a one-class SVM).
     # NOTE(review): assumes variance_of_gram_matrix was initialized as an
     # empty list above this visible chunk — confirm.
     autoscaled_x_array = np.array(autoscaled_x)
     for nonlinear_svr_gamma in ocsvm_gammas:
         # RBF Gram matrix: exp(-gamma * squared Euclidean distance) for
         # every pair of training samples (pairwise diff via broadcasting).
         gram_matrix = np.exp(- nonlinear_svr_gamma * ((autoscaled_x_array[:, np.newaxis] - autoscaled_x_array) ** 2).sum(axis=2))
         variance_of_gram_matrix.append(gram_matrix.var(ddof=1))
     # The candidate gamma whose Gram matrix has the largest variance wins.
     optimal_gamma = ocsvm_gammas[np.where(variance_of_gram_matrix==np.max(variance_of_gram_matrix))[0][0]]
     # Optimized gamma
     print('最適化された gamma :', optimal_gamma)
 else:
     # Use the fixed, user-specified gamma instead of optimizing it.
     optimal_gamma = ocsvm_gamma
 
 # Applicability domain (AD) via One-Class SVM
 ad_model = OneClassSVM(kernel='rbf', gamma=optimal_gamma, nu=ocsvm_nu)  # declare the AD model
 ad_model.fit(autoscaled_x)  # fit the model on the autoscaled training data

 # Data density of the training data (the value of f(x)); samples with a
 # negative value lie outside the learned region, i.e. are outliers.
 data_density_train = ad_model.decision_function(autoscaled_x)
 number_of_support_vectors = len(ad_model.support_)
 number_of_outliers_in_training_data = sum(data_density_train < 0)
 print('\nトレーニングデータにおけるサポートベクター数 :', number_of_support_vectors)
 print('トレーニングデータにおけるサポートベクターの割合 :', number_of_support_vectors / x.shape[0])
 print('\nトレーニングデータにおける外れサンプル数 :', number_of_outliers_in_training_data)
 print('トレーニングデータにおける外れサンプルの割合 :', number_of_outliers_in_training_data / x.shape[0])
 data_density_train = pd.DataFrame(data_density_train, index=x.index, columns=['ocsvm_data_density'])
 data_density_train.to_csv('ocsvm_data_density_train.csv')  # save to csv; NOTE: silently overwrites an existing file of the same name
 # Flag, per training sample, whether it is inside the AD (f(x) >= 0).
 # NOTE(review): data_density_train is now a DataFrame, so this flag is a
 # one-column boolean DataFrame — confirm downstream code expects that.
 inside_ad_flag_train = data_density_train >= 0
 # Data density (the value of f(x)) for the prediction data
 ad_index_prediction = ad_model.decision_function(autoscaled_x_prediction)
 number_of_outliers_in_prediction_data = sum(ad_index_prediction < 0)
 print('\nテストデータにおける外れサンプル数 :', number_of_outliers_in_prediction_data)
 print('テストデータにおける外れサンプルの割合 :', number_of_outliers_in_prediction_data / x_prediction.shape[0])
Example #2
0
class AnomalyModel:
	"""Uniform wrapper around several anomaly-detection back-ends.

	The method chosen at construction time determines which underlying
	model is built in __init__ and how getAnomalyScore computes the
	anomaly score of a sample (higher score = more anomalous).
	"""

	def __init__(self, trainingSet, anomalyMethod = "KNN", h = None ):
		"""Build the underlying model for the selected method.

		trainingSet   -- training samples (format required by the back-end)
		anomalyMethod -- one of "online", "centroid", "medoid", "IGNG",
		                 "GNG", "KNN", "RNN", "SVM"
		h             -- pre-built model; used only by the "online" method

		Raises ValueError for an unknown anomalyMethod.
		"""
		self.method = anomalyMethod

		# The branches are mutually exclusive, so chain them with elif
		# (the original ran every comparison) and fail fast on a typo'd
		# method name instead of leaving self.h unset.
		if self.method == "online":
			self.h = h
		elif self.method == "centroid":
			self.h = Util.centroid( trainingSet )
		elif self.method == "medoid":
			self.h = Util.medoid( trainingSet )
		elif self.method == "IGNG":
			self.h = IGNG( radius = PARAMS["R"] ) # IGNG.estimate_radius( trainingSet )
			self.h.train( trainingSet )
		elif self.method == "GNG":
			self.h = GNG(period = 50)
			self.h.train( trainingSet )
		elif self.method in ("KNN", "RNN"):
			# Both neighbour-based methods fit the same ball-tree index.
			self.h = NearestNeighbors(algorithm='ball_tree', metric='euclidean').fit(trainingSet)
		elif self.method == "SVM":
			self.h = svm.OneClassSVM(nu=PARAMS["NU"], kernel="rbf", gamma=PARAMS["GAMMA"]).fit(trainingSet)
		else:
			raise ValueError("unknown anomaly method: %r" % (self.method,))

	def getAnomalyScore(self, x, inversed = False):
		"""Return the anomaly score of sample x (higher = more anomalous).

		With inversed=True the score is flipped so that higher means more
		normal: a sign flip for "SVM", the reciprocal for every
		distance-based method (which may raise ZeroDivisionError when the
		raw score is exactly 0, as in the original implementation).

		Raises ValueError for an unknown method (the original raised an
		accidental NameError on the unbound local instead).
		"""
		if self.method == "online":
			alpha_m = self.h.getNearestDist(x) # alpha_m = self.h.getNearestDistToMature(x)
		elif self.method in ("centroid", "medoid"):
			# Distance from x to the single representative point.
			alpha_m = Util.dist(x, self.h)
		elif self.method in ("IGNG", "GNG"):
			alpha_m = self.h.getNearestDist(x)
		elif self.method == "KNN":
			# Sum of distances to the K nearest training samples.
			distances, indices = self.h.kneighbors( x, n_neighbors = PARAMS["K"] )
			alpha_m = sum( distances[0] )
		elif self.method == "RNN":
			# Density-style score: many close neighbours within radius R
			# give a large harmonic sum, driving the score toward 0.
			distances, indices = self.h.radius_neighbors(x, radius = PARAMS["R"])
			alpha_m = 1. / ( 1. + sum( [ 1./di for di in distances[0] if di != 0 ] ) )
		elif self.method == "SVM":
			# decision_function is positive inside the learned region,
			# so negate it to turn it into an anomaly score.
			alpha_m = -1. * self.h.decision_function(x)[0][0]
		else:
			raise ValueError("unknown anomaly method: %r" % (self.method,))

		if inversed:
			# SVM scores are signed, so invert by sign flip; the
			# distance-based scores are inverted by reciprocal.
			alpha_m = -1. * alpha_m if self.method == "SVM" else 1. / alpha_m

		return alpha_m