autoscaled_x_array = np.array(autoscaled_x) for nonlinear_svr_gamma in ocsvm_gammas: gram_matrix = np.exp(- nonlinear_svr_gamma * ((autoscaled_x_array[:, np.newaxis] - autoscaled_x_array) ** 2).sum(axis=2)) variance_of_gram_matrix.append(gram_matrix.var(ddof=1)) optimal_gamma = ocsvm_gammas[np.where(variance_of_gram_matrix==np.max(variance_of_gram_matrix))[0][0]] # 最適化された γ print('最適化された gamma :', optimal_gamma) else: optimal_gamma = ocsvm_gamma # OCSVM による AD ad_model = OneClassSVM(kernel='rbf', gamma=optimal_gamma, nu=ocsvm_nu) # AD モデルの宣言 ad_model.fit(autoscaled_x) # モデル構築 # トレーニングデータのデータ密度 (f(x) の値) data_density_train = ad_model.decision_function(autoscaled_x) number_of_support_vectors = len(ad_model.support_) number_of_outliers_in_training_data = sum(data_density_train < 0) print('\nトレーニングデータにおけるサポートベクター数 :', number_of_support_vectors) print('トレーニングデータにおけるサポートベクターの割合 :', number_of_support_vectors / x.shape[0]) print('\nトレーニングデータにおける外れサンプル数 :', number_of_outliers_in_training_data) print('トレーニングデータにおける外れサンプルの割合 :', number_of_outliers_in_training_data / x.shape[0]) data_density_train = pd.DataFrame(data_density_train, index=x.index, columns=['ocsvm_data_density']) data_density_train.to_csv('ocsvm_data_density_train.csv') # csv ファイルに保存。同じ名前のファイルがあるときは上書きされるため注意 # トレーニングデータに対して、AD の中か外かを判定 inside_ad_flag_train = data_density_train >= 0 # 予測用データのデータ密度 (f(x) の値) ad_index_prediction = ad_model.decision_function(autoscaled_x_prediction) number_of_outliers_in_prediction_data = sum(ad_index_prediction < 0) print('\nテストデータにおける外れサンプル数 :', number_of_outliers_in_prediction_data) print('テストデータにおける外れサンプルの割合 :', number_of_outliers_in_prediction_data / x_prediction.shape[0])
class AnomalyModel:
    """Uniform anomaly-scoring facade over several one-class models.

    Depends on names defined elsewhere in this file/project:
    ``Util`` (centroid / medoid / dist helpers), ``IGNG``, ``GNG``,
    ``NearestNeighbors`` and ``svm`` (scikit-learn), and the global
    ``PARAMS`` dict (keys "R", "K", "NU", "GAMMA").
    """

    def __init__(self, trainingSet, anomalyMethod="KNN", h=None):
        """Fit/build the underlying model ``self.h``.

        trainingSet   -- iterable of feature vectors used to fit the model.
        anomalyMethod -- one of "online", "centroid", "medoid", "IGNG",
                         "GNG", "KNN", "RNN", "SVM".
        h             -- pre-built incremental model; used only when
                         anomalyMethod == "online".

        Raises ValueError for an unrecognized method (the original code
        silently left ``self.h`` unset and failed later with NameError).
        """
        self.method = anomalyMethod
        if self.method == "online":
            # Caller supplies an already-trained incremental model.
            self.h = h
        elif self.method == "centroid":
            self.h = Util.centroid(trainingSet)
        elif self.method == "medoid":
            self.h = Util.medoid(trainingSet)
        elif self.method == "IGNG":
            self.h = IGNG(radius=PARAMS["R"])  # IGNG.estimate_radius(trainingSet)
            self.h.train(trainingSet)
        elif self.method == "GNG":
            self.h = GNG(period=50)
            self.h.train(trainingSet)
        elif self.method in ("KNN", "RNN"):
            # Both need the same fitted neighbour index; they differ only
            # in how it is queried in getAnomalyScore.
            self.h = NearestNeighbors(algorithm='ball_tree', metric='euclidean').fit(trainingSet)
        elif self.method == "SVM":
            self.h = svm.OneClassSVM(nu=PARAMS["NU"], kernel="rbf", gamma=PARAMS["GAMMA"]).fit(trainingSet)
        else:
            raise ValueError("unknown anomalyMethod: %r" % (anomalyMethod,))

    def getAnomalyScore(self, x, inversed=False):
        """Return an anomaly score for sample ``x``.

        For the distance-based methods a larger score means more anomalous;
        ``inversed=True`` returns the reciprocal (1/score).  For "SVM" the
        score is the negated decision-function value and ``inversed=True``
        flips its sign instead (matching the original behaviour).

        Raises ValueError for an unrecognized method (was an unbound-local
        NameError in the original).
        """
        if self.method == "SVM":
            # NOTE(review): [0][0] assumes decision_function returns a 2-D
            # array (older scikit-learn); newer releases return shape (n,) —
            # confirm against the installed version.
            score = -1. * self.h.decision_function(x)[0][0]
            return -score if inversed else score

        if self.method in ("online", "IGNG", "GNG"):
            # For "online" the author also considered getNearestDistToMature(x).
            score = self.h.getNearestDist(x)
        elif self.method in ("centroid", "medoid"):
            score = Util.dist(x, self.h)
        elif self.method == "KNN":
            # Sum of distances to the K nearest training samples.
            distances, _ = self.h.kneighbors(x, n_neighbors=PARAMS["K"])
            score = sum(distances[0])
        elif self.method == "RNN":
            # Inverse-distance density over neighbours within radius R,
            # squashed into (0, 1]; zero distances are skipped.
            distances, _ = self.h.radius_neighbors(x, radius=PARAMS["R"])
            score = 1. / (1. + sum(1. / di for di in distances[0] if di != 0))
        else:
            raise ValueError("unknown anomalyMethod: %r" % (self.method,))

        # Inversion rule shared by all distance-based methods (was
        # duplicated verbatim in every branch of the original).
        return 1. / score if inversed else score