Esempio n. 1
0
 def __init__(self,
              anomalies,
              unlabel,
              classifer,
              return_proba=True,
              n_clusters='auto',
              cluster_algo='kmeans',
              contamination=0.02,
              theta=0.85,
              alpha='auto',
              beta='auto',
              random_state=2018):
     """Standardize the data, store the hyper-parameters and cluster the anomalies.

     :param anomalies: Known positive samples (the P set).

     :param unlabel: Unlabeled samples (the U set).

     :param classifer: The classifier to use.

     :param return_proba: bool, default=True
           Whether to return the posterior probability of a sample being positive.

     :param n_clusters: default='auto'
           Number of clusters; it can be given up front or be determined
           automatically by get_cluster_centers.

     :param cluster_algo: str, {'kmeans', 'spectral', 'birch', 'dbscan'}, default='kmeans'
           Clustering algorithm applied to the anomalies.

     :param contamination: float, default=0.02
           Estimated proportion of anomalous (positive) samples in the U set.

     :param theta: float, default=0.85
           isolation_score and similarity_score are weighted by theta and
           1 - theta respectively.

     :param alpha: default='auto'
           Threshold above which an unlabeled sample counts as a potential
           anomaly; the paper's default is the mean total_score of the known
           anomalies (P set).

     :param beta: default='auto'
           Threshold used to decide whether an unlabeled sample is a reliable
           normal sample.

     :param random_state: default=2018
           Stored on the instance; not used inside this constructor.
     """
     # Local import keeps this block self-contained even if the module does
     # not already import numpy.
     import numpy as np

     # Fit ONE scaler on P and U stacked together. Fitting each set separately
     # would standardize them with different means/variances, so the anomaly
     # cluster centers and the unlabeled samples would no longer live in the
     # same feature space.
     n_anomalies = len(anomalies)
     scaled = StandardScaler().fit_transform(np.r_[anomalies, unlabel])
     self.anomalies = scaled[:n_anomalies, :]  # scaled P set
     self.unlabel = scaled[n_anomalies:, :]  # scaled U set

     self.n_clusters = n_clusters
     self.classifer = classifer
     self.return_proba = return_proba
     self.cluster_algo = cluster_algo
     self.contamination = contamination
     self.theta = theta
     self.alpha = alpha
     self.beta = beta
     self.random_state = random_state

     # Cluster centers of the (scaled) known anomalies.
     self.centers = get_cluster_centers(self.anomalies, self.n_clusters,
                                        self.cluster_algo)
Esempio n. 2
0
    def __init__(self,
                 anomalies,
                 unlabel,
                 classifer,
                 cluster_algo='kmeans',
                 n_clusters='auto',
                 contamination=0.01,
                 theta=0.85,
                 alpha='auto',
                 beta='auto',
                 return_proba=False,
                 random_state=2018):
        """Jointly standardize the data, keep the settings and cluster the anomalies.

        :param anomalies: Observed anomaly data set.

        :param unlabel: Unlabeled data set.

        :param classifer: Classifier that fits weighted samples and labels in
              order to predict the unlabeled samples.

        :param cluster_algo: str, {'kmeans', 'spectral', 'birch', 'dbscan'}, default='kmeans'
              Clustering algorithm used on the anomaly samples.

        :param n_clusters: default='auto'
              Number of clusters to form (and of centroids to generate).
              Forwarded unchanged to get_cluster_centers, which decides how
              'auto' is resolved.

        :param contamination: float, range (0, 0.5), default=0.01
              Proportion of outliers in the data set.

        :param theta: float, range [0, 1], default=0.85
              isolation_score is weighted by theta, similarity_score by 1 - theta.

        :param alpha: positive float or 'auto', default='auto'
              Threshold for flagging an unlabeled sample as a potential anomaly;
              documented default is the mean score of the anomalies.

        :param beta: positive float or 'auto', default='auto'
              Threshold for flagging an unlabeled sample as a reliable normal sample.

        :param return_proba: bool, default=False
              Whether to return the predicted probability of the positive
              (anomaly) class for each sample. Requires the classifier to
              provide a predict_proba method.

        :param random_state: default=2018
              Stored on the instance; not used inside this constructor.
        """
        # Scale both sets with a single scaler so they share one feature space,
        # then cut the result back into its anomaly / unlabeled parts.
        stacked = np.r_[anomalies, unlabel]
        scaled = StandardScaler().fit_transform(stacked)
        split_at = len(anomalies)
        self.anomalies, self.unlabel = scaled[:split_at, :], scaled[split_at:, :]

        # Plain hyper-parameter storage.
        self.classifer = classifer
        self.cluster_algo = cluster_algo
        self.n_clusters = n_clusters
        self.contamination = contamination
        self.theta = theta
        self.alpha = alpha
        self.beta = beta
        self.return_proba = return_proba
        self.random_state = random_state

        # get_cluster_centers yields a (centers, score) pair for the scaled anomalies.
        clustering = get_cluster_centers(self.anomalies, self.n_clusters, self.cluster_algo)
        self.centers, self.cluster_score = clustering
Esempio n. 3
0
    def __init__(self, anomalies, unlabel, classifer, cluster_algo='kmeans',
                 n_clusters='auto', kernel='rbf', verbose=3,
                 contamination=0.01, theta=0.85, alpha='auto', beta='auto',
                 return_proba=False, random_state=2018):
        """Jointly standardize the data, keep the settings and cluster the anomalies.

        :param anomalies: Observed anomaly data set.

        :param unlabel: Unlabeled data set.

        :param classifer: Classifier that fits weighted samples and labels in
              order to predict the unlabeled samples.

        :param cluster_algo: str, {'kmeans', 'spectral', 'birch', 'dbscan'}, default='kmeans'
              Clustering algorithm used on the anomaly samples.

        :param n_clusters: default='auto'
              Number of clusters to form (and of centroids to generate).
              Forwarded unchanged to get_cluster_centers, which decides how
              'auto' is resolved.

        :param kernel: str, default='rbf'
              One of 'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'.

        :param verbose: int, default=3
              Verbosity; the higher the value, the fewer messages. KernelPCA is
              time-consuming and this helps to follow the reconstruction
              progress: with verbose = m, information is printed every m rounds.

        :param contamination: float, range (0, 0.5), default=0.01
              Proportion of outliers in the data set.

        :param theta: float, range [0, 1], default=0.85
              isolation_score is weighted by theta, similarity_score by 1 - theta.

        :param alpha: positive float or 'auto', default='auto'
              Threshold for flagging an unlabeled sample as a potential anomaly;
              documented default is the mean score of the anomalies.

        :param beta: positive float or 'auto', default='auto'
              Threshold for flagging an unlabeled sample as a reliable normal sample.

        :param return_proba: bool, default=False
              Whether to return the predicted probability of the positive
              (anomaly) class for each sample. Requires the classifier to
              provide a predict_proba method.

        :param random_state: default=2018
              Stored on the instance; not used inside this constructor.
        """
        # One scaler for both sets so they share a feature space; the full
        # scaled matrix is kept on the instance as well as its two parts.
        stacked = np.r_[anomalies, unlabel]
        self.dataset = StandardScaler().fit_transform(stacked)
        split_at = len(anomalies)
        self.anomalies = self.dataset[:split_at, :]
        self.unlabel = self.dataset[split_at:, :]

        # Plain hyper-parameter storage.
        self.classifer = classifer
        self.cluster_algo = cluster_algo
        self.n_clusters = n_clusters
        self.kernel = kernel
        self.verbose = verbose
        self.contamination = contamination
        self.theta = theta
        self.alpha = alpha
        self.beta = beta
        self.return_proba = return_proba
        self.random_state = random_state

        # get_cluster_centers yields a (centers, score) pair for the scaled anomalies.
        clustering = get_cluster_centers(self.anomalies, self.n_clusters,
                                         self.cluster_algo)
        self.centers, self.cluster_score = clustering