コード例 #1
0
def cria_modelo(df_preprocessed, df_market, df_ptf):
    """Train a one-class SVM on the portfolio rows and label the market.

    Rows of ``df_preprocessed`` whose index appears in ``df_ptf.id`` form the
    training set.  Every row of ``df_preprocessed`` is then scored and is
    recommended when its score reaches the 10% quantile of the training
    scores.

    Returns:
        A tuple ``(df_treino, df_sim, lista_ids)``: the market rows labelled
        'Treino', the market rows labelled 'Sim', and a DataFrame built from
        the index of the 'Sim' rows.

    NOTE(review): labels are attached to ``df_market`` by position, so
    ``df_preprocessed`` and ``df_market`` are presumably row-aligned --
    confirm against the caller.
    """
    ptf_ids = df_ptf.id

    # Training data: preprocessed rows that belong to the portfolio.
    train_rows = df_preprocessed[df_preprocessed.index.isin(ptf_ids)]

    model = OneClassSVM(gamma='auto')
    model.fit(train_rows)

    # Threshold: 10% quantile of the scores obtained on the training rows.
    cutoff = np.quantile(model.score_samples(train_rows), 0.1)

    # Score every preprocessed row and compare against the threshold.
    is_recommended = model.score_samples(df_preprocessed) >= cutoff
    yes_no = ['Sim' if flag else 'Não' for flag in is_recommended]

    # Portfolio rows are marked 'Treino'; the others get their Sim/Não label.
    in_portfolio = df_market.index.isin(ptf_ids)
    labels = [
        'Treino' if owned else yes_no[pos]
        for pos, owned in enumerate(in_portfolio)
    ]

    labeled = df_market.copy()
    labeled.insert(8, 'recomendado', labels)
    labeled.insert(9, 'contador', 1)

    recomendado = labeled['recomendado']
    df_treino = labeled[recomendado == 'Treino']
    df_sim = labeled[recomendado == 'Sim']

    return df_treino, df_sim, pd.DataFrame(df_sim.index)
コード例 #2
0
def evaluate_authentication(filename, num_users):
    """Evaluate per-user one-class SVM authentication and print AUC stats.

    The CSV at ``filename`` is read with pandas; its last column holds the
    user id and every other column is a feature.  User ids are generated as
    'u001' ... 'u<num_users>'.  For each user, the first 66% of that user's
    rows train a ``OneClassSVM``; the remaining rows are the positive test
    set and all rows of the other users are the negative test set.

    The min-max scaling is fitted on the user's training rows only and then
    applied unchanged to both test sets, so positives and negatives are
    scored in the same feature space as the training data.

    Args:
        filename: path of the CSV file to read.
        num_users: number of user ids to evaluate.

    Returns:
        None.  The mean and standard deviation of the per-user values
        returned by ``compute_fpr_tpr`` are printed.
    """
    df = pd.read_csv(filename)

    userids = ['u%03d' % i for i in range(1, num_users + 1)]

    scaler = MinMaxScaler()
    auc_list = []
    for userid in userids:
        is_user = df.iloc[:, -1].isin([userid])

        # Feature rows of the current user (label column removed).
        user_data = df.loc[is_user]
        user_array = user_data.drop(user_data.columns[-1], axis=1).values

        # Feature rows of everybody else: the negative test set.
        other_users_data = df.loc[~is_user]
        other_users_array = other_users_data.drop(
            other_users_data.columns[-1], axis=1).values

        num_samples = user_array.shape[0]
        train_samples = int(num_samples * 0.66)

        # Fit the scaler on the training split only; re-fitting it on the
        # test data would put test samples in a different scale than the
        # one the model was trained on.
        user_train = scaler.fit_transform(user_array[:train_samples, :])
        user_test = scaler.transform(user_array[train_samples:, :])
        other_users_array = scaler.transform(other_users_array)

        clf = OneClassSVM(gamma='auto').fit(user_train)

        positive_scores = clf.score_samples(user_test)
        negative_scores = clf.score_samples(other_users_array)

        auc = compute_fpr_tpr(userid,
                              positive_scores,
                              negative_scores,
                              plot=False)
        auc_list.append(auc)
    print('mean: %5.2f, std: %5.2f' % (np.mean(auc_list), np.std(auc_list)))
コード例 #3
0
def do_one_class_svm(X):
    """Fit a one-class SVM on ``X`` and evaluate it on the same data.

    The model uses ``gamma=0.00001`` and ``nu=0.01``.

    Returns:
        A tuple ``(pred, scores)`` with the output of ``predict(X)`` and of
        ``score_samples(X)``.
    """
    model = OneClassSVM(gamma=0.00001, nu=0.01)
    model.fit(X)
    return model.predict(X), model.score_samples(X)
コード例 #4
0
ファイル: OCSVM.py プロジェクト: akiragondo1/AnomalyQCM
class OCSVMDetector(IAnomaly):
    """Sliding-window anomaly detector backed by a one-class SVM.

    Samples are dicts; only their "Resistance" entry is used.  Until the
    window has been filled, ``detect`` just stores samples and reports -1 for
    every field.  Afterwards each call drops the oldest sample, appends the
    new one, refits the SVM on the window and reports the prediction and the
    score of the newest sample.

    NOTE(review): ``slidingWindowSize`` defaults to None but ``detect``
    subtracts 1 from it, so a size must be supplied before detecting.
    """

    def __init__(self, slidingWindowSize=None):
        self.slidingWindowSize = slidingWindowSize
        self.receivedSamplesNumber = 0
        self.currentSamples = []
        self.clf = OneClassSVM(nu=0.1, kernel="rbf", gamma='auto')
        # Keys of the dict returned by detect().
        self.dictHeaders = ['detectionCode', 'anomalyLikelihood', 'anomalyScore']

    def appendNewData(self, sample):
        """Store the "Resistance" value of ``sample`` as a float."""
        self.currentSamples.append(float(sample["Resistance"]))
        self.receivedSamplesNumber += 1

    def detect(self, new_data):
        """Process one sample and return a dict keyed by ``dictHeaders``.

        While fewer than ``slidingWindowSize - 1`` samples have been
        received, the sample is only stored and all three values are -1.
        Otherwise 'detectionCode' is the ``fit_predict`` label of the newest
        sample, 'anomalyLikelihood' its ``score_samples`` value, and
        'anomalyScore' is always -1.
        """
        if self.receivedSamplesNumber < self.slidingWindowSize - 1:
            # Stabilization phase: just accumulate samples.
            self.appendNewData(new_data)
            return dict(zip(self.dictHeaders, [-1, -1, -1]))

        # Slide the window: drop the oldest sample, add the new one.
        self.currentSamples.pop(0)
        self.appendNewData(new_data)

        # Build the (n_samples, 1) window once and reuse it for both calls.
        window = np.array(self.currentSamples).reshape(-1, 1)
        result = self.clf.fit_predict(window)[-1]
        likelihood = self.clf.score_samples(window)[-1]
        return dict(zip(self.dictHeaders, [result, likelihood, -1]))

    def detectFromList(self, data):
        """Run ``detect`` on every sample of ``data``.

        Returns a list with one shallow copy of each input sample, updated
        with the fields returned by ``detect``.
        """
        results = []
        # Parenthesized single-argument form works on Python 2 and Python 3.
        print("Detecting anomalies for {} samples of data".format(len(data)))
        for data_point in tqdm(data):
            detection = self.detect(data_point)
            result = copy.copy(data_point)
            result.update(detection)
            results.append(result)
        return results
コード例 #5
0
ファイル: anomaly.py プロジェクト: ptrubey/projgamma
 def one_class_svm(self):
     """Return one-class SVM anomaly scores for the available data.

     The first attribute of ``self.data`` that exists among ``VW``, ``V``
     and ``W`` (tried in that order) is used both to fit the model and to
     score it.  The raw ``score_samples`` values are flipped and shifted
     (``raw.max() - raw + 1``) so that the lowest raw score gets the
     highest returned value and the minimum returned value is 1.

     Raises:
         AttributeError: if none of the attributes exists (after printing
             a short message).
     """
     last_error = None
     for attr in ('VW', 'V', 'W'):
         # Only the attribute lookup is guarded: an AttributeError raised
         # while fitting or scoring is a real error and must not be
         # mistaken for "this attribute is missing".
         try:
             X = getattr(self.data, attr)
         except AttributeError as err:
             last_error = err
             continue
         svm = OneClassSVM(gamma='auto').fit(X)
         raw = svm.score_samples(X)
         return raw.max() - raw + 1
     print('Where\'s the data?')
     raise last_error
コード例 #6
0
ファイル: uad.py プロジェクト: FumitoEbuchi/eblib
class OCSVM(object):
    """Thin wrapper around ``OneClassSVM`` returning anomaly-oriented scores.

    Args:
        kernel: kernel name passed to ``OneClassSVM``.
        d: polynomial degree, passed as ``degree``.
        gamma: kernel coefficient.  Forced to 1 when ``kernel`` is 'poly';
            the value 'auto' is mapped to sklearn's 'scale' when fitting.
        nu: passed as ``nu`` to ``OneClassSVM``.
    """

    def __init__(self, kernel='rbf', d=2, gamma=3.0, nu=0.1):
        self.kernel = kernel
        self.d = d
        # The polynomial kernel always uses gamma=1, whatever was passed.
        self.gamma = 1 if kernel == 'poly' else gamma
        self.nu = nu

    def _model_params(self):
        """Return the keyword arguments used to build the OneClassSVM."""
        return {
            'kernel': self.kernel,
            'degree': self.d,
            'gamma': 'scale' if self.gamma == 'auto' else self.gamma,
            'coef0': 1,
            # Previously stored but never forwarded, so the sklearn default
            # was silently used instead of the requested value.
            'nu': self.nu,
        }

    def fit(self, train_X):
        """Build a fresh ``OneClassSVM`` and fit it on ``train_X``."""
        self.model = OneClassSVM(**self._model_params())
        self.model.fit(train_X)

    def decision_function(self, X):
        """Return the negated ``score_samples`` of ``X``.

        The sign is flipped because the SVM scores the training (normal)
        side higher; after negation larger values mean more anomalous.
        """
        return (-1) * self.model.score_samples(X)
コード例 #7
0
def use_svm2(df_list, x_columns, **kwargs):
    """Fit a one-class SVM on the first frame and score every frame.

    Args:
        df_list: frames; the first one is the training set.
        x_columns: columns selected from each frame as features.
        **kwargs: must contain 'kernel', the kernel passed to OneClassSVM.

    Returns:
        A list with the ``score_samples`` output for each frame, in order.
    """
    model = OneClassSVM(kernel=kwargs['kernel'])
    model.fit(df_list[0][x_columns])

    return [
        model.score_samples(df_list[pos][x_columns])
        for pos in range(len(df_list))
    ]
コード例 #8
0
ファイル: outlier_detection.py プロジェクト: scoopmans/thesis
def detect_outliers_SVM(df):
    ''' Returns the outlier scores using SVM (beware: prone to overfitting)

    A OneClassSVM with default parameters is fitted on ``df`` and the
    ``score_samples`` values for the same rows are returned.

    Parameters:
    -----------
    df: pd.DataFrame,
        data used both for fitting and for scoring
    '''
    clf = OneClassSVM()
    # Only the fitted model is needed: the labels of fit_predict were
    # discarded, so plain fit avoids a wasted prediction pass.
    clf.fit(df)
    return clf.score_samples(df)
コード例 #9
0
def load_datas(dataset, Processing_Unit, n=10):  # 0 < n <= 100
    """Evaluate per-author one-class SVMs and write the average AUC to a file.

    For each distinct value of the 'author' column, the first two thirds of
    that author's rows train a ``OneClassSVM``; the author's remaining rows
    are scored as positives and all other authors' rows as negatives.  The
    per-author values returned by ``compute_fpr_tpr`` are averaged and the
    result is written to ``settings.aux_res``.

    Args:
        dataset: DataFrame with an 'author' column (and a 'function' column
            when ``Processing_Unit`` is "FUNCTION").
        Processing_Unit: when equal to "FUNCTION", the 'function' column is
            dropped from the features as well.
        n: unused by this function.

    Returns:
        None.
    """
    data = dataset
    authors_list = unique(data.author)
    authors_list.sort()
    SIZE = len(authors_list)

    roc_array = []
    for author in authors_list:
        is_author = data['author'] == author

        X = data.loc[is_author].drop("author", axis=1)
        other_users_array = data.drop(data[is_author].index).drop("author",
                                                                 axis=1)
        if Processing_Unit == "FUNCTION":
            X = X.drop("function", axis=1)
            other_users_array = other_users_array.drop("function", axis=1)

        # First two thirds of the author's rows train, the rest test.
        num_rows = X.shape[0]
        user_train = X.head(int(num_rows * 2 / 3))
        user_test = X.tail(num_rows - user_train.shape[0])

        clf = OneClassSVM(gamma='scale').fit(user_train)
        positive_scores = clf.score_samples(user_test)
        negative_scores = clf.score_samples(other_users_array)

        roc_array.append(
            compute_fpr_tpr(author, positive_scores, negative_scores))

    avg = sum(roc_array) / SIZE

    # Mode 'w' already truncates the file; the context manager guarantees
    # the handle is closed even if the write fails.
    with open(settings.aux_res, 'w') as w:
        w.write("avg AUC : " + str('%.4f' % avg))
コード例 #10
0
def use_model(model, df_list, x_columns, params):
    """Fit the requested detector on the first frame and score every frame.

    Args:
        model: one of 'knn', 'svm', 'isolationForest', 'autoencoder' or
            'lsanomaly'.  The historical spelling with an accented first
            letter ('\u00edsolationForest') is still accepted.
        df_list: frames; ``df_list[0]`` is the training set.
        x_columns: columns selected from each frame as features.
        params: model-specific settings ('n' and 'p' for knn, 'kernel' for
            svm, 'n_estimators' for the isolation forest, 'hidden_neurons'
            for the autoencoder, 'sigma' and 'rho' for lsanomaly).

    Returns:
        A list with one sequence of scores per frame.  An unrecognized
        ``model`` yields an empty list.
    """
    predicted = []

    if model == 'knn':
        neigh = NearestNeighbors(n_neighbors=params['n'], p=params['p'])
        neigh.fit(df_list[0][x_columns])

        for df in df_list:
            # kneighbors returns (distances, indices); the score is the
            # mean distance to the neighbours.
            distances = neigh.kneighbors(df[x_columns])[0]
            predicted.append([np.mean(row) for row in distances])

    elif model == 'svm':
        svm = OneClassSVM(kernel=params['kernel'])
        svm.fit(df_list[0][x_columns])

        for df in df_list:
            scores = svm.score_samples(df[x_columns])
            # Flip so that the highest raw score maps to 0.
            maximum = max(scores)
            predicted.append([(x * -1) + maximum for x in scores])

    elif model in ('isolationForest', '\u00edsolationForest'):
        # The accented spelling was the only key this branch used to
        # match; it is kept so existing callers do not break.
        clf = IsolationForest(n_estimators=params['n_estimators'],
                              random_state=0)
        clf.fit(df_list[0][x_columns])

        for df in df_list:
            scores = clf.score_samples(df[x_columns])
            predicted.append(list(map(abs, scores)))

    elif model == 'autoencoder':
        clf = AutoEncoder(hidden_neurons=params['hidden_neurons'],
                          verbose=0,
                          random_state=0)
        clf.fit(df_list[0][x_columns])
        for df in df_list:
            predicted.append(clf.decision_function(df[x_columns]))

    elif model == 'lsanomaly':
        anomalymodel = lsanomaly.LSAnomaly(sigma=params['sigma'],
                                           rho=params['rho'])
        anomalymodel.fit(df_list[0][x_columns].to_numpy())
        for df in df_list:
            proba = anomalymodel.predict_proba(df[x_columns].to_numpy())
            # Keep the second column of the predicted probabilities.
            predicted.append([a[1] for a in proba])

    return predicted
コード例 #11
0
ファイル: main.py プロジェクト: kurtukova/GP
def main():
    """Fit a one-class SVM per feature across the example source files.

    Every ``*.py`` file in the hard-coded example directory is read with
    ``read_src`` and turned into a feature dict with ``get_features``.  For
    each feature position, the values of that feature across all files form
    a one-column sample matrix on which a ``OneClassSVM`` is fitted; its
    predictions and scores are printed.
    """
    versus = []
    for path in glob.glob('D:/Scripts/oneclass/ex/*.py'):
        versus.append(get_features(read_src(path)))

    vs_values = [list(vs.values()) for vs in versus]
    # Transpose with zip(*...): one tuple per feature, holding that
    # feature's value in every file.  zip(vs_values) without the star
    # yields 1-tuples wrapping a whole row, which produced a 3-D input
    # that the estimator rejects.
    for feature_values in zip(*vs_values):
        vec = [[value] for value in feature_values]
        clf = OneClassSVM(gamma='auto').fit(vec)
        print('vecs: ', clf.predict(vec))
        print('scores: ', clf.score_samples(vec))
コード例 #12
0
def svm_nd(x_train, x_test, y_test, plot_roc=False):
    """Score one-class SVM novelty detection over kernel/gamma combinations.

    For every kernel in ('linear', 'poly', 'rbf', 'sigmoid') and every gamma
    in ("scale", "auto"), a ``OneClassSVM`` with ``max_iter=10000`` is fitted
    on ``x_train`` and its ``score_samples`` output on ``x_test`` is handed
    to ``roc.area`` together with ``y_test``.

    Returns:
        A list of ``[(kernel, gamma), area]`` entries in iteration order.
    """
    kernels = ['linear', 'poly', 'rbf', 'sigmoid']
    gammas = ["scale", "auto"]

    results = []
    for kernel in tqdm(kernels):
        print(f'SVM---{kernel}---{"*" * 33}')
        for gamma in gammas:
            model = OneClassSVM(kernel=kernel, gamma=gamma, max_iter=10000)
            model.fit(x_train)
            area = roc.area(y_test=y_test,
                            y_scores=model.score_samples(x_test),
                            pos_label=1,
                            title='OC-SVM - ',
                            plot_roc=plot_roc)
            results.append([(kernel, gamma), area])
    return results
コード例 #13
0
def correct_mine():
    """Fit a one-class SVM on 'correct' samples and count held-out outliers.

    The first 10000 test inputs and one-hot encoded labels (10 classes) are
    passed to ``cnn_profiler.get_correct_mid`` (with ``anchor=-2``), which
    returns a 'correct' and a 'wrong' collection.  A ``OneClassSVM`` is
    trained on the first 6000 'correct' entries; the remaining 'correct'
    entries are classified, and the number of entries together with the
    number labelled -1 by the model is printed.
    """
    x = X_test.astype("float32")[:10000]
    values = y_test.reshape(-1)[:10000]
    n_values = 10
    # One-hot encode the labels.
    y = np.eye(n_values)[values]
    correct_mid, wrong_mid = cnn_profiler.get_correct_mid(input_shape,
                                                          output_shape,
                                                          x,
                                                          y,
                                                          anchor=-2)
    print(len(correct_mid), len(wrong_mid))
    clf = OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5)
    clf.fit(correct_mid[:6000])
    # predict() returns the +1/-1 labels that the count below relies on;
    # score_samples() returns continuous scores, so comparing them with -1
    # never counted the outliers.
    result = clf.predict(correct_mid[6000:])
    print(len(result), len(result[result == -1]))
コード例 #14
0
 def fit(self, bags, y):
     """
     Train the parent classifier on instance-level labels inferred with a
     one-class SVM that is fitted on the instances of the negative bags.

     @param bags : a sequence of n bags; each bag is an m-by-k array-like
                   object containing m instances with k features
     @param y : an array-like object of length n containing -1/+1 labels
     """
     # Store every bag as a matrix and turn the labels into a column so that
     # zip(self._bags, y) pairs each bag with its own label.
     self._bags = [np.asmatrix(bag) for bag in bags]
     y = np.asmatrix(y).reshape((-1, 1))
     # Disabled earlier approach (give every instance its bag's label):
     #        svm_X = np.vstack(self._bags)
     #        svm_y = np.vstack([float(cls) * np.matrix(np.ones((len(bag), 1)))
     #                           for bag, cls in zip(self._bags, y)])
     # Collect the instances of the negative bags only.
     # NOTE(review): if no bag is labelled -1, list_X_neg stays empty and
     # np.vstack is called on an empty list -- confirm callers always pass
     # at least one negative bag.
     list_X_neg = []
     for bag, cls in zip(self._bags, y):
         if cls == -1:
             list_X_neg += [bag]
     X_neg = np.vstack(list_X_neg)
     # Disabled setting: nu=0.00001 (an upper bound on the fraction of
     # training errors); the model below uses the default parameters.
     onClassSVM = OneClassSVM()
     onClassSVM.fit(X_neg)
     # Disabled: scores of the negative instances and their minimum.
     #        score_samples_X_neg = onClassSVM.score_samples(X_neg) # Positive for the normal value ie negative instance here
     #        min_score = np.min(score_samples_X_neg)
     # Every instance of a negative bag is labelled -1.
     svm_X = [np.asmatrix(X_neg)]
     svm_y = [np.matrix(-np.ones((len(X_neg), 1)))]
     for bag, cls in zip(self._bags, y):
         if cls == 1:
             scores_bag = onClassSVM.score_samples(bag)
             # Negate the one-class prediction, so instances the model
             # accepts as similar to the negative data get -1 and the
             # rejected ones get +1.
             local_y = -1. * onClassSVM.predict(bag)
             # Force the lowest-scoring instance to +1 so that each positive
             # bag contributes at least one positive instance.
             local_y[np.argmin(scores_bag)] = 1.
             local_y = np.reshape(local_y, (len(bag), 1))
             svm_X += [bag]
             svm_y += [local_y]
     # Stack all instances and labels and delegate to the parent's fit.
     svm_X = np.vstack(svm_X)
     svm_y = np.vstack(svm_y)
     #        print('Number of positive instances :',len(np.nonzero(1+svm_y)[0]),'on ',len(np.nonzero(1+y)[0]),' positive bags')
     super(MIbyOneClassSVM, self).fit(svm_X, svm_y)
コード例 #15
0
"""
Copyright (c) 2019 ground0state. All rights reserved.
License: MIT License
"""
if __name__ == '__main__':
    import numpy as np
    from sklearn.svm import OneClassSVM

    # Comma-separated inputs: data for fitting and data to be scored.
    normal_data = np.loadtxt("../input/normal_data.csv", delimiter=",")
    error_data = np.loadtxt("../input/error_data.csv", delimiter=",")

    class Args():
        # Model settings, read as plain class attributes.
        kernel = "rbf"
        degree = 3
        gamma = "auto"

    model = OneClassSVM(kernel=Args.kernel,
                        degree=Args.degree,
                        gamma=Args.gamma)
    model.fit(normal_data)

    # Negative log of the raw scores of the error data.
    y_pred = -np.log(model.score_samples(error_data))

    import matplotlib.pyplot as plt
    plt.plot(y_pred)
    plt.show()
            print('\n******LOF*******\n')
            start = time.time()
            lof = LocalOutlierFactor()
            lof.fit(X)
            end = time.time()
            time_all[j, 1] = end - start
            lof_scores = lof.negative_outlier_factor_

            print('\n******1-class SVM*******\n')
            start = time.time()
            osvm = OneClassSVM(kernel='rbf')
            osvm.fit(X)
            end = time.time()
            time_all[j, 2] = end - start
            osvm_scores = osvm.score_samples(X)

        print('\n******Our Algo*******\n')
        start = time.time()
        t1, _ = np.shape(X)
        # n_samples = int(max(t1/250,100))
        # n_samples = int(t1/50)
        n_samples = 100
        kwargs = {
            'max_depth': 10,
            'n_trees': 50,
            'max_samples': n_samples,
            'max_buckets': 3,
            'epsilon': 0.1,
            'sample_axis': 1,
            'threshold': 0
コード例 #17
0
ファイル: one_class_svm.py プロジェクト: andy94077/HYML
    model_path = args.clustering_model_path
    latents_path = args.latents_file
    training = not args.no_training
    test = args.test
    ensemble = args.ensemble
    seed = args.seed
    input_shape = (32, 32)

    latents = np.concatenate([np.load(path) for path in latents_path], axis=0)
    latents = latents.reshape(latents.shape[0], -1)
    print(f'\033[32;1mlatents: {latents.shape}\033[0m')
    np.random.seed(880301)
    if training:
        model = OneClassSVM().fit(latents)
        utils.save_model(model_path, model)
    else:
        print('\033[32;1mLoading Model\033[0m')

    model = utils.load_model(model_path)
    if test:
        pred = model.score_samples(latents)

        if ensemble:
            np.save(test, pred)
        else:
            utils.generate_csv(pred, test)
    else:
        pred = model.score_samples(latents)
        print(f'\033[32;1mValidation score: {np.mean(pred)}\033[0m')
コード例 #18
0
def evaluate_authentication(df, verbose=False):
    """Evaluate per-user one-class SVM authentication on a single frame.

    The last column of ``df`` holds the user id.  For every id returned by
    ``create_userids(df)``, the first 66% of the user's rows train a
    ``OneClassSVM(gamma='scale')``; the user's remaining rows are scored as
    positives and all other users' rows as negatives.  Scores are then
    averaged over sliding windows of ``AGGREGATE_BLOCK_NUM`` consecutive
    values before AUC and EER are computed with ``compute_AUC_EER``.

    Returns:
        A tuple ``(auc_list, eer_list)`` with one entry per user.  Mean and
        standard deviation are printed; with ``verbose`` equal to True the
        per-user values and the global AUC/EER are printed as well.
    """

    def aggregate_in_place(scores):
        # Overwrite scores[k] with the mean of the window starting at k.
        # The array is modified in place, so the values collected for the
        # global statistics are the aggregated ones.
        for start in range(len(scores) - AGGREGATE_BLOCK_NUM + 1):
            window = scores[start:start + AGGREGATE_BLOCK_NUM]
            scores[start] = np.average(window, axis=0)
        return scores

    def features_of(rows):
        # Drop the trailing user-id column and return the raw values.
        return rows.drop(rows.columns[-1], axis=1).values

    print(df.shape)
    userids = create_userids(df)
    auc_list, eer_list = [], []
    global_positive_scores, global_negative_scores = [], []

    for idx in range(len(userids)):
        userid = userids[idx]

        user_array = features_of(df.loc[df.iloc[:, -1].isin([userid])])
        num_samples = user_array.shape[0]
        train_samples = int(num_samples * 0.66)
        user_train = user_array[:train_samples, :]
        user_test = user_array[train_samples:num_samples, :]

        other_users_array = features_of(
            df.loc[~df.iloc[:, -1].isin([userid])])

        clf = OneClassSVM(gamma='scale')
        clf.fit(user_train)

        positive_scores = clf.score_samples(user_test)
        negative_scores = clf.score_samples(other_users_array)

        y_pred_positive = aggregate_in_place(positive_scores)
        y_pred_negative = aggregate_in_place(negative_scores)

        auc, eer = compute_AUC_EER(y_pred_positive, y_pred_negative)

        global_positive_scores.extend(positive_scores)
        global_negative_scores.extend(negative_scores)

        if verbose == True:
            print(str(userid)+", "+ str(auc)+", "+str(eer) )

        auc_list.append(auc)
        eer_list.append(eer)

    print('AUC  mean : %7.4f, std: %7.4f' % ( np.mean(auc_list), np.std(auc_list)) )
    print('EER  mean:  %7.4f, std: %7.4f' % ( np.mean(eer_list), np.std(eer_list)) )

    if verbose == True:
        global_auc, global_eer = compute_AUC_EER(global_positive_scores, global_negative_scores)
        print("Global AUC: "+str(global_auc))
        print("Global EER: "+str(global_eer))
    return auc_list, eer_list
# Classification metrics on the training set: true labels Y1_train against
# predictions Y1_pred_train.
acc_score = accuracy_score(Y1_train, Y1_pred_train)
print(f'Accuratezza sul train set: {acc_score}')

prec_score = precision_score(Y1_train, Y1_pred_train)
print('Precisione sul train set: %.3f' % prec_score)

rec_score = recall_score(Y1_train, Y1_pred_train)
print('Recall sul train set: %.3f' % rec_score)

F1_score = f1_score(Y1_train, Y1_pred_train)
print('F1 score sul train set: %.3f' % F1_score)

# Plot of the decision-function values (originally labelled "box plot").

# Despite its name, df_train holds decision-function values, not a DataFrame.
df_train = clf.decision_function(X1_train_n)
score_samples_train = clf.score_samples(X1_train_n)

# One point per sample: x is the decision value, y is the sample position.
# NOTE(review): np.arange(0, 26, 1) hard-codes 26 positions, so this
# presumably assumes 26 training samples -- confirm.
plt.scatter(df_train, np.arange(0, 26, 1), s=5)
# Vertical red line at decision value 0.
plt.axvline(x=0, color='red')
plt.show()

# Test set

# Confusion matrix of the test labels against the test predictions.

confmat = confusion_matrix(y_true=Y_TEST, y_pred=Y_pred_TEST)

fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for i in range(confmat.shape[0]):
    for j in range(confmat.shape[1]):
コード例 #20
0
ファイル: oneclass.py プロジェクト: margitantal68/sapimouse
def evaluate_authentication_train_test(df_train,
                                       df_test,
                                       data_type,
                                       num_blocks,
                                       representation_type,
                                       verbose=False,
                                       roc_data=False,
                                       roc_data_filename=TEMP_NAME):
    """Evaluate per-user one-class SVMs with separate train and test frames.

    The last column of both frames holds the user id.  For every id returned
    by ``create_userids(df_train)``, a ``OneClassSVM(gamma='scale')`` is
    fitted on the user's rows of ``df_train``; the user's rows of ``df_test``
    are scored as positives and the other users' rows of ``df_test`` as
    negatives.  Scores are averaged over sliding windows of ``num_blocks``
    consecutive values before ``compute_AUC_EER`` is applied.

    Args:
        df_train: training frame.
        df_test: testing frame.
        data_type: object whose ``value`` is used in the output file name
            and in the printed summary.
        num_blocks: sliding-window length used for score aggregation.
        representation_type: object whose ``value`` is used in the output
            file name.
        verbose: when equal to True, print the per-user AUC and EER.
        roc_data: when equal to True, write the global FPR/TPR values to
            ``roc_data_filename`` as CSV.
        roc_data_filename: destination of the ROC data.

    Returns:
        A tuple ``(auc_list, eer_list)`` with one entry per user.
    """

    def aggregate_in_place(scores):
        # Overwrite scores[k] with the mean of the window starting at k.
        # The array is modified in place, so later uses of the same array
        # (normalization, global statistics) see the aggregated values.
        for start in range(len(scores) - num_blocks + 1):
            scores[start] = np.average(scores[start:start + num_blocks],
                                       axis=0)
        return scores

    def without_label(rows):
        # Drop the trailing user-id column.
        return rows.drop(rows.columns[-1], axis=1)

    print("Training: " + str(df_train.shape))
    print("Testing: " + str(df_test.shape))
    userids = create_userids(df_train)
    auc_list, eer_list = [], []
    global_positive_scores, global_negative_scores = [], []

    for idx in range(len(userids)):
        userid = userids[idx]

        user_train_data = without_label(
            df_train.loc[df_train.iloc[:, -1].isin([userid])])
        user_test_data = without_label(
            df_test.loc[df_test.iloc[:, -1].isin([userid])])
        other_users_data = without_label(
            df_test.loc[~df_test.iloc[:, -1].isin([userid])])

        clf = OneClassSVM(gamma='scale')
        clf.fit(user_train_data)

        positive_scores = clf.score_samples(user_test_data)
        negative_scores = clf.score_samples(other_users_data)

        y_pred_positive = aggregate_in_place(positive_scores)
        y_pred_negative = aggregate_in_place(negative_scores)

        auc, eer, _, _ = compute_AUC_EER(y_pred_positive, y_pred_negative)

        if SCORE_NORMALIZATION == True:
            positive_scores, negative_scores = score_normalization(
                positive_scores, negative_scores)

        global_positive_scores.extend(positive_scores)
        global_negative_scores.extend(negative_scores)

        if verbose == True:
            print(str(userid) + ", " + str(auc) + ", " + str(eer))

        auc_list.append(auc)
        eer_list.append(eer)

    print("\nNumber of blocks: ", num_blocks)
    print('AUC  mean : %7.4f, std: %7.4f' %
          (np.mean(auc_list), np.std(auc_list)))
    print('EER  mean:  %7.4f, std: %7.4f' %
          (np.mean(eer_list), np.std(eer_list)))

    print("#positives: " + str(len(global_positive_scores)))
    print("#negatives: " + str(len(global_negative_scores)))

    global_auc, global_eer, fpr, tpr = compute_AUC_EER(global_positive_scores,
                                                       global_negative_scores)

    filename = 'output_png/scores_' + str(data_type.value) + '_' + str(
        representation_type.value)
    if SCORES == True:
        plot_scores(global_positive_scores,
                    global_negative_scores,
                    filename,
                    title='Scores distribution')

    if roc_data == True:
        pd.DataFrame({'FPR': fpr, 'TPR': tpr}).to_csv(roc_data_filename,
                                                     index=False)

    print(data_type.value + " Global AUC: " + str(global_auc))
    print(data_type.value + " Global EER: " + str(global_eer))
    return auc_list, eer_list
コード例 #21
0
def use_svm(df_x_train, df_x_test, c=1):
    """Score ``df_x_test`` with a sigmoid-kernel one-class SVM.

    The model is fitted on ``df_x_train``.  The ``c`` argument is accepted
    but not used.

    Returns:
        The ``score_samples`` output for ``df_x_test``.
    """
    model = OneClassSVM(kernel='sigmoid')
    model.fit(df_x_train)
    return model.score_samples(df_x_test)
# Classification metrics on the training set: true labels Y1_train against
# predictions Y1_pred_train.
acc_score = accuracy_score(Y1_train, Y1_pred_train)
print(f'Accuratezza sul train set: {acc_score}')

prec_score = precision_score(Y1_train, Y1_pred_train)
print('Precisione sul train set: %.3f' % prec_score)

rec_score = recall_score(Y1_train, Y1_pred_train)
print('Recall sul train set: %.3f' % rec_score)

F1_score = f1_score(Y1_train, Y1_pred_train)
print('F1 score sul train set: %.3f' % F1_score)

# Plot of the decision-function values (originally labelled "box plot").

# Despite its name, df_train holds decision-function values, not a DataFrame.
df_train = clf.decision_function(X1_train_n_reduced)
score_samples_train = clf.score_samples(X1_train_n_reduced)

# One point per sample: x is the decision value, y is the sample position.
# NOTE(review): np.arange(0, 26, 1) hard-codes 26 positions, so this
# presumably assumes 26 training samples -- confirm.
plt.scatter(df_train, np.arange(0, 26, 1), s=5)
# Vertical red line at decision value 0.
plt.axvline(x=0, color='red')
plt.show()

# Test set

# Confusion matrix of the test labels against the test predictions.

confmat = confusion_matrix(y_true=Y_TEST, y_pred=Y_pred_TEST)

fig, ax = plt.subplots(figsize=(2.5, 2.5))
ax.matshow(confmat, cmap=plt.cm.Blues, alpha=0.3)
for i in range(confmat.shape[0]):
    for j in range(confmat.shape[1]):
コード例 #23
0
def evaluate_authentication_cross_day(df1, df2, verbose=False):
    """Evaluate per-user one-class SVMs trained on one session, tested on another.

    The last column of both frames holds the user id.  For every id returned
    by ``create_userids(df1)``, a ``OneClassSVM(gamma='scale')`` is fitted on
    the user's rows of ``df1``; the user's rows of ``df2`` are scored as
    positives and the other users' rows of ``df2`` as negatives.  Scores are
    averaged over sliding windows of ``AGGREGATE_BLOCK_NUM`` consecutive
    values before ``compute_AUC_EER`` is applied.

    Returns:
        A tuple ``(auc_list, eer_list)`` with one entry per user.  Mean and
        standard deviation are printed; with ``verbose`` equal to True the
        per-user values and the global AUC/EER are printed as well.
    """

    def aggregate_in_place(scores):
        # Overwrite scores[k] with the mean of the window starting at k.
        # The array is modified in place, so the values collected for the
        # global statistics are the aggregated ones.
        for start in range(len(scores) - AGGREGATE_BLOCK_NUM + 1):
            window = scores[start:start + AGGREGATE_BLOCK_NUM]
            scores[start] = np.average(window, axis=0)
        return scores

    def features_of(rows):
        # Drop the trailing user-id column and return the raw values.
        return rows.drop(rows.columns[-1], axis=1).values

    print("Session 1 shape: "+str(df1.shape))
    print("Session 2 shape: "+str(df2.shape))

    userids = create_userids(df1)

    global_positive_scores, global_negative_scores = [], []
    auc_list, eer_list = [], []

    for idx in range(len(userids)):
        userid = userids[idx]

        session1_rows = df1.loc[df1.iloc[:, -1].isin([userid])]
        session2_rows = df2.loc[df2.iloc[:, -1].isin([userid])]

        # Training data: the user's first-session rows.
        user_session1_array = features_of(session1_rows)
        # Positive test data: the same user's second-session rows.
        user_session2_array = features_of(session2_rows)
        # Negative test data: everybody else's second-session rows.
        other_users_session2_array = features_of(
            df2.loc[~df2.iloc[:, -1].isin([userid])])

        clf = OneClassSVM(gamma='scale')
        clf.fit(user_session1_array)

        positive_scores = clf.score_samples(user_session2_array)
        negative_scores = clf.score_samples(other_users_session2_array)

        y_pred_positive = aggregate_in_place(positive_scores)
        y_pred_negative = aggregate_in_place(negative_scores)

        auc, eer = compute_AUC_EER(y_pred_positive, y_pred_negative)

        global_positive_scores.extend(positive_scores)
        global_negative_scores.extend(negative_scores)

        if verbose == True:
            print(str(userid)+": "+ str(auc)+", "+str(eer) )
        auc_list.append(auc)
        eer_list.append(eer)

    print('AUC  mean : %7.4f, std: %7.4f' % ( np.mean(auc_list), np.std(auc_list)) )
    print('EER  mean:  %7.4f, std: %7.4f' % ( np.mean(eer_list), np.std(eer_list)) )

    if verbose == True:
        global_auc, global_eer = compute_AUC_EER(global_positive_scores, global_negative_scores)
        print("Global AUC: "+str(global_auc))
        print("Global EER: "+str(global_eer))
    return auc_list, eer_list
コード例 #24
0
# Fit an RBF one-class SVM on X_train and report how many samples of the
# test and training sets it labels as anomalies (-1).
print(X_test.shape)
#import pickle
import joblib
from sklearn.svm import OneClassSVM
#from sklearn.model_selection import GridSearchCV, ParameterGrid

# Model settings; "eplison" is passed to OneClassSVM as its `tol` argument.
eplison = 0.001
gamma = 0.0001
nu = 0.001
one_svm_rbf = OneClassSVM(nu=nu, kernel='rbf', gamma=gamma, tol=eplison)
one_svm_rbf.fit(X_train)
what_kernel = 'rbf'

print('testing data------------------------------------------------')
# Labels and raw scores for the test set.
Y_result_rbf = one_svm_rbf.predict(X_test)
Y_scroe_rbf = one_svm_rbf.score_samples(X_test)
print('test data size :{}'.format(X_test.shape[0]))
# Number of test samples labelled -1, then the same as a percentage.
print('test data anomaly : {}'.format(np.sum(Y_result_rbf == -1)))
print('rbf:{}'.format(np.sum(Y_result_rbf == -1) / len(Y_result_rbf) * 100))

print('traning data------------------------------------------------')

# Same report for the data the model was fitted on.
Y_result_rbf_t = one_svm_rbf.predict(X_train)
Y_scroe_rbf_t = one_svm_rbf.score_samples(X_train)
print('train data size :{}'.format(X_train.shape[0]))
print('train data anomaly : {}'.format(np.sum(Y_result_rbf_t == -1)))
print('rbf:{}'.format(
    np.sum(Y_result_rbf_t == -1) / len(Y_result_rbf_t) * 100))

print('all data------------------------------------------------')
# Same tolerance value as above, assigned again for the section that follows.
eplison = 0.001
コード例 #25
0
ファイル: oc_svm.py プロジェクト: JuneKyu/CLAD
def main():
    """Fit a One-Class SVM baseline on a dataset and print its AUROC.

    Steps:
      1. Parse the command-line options and mirror them onto the shared
         ``config`` module.
      2. Attach a per-run log file to ``config.logger``.
      3. Load the dataset, flatten every sample to a 1-D vector and fit
         ``OneClassSVM(gamma='auto')`` on the training samples.
      4. Score the ``test_in`` and ``test_out`` splits with ``score_samples``
         and print the area under the ROC curve, treating ``test_in``
         (label 0) as the positive class.

    Returns nothing; the result is written to stdout and run metadata to the
    log file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='../data')
    parser.add_argument('--dataset_name', type=str, default='swat')
    parser.add_argument('--normal_class_index_list', nargs='+',
                        default=[0])  # get a list of normal class indexes
    parser.add_argument('--cluster_num', type=int, default=5)
    parser.add_argument('--n_hidden_features', type=int, default=10)
    parser.add_argument('--cluster_type', type=str, default='gmm')
    parser.add_argument('--dec_pretrain_lr', type=float, default=0.01)
    parser.add_argument('--dec_train_epochs', type=int, default=100)
    parser.add_argument('--dec_train_lr', type=float, default=0.01)
    parser.add_argument('--save_cluster_model', type=str2bool, default=False)
    parser.add_argument('--load_cluster_model', type=str2bool, default=False)
    parser.add_argument('--classifier', type=str, default='linear')
    parser.add_argument('--classifier_epochs', type=int, default=200)
    parser.add_argument('--classifier_lr', type=float, default=0.01)
    parser.add_argument('--save_classifier_model',
                        type=str2bool,
                        default=False)
    parser.add_argument('--load_classifier_model',
                        type=str2bool,
                        default=False)
    parser.add_argument('--temperature', type=float, default=1000)
    parser.add_argument('--perturbation', type=float, default=0.001)
    parser.add_argument('--plot_clustering', type=str2bool, default=False)

    args = parser.parse_args()

    data_path = args.data_path
    dataset_name = args.dataset_name
    # if image data, set rgb flag
    if dataset_name in config.rgb_datasets:
        config.is_rgb = True
        config.cvae_channel = 3

    # Values given on the command line arrive as strings (no ``type=`` on the
    # argument), so normalise every entry to int.
    normal_class_index_list = [int(i) for i in args.normal_class_index_list]
    cluster_type = args.cluster_type
    classifier = args.classifier
    n_hidden_features = args.n_hidden_features
    temperature = args.temperature
    perturbation = args.perturbation

    # Mirror the parsed options onto the shared config module.
    config.normal_class_index_list = normal_class_index_list
    config.cluster_num = args.cluster_num
    config.n_hidden_features = n_hidden_features
    config.cluster_type = cluster_type
    config.save_cluster_model = args.save_cluster_model
    config.load_cluster_model = args.load_cluster_model
    config.classifier = classifier
    config.classifier_epochs = args.classifier_epochs
    config.classifier_lr = args.classifier_lr
    config.save_classifier_model = args.save_classifier_model
    config.load_classifier_model = args.load_classifier_model
    config.temperature = temperature
    config.perturbation = perturbation
    config.plot_clustering = args.plot_clustering

    # logger
    log = config.logger
    sub_log_path = config.sub_log_path
    # exist_ok=True avoids the check-then-create race of testing
    # os.path.exists() first (e.g. two runs starting at the same time).
    os.makedirs(config.log_path, exist_ok=True)
    os.makedirs(sub_log_path, exist_ok=True)
    fileHandler = logging.FileHandler(
        os.path.join(
            sub_log_path,
            config.current_time + '-' + dataset_name + '-' + cluster_type +
            '-' + classifier + '.txt'))
    fileHandler.setFormatter(config.formatter)
    log.addHandler(fileHandler)

    log.info("-" * 99)
    log.info("-" * 10 + str(args) + "-" * 10)
    log.info("-" * 99)
    # Read the clock once per line so hour/minute/second belong to the same
    # instant instead of three separate now() calls.
    now = datetime.datetime.now()
    log.info('START %s:%s:%s\n' % (now.hour, now.minute, now.second))
    now = datetime.datetime.now()
    log.info('%s:%s:%s\n' % (now.hour, now.minute, now.second))

    print("dataset name : " + dataset_name)
    log.info("dataset name : " + dataset_name)
    print("classifier : " + classifier)
    log.info("classifier : " + classifier)

    print("normal_class_index_list : {}".format(normal_class_index_list))
    log.info("normal_class_index_list : {}".format(normal_class_index_list))
    print("n_hidden_features : {}".format(n_hidden_features))
    log.info("n_hidden_features : {}".format(n_hidden_features))
    print("temperature : {}".format(temperature))
    log.info("temperature : {}".format(temperature))
    print("perturbation : {}".format(perturbation))
    log.info("perturbation : {}".format(perturbation))

    # loading dataset
    dataset = load_dataset(dataset_name=dataset_name, data_path=data_path)

    print("")
    print("dataset loading successful!")
    log.info("dataset loading successful")

    train_x = dataset["train_x"]
    test_in = dataset["test_in"]
    test_out = dataset["test_out"]

    print(dataset_name)
    print(normal_class_index_list)

    ocsvm = OneClassSVM(gamma='auto')

    # Each sample is flattened to a 1-D numpy vector before being handed to
    # scikit-learn (samples expose .view(-1).numpy()).
    print("fitting to one_class_svm")
    ocsvm.fit([x.view(-1).numpy() for x in train_x])

    print("predicting test_in")
    test_in_pred = ocsvm.score_samples(
        [t_i.view(-1).numpy() for t_i in test_in])

    print("predicting test_out")
    test_out_pred = ocsvm.score_samples(
        [t_o.view(-1).numpy() for t_o in test_out])

    # Label 0 = test_in, label 1 = test_out. With pos_label=0 a higher
    # score_samples value counts as evidence for the test_in class.
    labels = [0] * len(test_in_pred) + [1] * len(test_out_pred)
    fpr, tpr, thresholds = roc_curve(labels,
                                     test_in_pred.tolist() +
                                     test_out_pred.tolist(),
                                     pos_label=0)
    auroc = auc(fpr, tpr)
    print(auroc)
コード例 #26
0
ファイル: train.py プロジェクト: zzhaozeng/google-research
 def eval_embed(self, trainset, testset):
   """Evaluate performance on test set.

   Extracts features from `trainset` and `testset` via `self.extract`, then
   fills in every entry of `self.eval_metrics` in place. The key prefix
   ('logit', 'dscore', 'embed', 'pool') selects which score / feature set is
   used and a substring of the key ('auc', 'locsvm', 'kocsvm', 'kde', 'gde')
   selects how the anomaly score is computed. Every result is the value
   returned by `util_metric.roc` against the test labels.

   Args:
     trainset: training data passed to `self.extract`; only the embedding and
       pooled outputs are used.
     testset: test data passed to `self.extract`.
   """
   _, _, embeds_tr, pools_tr, _ = self.extract(trainset)
   probs, dscores, embeds, pools, labels = self.extract(testset)
   # Similarity of test features to train features; presumably a pairwise
   # negative half squared distance -- TODO confirm against
   # `self.squared_difference`.
   sim_embed = -0.5 * self.squared_difference(embeds, embeds_tr, True)
   sim_pool = -0.5 * self.squared_difference(pools, pools_tr, True)
   # Per test row: one minus the single largest similarity (top_k with k=1).
   dist_embed = tf.reduce_mean(1.0 - tf.nn.top_k(sim_embed, k=1)[0], axis=1)
   dist_pool = tf.reduce_mean(1.0 - tf.nn.top_k(sim_pool, k=1)[0], axis=1)
   for key in self.eval_metrics:
     # Step 1: pick the prediction (and, for embed/pool, the raw features and
     # similarity matrix) from the key prefix. There is no else-branch, so a
     # key with none of these prefixes reuses the values left over from the
     # previous iteration.
     if key.startswith('logit'):
       pred = 1.0 - probs[:, 0]
     elif key.startswith('dscore'):
       pred = 1.0 - dscores
     elif key.startswith('embed'):
       pred = dist_embed
       feats_tr = embeds_tr.numpy()
       feats = embeds.numpy()
       sim = sim_embed
     elif key.startswith('pool'):
       pred = dist_pool
       feats_tr = pools_tr.numpy()
       feats = pools.numpy()
       sim = sim_pool
     # Step 2: pick the scoring method from a substring of the key. All
     # methods other than 'auc' are only applied to embed/pool keys.
     if 'auc' in key:
       self.eval_metrics[key] = util_metric.roc(pr=pred, gt=labels)
     elif 'locsvm' in key and key.startswith(('embed', 'pool')):
       # Linear kernel OC-SVM.
       clf = OneClassSVM(kernel='linear').fit(feats_tr)
       # Negated so that a larger value means "more anomalous".
       scores = -clf.score_samples(feats)
       self.eval_metrics[key] = util_metric.roc(pr=scores, gt=labels)
     elif 'kocsvm' in key and key.startswith(('embed', 'pool')):
       # RBF kernel OC-SVM.
       feats_tr = tf.nn.l2_normalize(feats_tr, axis=1)
       feats = tf.nn.l2_normalize(feats, axis=1)
       # 10 times larger value of gamma.
       gamma = 10. / (tf.math.reduce_variance(feats_tr) * feats_tr.shape[1])
       clf = OneClassSVM(kernel='rbf', gamma=gamma).fit(feats_tr)
       scores = -clf.score_samples(feats)
       self.eval_metrics[key] = util_metric.roc(pr=scores, gt=labels)
     elif 'kde' in key and key.startswith(('embed', 'pool')):
       # RBF kernel density estimation.
       # NOTE(review): gamma is derived from the L2-normalised train features,
       # while `sim` was computed above from the outputs of `self.extract`
       # as-is -- confirm whether those are already normalised.
       feats_tr = tf.nn.l2_normalize(feats_tr, axis=1)
       gamma = 10. / (tf.math.reduce_variance(feats_tr) * feats_tr.shape[1])
       scores = None
       # Process the similarity matrix in row batches of 100 and concatenate
       # the per-batch scores.
       batch_size_for_kde = 100
       num_iter = int(np.ceil(sim.shape[0] / batch_size_for_kde))
       for i in range(num_iter):
         sim_batch = sim[i * batch_size_for_kde:(i + 1) * batch_size_for_kde]
         # Negative log-sum-exp over the train axis, rescaled by 1 / gamma.
         scores_batch = -tf.divide(
             tf.reduce_logsumexp(2 * gamma * sim_batch, axis=1), gamma)
         scores = scores_batch if scores is None else tf.concat(
             (scores, scores_batch), axis=0)
       self.eval_metrics[key] = util_metric.roc(pr=scores, gt=labels)
     elif 'gde' in key and key.startswith(('embed', 'pool')):
       # Gaussian density estimation with full covariance.
       feats_tr = tf.nn.l2_normalize(feats_tr, axis=1)
       feats = tf.nn.l2_normalize(feats, axis=1)
       # A one-component mixture, i.e. a single full-covariance Gaussian.
       km = GMM(n_components=1, init_params='kmeans', covariance_type='full')
       km.fit(feats_tr)
       scores = -km.score_samples(feats)
       self.eval_metrics[key] = util_metric.roc(pr=scores, gt=labels)
コード例 #27
0
ファイル: SVM1C_sklearn_actives.py プロジェクト: etibonuni/VS
            componentResults.append((0, 0))

        train_ds = cu.lumpRecords(n_fold_ds)
        svm1c = OneClassSVM()

        train_a = train_ds[train_ds["active"] == True]

        #ann.fit(train_ds.iloc[:, 0:numcols], train_ds.iloc[:, numcols])
        svm1c.fit(train_a.iloc[:, 0:numcols], None)
        #    G_a = GaussianMixture(n_components=best_components, covariance_type="full").fit(train_ds.iloc[:, 0:numcols],
        #                                                                               train_ds.iloc[:, numcols])

        results = pd.DataFrame()

        results["score"] = [
            max(svm1c.score_samples(x[0].iloc[:, 0:numcols])) for x in test_ds
        ]
        #results["a_score"] = [G_a.score(x[0].iloc[:, 0:numcols]) for x in test_ds]
        results["truth"] = [x[2] for x in test_ds]  #np.array(test_ds)[:, 2]
        molName = molfiles[molNdx][
            1]  #[molfiles[molNdx].rfind("/", 0, -1)+1:-1]
        auc = eval.plotSimROC(
            results["truth"], [results["score"]],
            molName + "[1C-SVM, " + str(portion * 100) + "%]",
            molName + "_1CSVM_sim_" + str(portion * 100) + ".pdf")
        auc_rank = eval.plotRankROC(
            results["truth"], [results["score"]],
            molName + "[1C-SVM, " + str(portion * 100) + "%]",
            molName + "_1CSVM_rank_" + str(portion * 100) + ".pdf")
        mean_ef = eval.getMeanEFs(np.array(results["truth"]),
                                  np.array([results["score"]]),
コード例 #28
0
def train(loader, epoch, model_list, method='ocsvm'):
    """Fit one anomaly detector per class and derive a threshold for each.

    A score above the threshold means the sample is considered normal.

    Args:
        loader: data source handed to ``get_features``.
        epoch: current epoch, forwarded to the 'ocnn' fit/score helpers.
        model_list: ``(clf_list, optimizers)`` from the previous round. The
            models are only reused by methods that train over several rounds
            (e.g. 'ocnn'); the optimizers are indexed per class every time.
        method: detector name: 'ocsvm', 'isofore', 'gmm', 'svdd', 'lof',
            'ocnn', 'sp' or 'cnn'. Any other value reuses ``clf_list[label]``.

    Returns:
        ``(model_list, threshold_list)`` where ``model_list`` is
        ``(update_models, optimizers)``.
    """
    datas, labels = get_features(loader)
    clf_list, optimizers = model_list

    threshold_list = []
    update_models = []
    update_optimizer = []

    # Lazily-evaluated constructors for detectors that are rebuilt from
    # scratch on every call.
    builders = {
        'ocsvm': lambda data: OneClassSVM(),
        'isofore': lambda data: IsolationForest(),
        'gmm': lambda data: BayesianGaussianMixture(),
        'svdd': lambda data: SVDD(parameters),
        'lof': lambda data: LocalOutlierFactor(
            novelty=True, n_neighbors=int(data.size * 0.1)),
    }

    for label in range(args.class_num):  # one detector per class
        # Training samples that carry this class label.
        fit_data = datas[np.where(labels == label)[0]]
        optimizer = optimizers[label]

        # 'sp' and 'cnn' train no detector; they only (re)assign the
        # threshold list on every pass through the loop.
        if method == 'sp':
            from cnn import get_c_v
            threshold_list = get_c_v(p_s=datas, labels=labels)
            continue
        if method == 'cnn':
            threshold_list = ''
            continue

        build = builders.get(method)
        clf = build(fit_data) if build is not None else clf_list[label]

        # Train the anomaly-detection model and score its own training data.
        if method == 'ocnn':
            clf, optimizer = fit(clf, fit_data, optimizer, epoch)
            scores_temp = score_samples(clf, fit_data, epoch)
        else:
            clf.fit(fit_data)
            if method == 'lof':
                scores_temp = clf.decision_function(fit_data)
            else:
                scores_temp = clf.score_samples(fit_data)

        # Threshold: the mean training score, lowered by a multiple of the
        # standard deviation for every method except 'gmm'.
        threshold = np.mean(scores_temp)
        if method != 'gmm':
            threshold = threshold - \
                args.threshold_std_times * np.std(scores_temp)

        update_optimizer.append(optimizer)
        update_models.append(clf)
        threshold_list.append(threshold)

    # NOTE(review): update_optimizer is collected above but the incoming
    # `optimizers` is what gets returned -- confirm this is intended.
    model_list = (update_models, optimizers)
    return model_list, threshold_list
コード例 #29
0
def sklearn_oneclass(featureset, target, classnum):
    """Evaluate a One-Class SVM for a single class over ten random splits.

    For each of ten stratified 80/20 splits the features are passed through
    ``IAtool.minepro``, a ``OneClassSVM(nu=0.02)`` is fitted on the training
    samples whose label equals ``classnum``, and accuracy / FAR / FRR are
    computed from ``one_accuracy_score`` applied to the decision-function
    values at threshold 0. Per-round and mean results are printed; nothing is
    returned.

    Args:
        featureset: feature matrix.
        target: class labels aligned with ``featureset``.
        classnum: label of the class to model (per the original author's
            note, class numbering starts at 1).
    """
    meanacc, meanfar, meanfrr = [], [], []

    for t in range(0, 10):
        split = train_test_split(featureset,
                                 target,
                                 test_size=0.2,
                                 random_state=t * 30,
                                 stratify=target)
        train_data, test_data, train_target, test_target = split

        train_data, test_data, _ = IAtool.minepro(train_data, test_data,
                                                  train_target, 30)

        print("进入第", t, "轮分类的oneclass阶段")

        # Train only on samples of the requested class; the test set keeps
        # every sample, relabelled +1 (target class) / -1 (everything else).
        oneclasstestdata = test_data
        oneclasstraindata = [
            train_data[k] for k in range(len(train_target))
            if train_target[k] == (classnum)
        ]
        oneclasstesttarget = [
            1 if test_target[k] == (classnum) else -1
            for k in range(len(test_target))
        ]

        clf = OneClassSVM(nu=0.02)
        clf.fit(oneclasstraindata)

        result = clf.predict(oneclasstestdata)
        scores = clf.score_samples(oneclasstestdata)
        dist = clf.decision_function(oneclasstestdata)
        print('原结果:', oneclasstesttarget)
        print('预测结果:', result)
        print('预测分数:', scores)
        print('模型距离:', dist)

        # Threshold 0 is applied to the decision-function values. The
        # original author's note lists other per-detector thresholds for
        # raw scores: onesvm 1, EllipticEnvelope -80, IsolationForest -0.64,
        # LocalOutlierFactor -1.
        tp, tn, fp, fn = one_accuracy_score(oneclasstesttarget, dist, 0)
        print(tp, tn, fp, fn)

        total = tp + tn + fp + fn
        accuracy = (tp + tn) / total
        far = (fp) / (fp + tn)  # false acceptance rate
        frr = (fn) / (fn + tp)  # false rejection rate
        print("accuracy:", accuracy, "far:", far, "frr:", frr)

        meanacc.append(accuracy)
        meanfar.append(far)
        meanfrr.append(frr)

    print("meanacc:", np.mean(meanacc), "meanfar:", np.mean(meanfar),
          "meanfrr:", np.mean(meanfrr))
コード例 #30
0
                                            window_size,
                                            subset='test')
    data_test_anomaly = FirmaData_select_subjects(data_folder_dir,
                                                  window_size,
                                                  par_anodata[0],
                                                  par_anodata[1],
                                                  par_anodata[2],
                                                  test_subject_list,
                                                  subset='train')
    data_train = data_train.datamat
    data_test_normal = data_test_normal.datamat
    data_test_anomaly = data_test_anomaly.data
    data_test_anomaly = data_test_anomaly
    model.fit(data_train)

    score_train = model.score_samples(data_train)
    score_test_normal = model.score_samples(data_test_normal)
    score_test_anomaly = model.score_samples(data_test_anomaly)

    predict_train = model.predict(data_train)
    predict_train = 1 * predict_train > 0
    predict_test_normal = 1 * model.predict(data_test_normal) > 0
    predict_test_anomaly = 1 * model.predict(data_test_anomaly) > 0
    #    print(np.mean(score_train),np.mean(score_test_normal),np.mean(score_test_anomaly))

    N_test_normal = data_test_normal.shape[0]
    #    N_test_anomaly=data_test_anomaly.shape[0]
    N_test_anomaly = len(data_test_anomaly)
    N_train = data_train.shape[0]

    TP = np.sum(predict_test_normal)