Code example #1
def gammaExample():
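    # Contrast the decision boundary of an rbf OCSVM for a large gamma (2.1)
    # and a very small gamma (0.0005) on the same synthetic two-cluster data.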
    # Generate train data
    X = 0.3 * np.random.randn(100, 2)
    X_train = np.r_[X + 2, X - 2]
    pickle.dump(X_train,
                open("/Users/LT/Documents/Uni/MA/increOCSVM/Xtrain.p", "w+"))
    #X_train = pickle.load(open("/Users/LT/Documents/Uni/MA/increOCSVM/Xtrain.p", 'r+'))

    # Generate some regular novel observations
    X = 0.3 * np.random.randn(15, 2)
    X_test = np.r_[X + 2, X - 2]

    # Generate some abnormal novel observations
    X_outliers = np.random.uniform(low=-4, high=4, size=(15, 2))

    # Train the data
    clf = ocsvm.OCSVM("rbf", nu=0.1, gamma=2.1)
    clf.train(X_train)

    #print "alpha_s: %s" % clf._data.alpha_s()

    #Plot the data
    plot(clf, X_train, X_test, X_outliers, 100, False)
    plt.figure()
    clf = ocsvm.OCSVM("rbf", nu=0.1, gamma=0.0005)
    clf.train(X_train)

    #Plot the data
    plot(clf, X_train, X_test, X_outliers, 100, False)
    plt.show()
Code example #2
File: evaluation.py Project: feuerchop/increOCSVM
def grid_search_sklearn(X,
                        label,
                        split=0.8,
                        nu_range=[0.05 * i for i in range(1, 20)],
                        gamma_range=[
                            0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3,
                            1, 3, 10, 30
                        ],
                        novelty=novelty,
                        verbose=True):
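    # Exhaustive search over (nu, gamma): fit on the first `split` fraction of X,
    # score precision/recall/F1 on all of X, and keep the best F1 whose recall is
    # not 1.0.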
    all_data = []
    train_split = int(floor(X.shape[0] * split))
    nu_gamma_f1 = [0, 0, 0]
    for nu in nu_range:
        for gamma in gamma_range:
            clf = ocsvm.OCSVM("rbf", nu=nu, gamma=gamma)
            try:
                clf.fit(X[:train_split], scale=nu * len(X[:train_split]))
                expected = np.asarray(label) * novelty
                predicted = clf.predict(X) * novelty
                precision, recall, f1score, support = precision_recall_fscore_support(
                    expected, predicted, average='binary')
                #print "nu: %s, gamma: %s -> precision: %s, recall: %s, f1: %s" % (nu, gamma, precision, recall, f1score)
                all_data.append([nu, gamma, precision, recall, f1score])
                if f1score > nu_gamma_f1[2] and recall != 1.0:
                    nu_gamma_f1 = [nu, gamma, f1score]
            except Exception, e:
                print "train error"
                continue
Code example #3
File: evaluation.py Project: feuerchop/increOCSVM
def evaluate_dataset(X, label, mnist=False, dataset=None):
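    # Grid-search (nu, gamma) with grid_search_incre, pickle the chosen k-fold
    # split, refit with the best parameters on the training split, and report
    # the confusion matrix and precision/recall/F1.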
    split = 0.8
    train_split = int(floor(X.shape[0] * split))
    nu_gamma_f1, kfold = grid_search_incre(
        X,
        label,
        split=split,
        nu_range=[0.01 * i for i in range(80, 95)],
        gamma_range=[0.01, 0.03, 0.1, 0.3, 1, 3])  #,
    #nu_range=[0.05*i for i in range(1,11)],
    #gamma_range=[0.01, 0.03, 0.1, 0.3, 1, 3])
    pickle.dump(kfold, open('datasets/kddcup99/best_kfold_%s.p' % dataset,
                            'w+'))
    # train with best
    print "nu_gamma_f1: %s" % nu_gamma_f1
    clf = ocsvm.OCSVM("rbf", nu=nu_gamma_f1[0], gamma=nu_gamma_f1[1])
    clf.fit(X[:train_split], scale=nu_gamma_f1[0] * len(X[:train_split]))

    expected = np.asarray(label) * novelty
    if mnist:
        expected = expected.tolist()
        expected = [e[0] for e in expected]
    predicted = clf.predict(X) * novelty

    confusion = output_cf(expected, predicted)
    print("Confusion matrix:\n%s" % confusion)
    #print("Confusion matrix:\n%s" % confusion_matrix(expected, predicted))
    precision, recall, f1score, support = precision_recall_fscore_support(
        expected, predicted, average='binary')
    print "precision: %s, recall: %s, f1-score: %s" % (precision, recall,
                                                       f1score)
Code example #4
def standardExample():
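    # Fit an OCSVM (rbf, nu=0.1, gamma=2.1) on two synthetic Gaussian clusters,
    # plot it together with the gold example, and compute
    # mu_all = -gram(Xs, X) . alpha for the support vectors.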
    # Generate train data
    X = 0.3 * np.random.randn(100, 2)
    X_train = np.r_[X + 2, X - 2]
    pickle.dump(X_train,
                open("/Users/LT/Documents/Uni/MA/increOCSVM/Xtrain.p", "w+"))
    #X_train = pickle.load(open("/Users/LT/Documents/Uni/MA/increOCSVM/Xtrain.p", 'r+'))

    # Generate some regular novel observations
    X = 0.3 * np.random.randn(15, 2)
    X_test = np.r_[X + 2, X - 2]

    # Generate some abnormal novel observations
    X_outliers = np.random.uniform(low=-4, high=4, size=(15, 2))

    # Train the data
    clf = ocsvm.OCSVM("rbf", nu=0.1, gamma=2.1, e=1e-8)
    clf.train(X_train)

    #print "alpha_s: %s" % clf._data.alpha_s()

    #Plot the data
    plot(clf, X_train, X_test, X_outliers, 100, False)
    goldExample(X_train, X_test, X_outliers)
    plt.show()
    #plt.savefig('test.pdf')

    # new point
    X = 0.3 * np.random.randn(1, 2)
    X_new = np.r_[X + 5, X - 5]
    #print X_new[0]

    mu_all = -clf.gram(clf._data.Xs(), clf._data.X()).dot(clf._data.alpha())
Code example #5
File: evaluation.py Project: feuerchop/increOCSVM
def evaluate_kdd99(X, label, nu, gamma):
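    # Warm-start an OCSVM on a small chunk of the training split, add the
    # remaining training points with increment(), and report precision/recall/F1
    # on all of X.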
    split = 0.8
    print "grid search incremental"
    all_data = []
    train_split = int(floor(X.shape[0] * split))
    nu_start = 0.975
    start_train_size = int(ceil(len(X) * split * nu / nu_start))
    clf = ocsvm.OCSVM("rbf", nu=nu, gamma=gamma)

    clf.fit(X[:train_split][:start_train_size],
            scale=nu * len(X[:train_split][:start_train_size]))
    clf.increment(X[:train_split][start_train_size:])
    expected = np.asarray(label) * novelty
    predicted = clf.predict(X) * novelty
    precision, recall, f1score, support = precision_recall_fscore_support(
        expected, predicted, average='binary')
    print "nu: %s, gamma: %s -> precision: %s, recall: %s, f1: %s" % (
        nu, gamma, precision, recall, f1score)
Code example #6
File: evaluation.py Project: feuerchop/increOCSVM
def classify_random_data(X_train, X_test, X_outliers, variance=20):
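    # Fit an OCSVM on X_train, plot its learned frontier on a meshgrid, and show
    # the training, regular test, and outlier points with their error counts.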
    # fit the model
    xx, yy = np.meshgrid(np.linspace(-variance, variance, 500),
                         np.linspace(-variance, variance, 500))
    #clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.1)
    clf = ocsvm.OCSVM(nu=0.1, gamma=0.1)
    #clf.fit(X_train)
    clf.fit(X_train, scale=0.1 * X_train.shape[0])
    y_pred_train = clf.predict(X_train)
    y_pred_test = clf.predict(X_test)
    y_pred_outliers = clf.predict(X_outliers)
    n_error_train = y_pred_train[y_pred_train == -1].size
    n_error_test = y_pred_test[y_pred_test == -1].size
    n_error_outliers = y_pred_outliers[y_pred_outliers == 1].size

    # plot the line, the points, and the nearest vectors to the plane
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.title("Novelty Detection")
    plt.contourf(xx,
                 yy,
                 Z,
                 levels=np.linspace(Z.min(), 0, 7),
                 cmap=plt.cm.Blues_r)
    a = plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors='red')
    plt.contourf(xx, yy, Z, levels=[0, Z.max()], colors='orange')

    b1 = plt.scatter(X_train[:, 0], X_train[:, 1], c='white')
    b2 = plt.scatter(X_test[:, 0], X_test[:, 1], c='green')
    c = plt.scatter(X_outliers[:, 0], X_outliers[:, 1], c='red')
    plt.axis('tight')
    plt.xlim((-variance, variance))
    plt.ylim((-variance, variance))
    plt.legend([a.collections[0], b1, b2, c], [
        "learned frontier", "training observations",
        "new regular observations", "new abnormal observations"
    ],
               loc="upper left",
               prop=matplotlib.font_manager.FontProperties(size=11))
    plt.xlabel("error train: %d/200 ; errors novel regular: %d/40 ; "
               "errors novel abnormal: %d/40" %
               (n_error_train, n_error_test, n_error_outliers))
    plt.show()
Code example #7
File: evaluation.py Project: feuerchop/increOCSVM
def profile_incremental(X_train, labels=None):
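    # Run the incremental OCSVM on X_train (a cProfile variant is kept commented
    # out below) and report a confusion matrix plus precision/recall/F1.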
    nu_start = 0.95
    nu = 0.3
    clf = ocsvm.OCSVM("rbf", nu=nu_start, gamma=10)
    if nu < nu_start:
        train_size = int(ceil(len(X_train) * nu / nu_start))
    else:
        train_size = len(X_train)
    break_count = X_train.shape[0]
    incremental_ocsvm(clf, X_train, train_size, nu_start * train_size, 0,
                      break_count)
    #profile.runctx('incremental_ocsvm(clf, X_train, train_size, scale, ac, break_count)',
    #               globals(), {'clf':clf, 'X_train': X_train, 'train_size': train_size,
    #                           'scale':  nu_start*train_size, 'ac': 0, 'break_count': break_count},
    #               filename='stats')
    #p = pstats.Stats('stats')
    #p.strip_dirs().sort_stats('cumulative').print_stats()
    expected = labels * novelty
    predicted = clf.predict(X_train) * novelty
    predicted[predicted == 0] = novelty
    confusion = confusion_matrix(expected, predicted)
    print("Confusion matrix:\n%s" % confusion)
    precision, recall, f1score, support = precision_recall_fscore_support(
        expected, predicted, average='binary')
    print "precision: %s, recall: %s, f1-score: %s" % (precision, recall,
                                                       f1score)
Code example #8
File: evaluation.py Project: feuerchop/increOCSVM
def load_digits_small():
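    # Restrict the sklearn digits data to two classes (1 vs. 2), grid-search
    # (nu, gamma) by precision on the first half of the samples, then refit the
    # best model and report its confusion matrix and precision/recall/F1.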
    # The digits dataset
    digits = load_digits()

    # The data that we are interested in is made of 8x8 images of digits, let's
    # have a look at the first 3 images, stored in the `images` attribute of the
    # dataset.  If we were working from image files, we could load them using
    # pylab.imread.  Note that each image must have the same size. For these
    # images, we know which digit they represent: it is given in the 'target' of
    # the dataset.
    images_and_labels = list(zip(digits.images, digits.target))
    for index, (image, label) in enumerate(images_and_labels[:4]):
        plt.subplot(2, 4, index + 1)
        plt.axis('off')
        plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
        plt.title('Training: %i' % label)

    # To apply a classifier on this data, we need to flatten the image, to
    # turn the data in a (samples, feature) matrix:
    n_samples = len(digits.images)
    data = digits.images.reshape((n_samples, -1))
    target = digits.target

    # only take some classes of digits
    c1 = 1
    c2 = [2]
    target_bin_index = [i for i, t in enumerate(target) if t == c1 or t in c2]
    bin_samples = len(target_bin_index)
    binary_data = data[target_bin_index]
    binary_target = target[target_bin_index]
    binary_target[binary_target == c1] = 1
    binary_target[binary_target != c1] = -1

    nu_gamma_precision = [0, 0, 0]
    nu_range = [0.1 * i for i in range(1, 10)]
    gamma_range = [
        0.0001, 0.0003, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30
    ]
    for nu in nu_range:
        for gamma in gamma_range:
            clf = ocsvm.OCSVM("rbf", nu=nu, gamma=gamma)
            clf.fit(binary_data[:bin_samples / 2])
            expected = binary_target[:bin_samples / 2:]
            predicted = clf.predict(binary_data[:bin_samples / 2:])

            precision, recall, f1score, _ = precision_recall_fscore_support(
                expected, predicted, average='binary')
            if precision > nu_gamma_precision[2]:
                nu_gamma_precision = [nu, gamma, precision]

    print "nu_gamma_precision: %s" % nu_gamma_precision
    clf = ocsvm.OCSVM("rbf",
                      nu=nu_gamma_precision[0],
                      gamma=nu_gamma_precision[1])
    clf.fit(binary_data[:bin_samples / 2])
    expected = binary_target[:bin_samples / 2:]
    predicted = clf.predict(binary_data[:bin_samples / 2:])
    print("Confusion matrix:\n%s" % confusion_matrix(expected, predicted))
    precision, recall, f1score, support = precision_recall_fscore_support(
        expected, predicted, average='binary')
    print "precision: %s, recall: %s, f1-score: %s" % (precision, recall,
                                                       f1score)
Code example #9
File: evaluation.py Project: feuerchop/increOCSVM
def evaluate_semi_supervised(X,
                             label,
                             nu,
                             gamma,
                             train_size=20,
                             zero_init=True,
                             dataset=None,
                             ratio=0.99):
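    # Cross-validated comparison of the incremental (partly supervised) OCSVM
    # with the cvxopt and sklearn baselines: per fold it prints confusion
    # matrices and precision/recall/F1, collects precision-recall curves on the
    # 5th fold, and finally reports averages over all folds.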
    nu_start = 0.975
    dataset = dataset if dataset is not None else "mnist"
    print "data size: %s, nu: %s, gamma: %s" % (len(X), nu, gamma)
    i = 1
    #kfold = get_kfold_cv(X.shape[0], 5)
    kfold = pickle.load(open('datasets/kddcup99/best_kfold_None.p'))
    expected = label * novelty
    expected = expected.ravel()

    precision_recall_f1 = {
        'IncreOCSVM': {
            'Precision': 0,
            'Recall': 0,
            'F1': 0
        },
        'cvxopt-OCSVM': {
            'Precision': 0,
            'Recall': 0,
            'F1': 0
        },
        'sklearn-OCSVM': {
            'Precision': 0,
            'Recall': 0,
            'F1': 0
        }
    }
    precision_recall_avg = []
    for train_index, test_index in kfold:

        print "============ %s. Fold of CV ============" % i
        print "1) Incremental OCSVM"
        break_count = len(X) - train_size
        X_train, X_test = X[train_index], X[test_index]
        label_train, label_test = label[train_index], label[test_index]

        if zero_init:
            clf_inc = ocsvm.OCSVM("rbf", nu=nu_start, gamma=gamma)
            if nu < nu_start:
                train_size = int(ceil(len(X_train) * nu / nu_start))

                clf_inc.fit(X_train[:train_size],
                            scale=nu_start * train_size,
                            rho=False)
                split = int(X_train[train_size:].shape[0] * ratio)
                clf_inc.increment(X_train[train_size:][:split], init_ac=0)
                clf_inc.increment_supervised(X_train[train_size:][split:],
                                             label_train[train_size:][split:],
                                             init_ac=0)

        else:
            clf_inc = ocsvm.OCSVM("rbf", nu=nu, gamma=gamma)
            clf_inc.fit(X_train[:train_size],
                        scale=nu_start * train_size,
                        rho=False)
            incremental_ocsvm(clf_inc, X_train, train_size, nu, break_count)
        predicted = clf_inc.predict(X) * novelty
        ### confusion matrix and precision-recall data only for the 5th fold
        if i == 5:
            confusion1 = confusion_matrix(expected, predicted)
            predicted_prob = clf_inc.decision_function(X) * novelty
            both_labels = np.concatenate((expected.reshape((len(expected), 1)),
                                          predicted_prob.reshape(
                                              (len(predicted_prob), 1))),
                                         axis=1)
            both_labels = both_labels[both_labels[:, 1].argsort()]
            tmp_dict = {'label': 'Incremental Semi-Supervised OCSVM'}
            tmp_dict['precision'], tmp_dict['recall'], tmp_dict['avg_precision'] = \
            get_precision_recall_data(both_labels[:,0][::-1], both_labels[:,1][::-1])
            precision_recall_avg.append(tmp_dict)
        confusion = output_cf(expected, predicted)

        print("Confusion matrix:\n%s" % confusion)
        precision, recall, f1score, support = precision_recall_fscore_support(
            expected, predicted, average='binary')
        precision_recall_f1['IncreOCSVM']['Precision'] += precision
        precision_recall_f1['IncreOCSVM']['Recall'] += recall
        precision_recall_f1['IncreOCSVM']['F1'] += f1score
        print "precision: %s, recall: %s, f1-score: %s" % (precision, recall,
                                                           f1score)
        print "Number of support vectors: %s" % len(clf_inc._data.alpha_s())
        print "-----------"
        if X_train.shape[0] < 1000:
            print "2) cvxopt-OCSVM"
            clf = ocsvm.OCSVM("rbf", nu=nu, gamma=gamma)
            cvxopt_ocsvm(clf, X_train, nu * X_train.shape[0], nu)
            predicted = clf.predict(X) * novelty
            ### confusion matrix and precision-recall data only for the 5th fold
            if i == 5:
                confusion2 = confusion_matrix(expected, predicted)
                predicted_prob = clf.decision_function(X) * novelty
                both_labels = np.concatenate(
                    (expected.reshape((len(expected), 1)),
                     predicted_prob.reshape((len(predicted_prob), 1))),
                    axis=1)
                both_labels = both_labels[both_labels[:, 1].argsort()]
                tmp_dict = {'label': 'cvxopt-OCSVM'}
                tmp_dict['precision'], tmp_dict['recall'], tmp_dict['avg_precision'] = \
                get_precision_recall_data(both_labels[:,0][::-1], both_labels[:,1][::-1])
                precision_recall_avg.append(tmp_dict)
            confusion = output_cf(expected, predicted)
            print("Confusion matrix:\n%s" % confusion)
            precision, recall, f1score, support = precision_recall_fscore_support(
                expected, predicted, average='binary')
            precision_recall_f1['cvxopt-OCSVM']['Precision'] += precision
            precision_recall_f1['cvxopt-OCSVM']['Recall'] += recall
            precision_recall_f1['cvxopt-OCSVM']['F1'] += f1score
            print "precision: %s, recall: %s, f1-score: %s" % (precision,
                                                               recall, f1score)
            print "Number of support vectors: %s" % len(clf._data.alpha_s())
            print "---------"
        else:
            print "2) Datasize too big for cvxopt-OCSVM. Not enough memory."
        print "3) sklearn-OCSVM"
        clf = svm.OneClassSVM(kernel="rbf", nu=nu, gamma=gamma)
        sklearn_ocsvm(clf, X_train)
        predicted = clf.predict(X) * novelty
        ### confusion matrix and precision-recall data only for the 5th fold
        if i == 5:

            confusion3 = confusion_matrix(expected, predicted)
            predicted_prob = clf.decision_function(X) * novelty
            both_labels = np.concatenate((expected.reshape((len(expected), 1)),
                                          predicted_prob.reshape(
                                              (len(predicted_prob), 1))),
                                         axis=1)
            both_labels = both_labels[both_labels[:, 1].argsort()]
            tmp_dict = {'label': 'sklearn-OCSVM'}
            tmp_dict['precision'], tmp_dict['recall'], tmp_dict['avg_precision'] = \
            get_precision_recall_data(both_labels[:,0][::-1], both_labels[:,1][::-1])
            precision_recall_avg.append(tmp_dict)
        confusion = output_cf(expected, predicted)
        print("Confusion matrix:\n%s" % confusion)
        precision, recall, f1score, support = precision_recall_fscore_support(
            expected, predicted, average='binary')
        precision_recall_f1['sklearn-OCSVM']['Precision'] += precision
        precision_recall_f1['sklearn-OCSVM']['Recall'] += recall
        precision_recall_f1['sklearn-OCSVM']['F1'] += f1score
        print "Number of support vectors: %s" % len(clf.support_vectors_)
        print "precision: %s, recall: %s, f1-score: %s" % (precision, recall,
                                                           f1score)
        #plot_data.plot_multiple_cf(cm1_normalized, ['negative', 'positive'], cm2_normalized, cm3_normalized, colorbar=True)
        if i == 5 and X_train.shape[0] < 1000:
            if zero_init:
                zi = "zero"
            else:
                zi = "nonzero"
            plot_data.plot_multiple_cf(
                confusion1, ['negative', 'positive'],
                ['Incremental OCSVM', 'cvxopt-OCSVM', 'sklearn-OCSVM'],
                confusion2,
                confusion3,
                colorbar=True,
                filename_prefix="results_performance/%s_%s_%s_%s-%s" %
                (dataset, nu, gamma, zi, i))
            plot_data.plot_multiple_precision_recall_curves(
                precision_recall_avg,
                filename_prefix="results_performance/%s_%s_%s_%s-%s" %
                (dataset, nu, gamma, zi, i))
        i += 1
        #break

    print "========================================"
    print "Average Incremental OCSVM results:"
    precision = precision_recall_f1['IncreOCSVM']['Precision'] / (i - 1)
    recall = precision_recall_f1['IncreOCSVM']['Recall'] / (i - 1)
    f1 = 2 * precision * recall / (precision + recall)
    print "precision: %s, recall: %s, f1-score: %s" % (precision, recall, f1)
    print "Average cvxopt-OCSVM results:"
    precision = precision_recall_f1['cvxopt-OCSVM']['Precision'] / (i - 1)
    recall = precision_recall_f1['cvxopt-OCSVM']['Recall'] / (i - 1)
    f1 = 2 * precision * recall / (precision + recall)
    print "precision: %s, recall: %s, f1-score: %s" % (precision, recall, f1)
    print "Average sklearn-OCSVM results:"
    precision = precision_recall_f1['sklearn-OCSVM']['Precision'] / (i - 1)
    recall = precision_recall_f1['sklearn-OCSVM']['Recall'] / (i - 1)
    f1 = 2 * precision * recall / (precision + recall)
    print "precision: %s, recall: %s, f1-score: %s" % (precision, recall, f1)
Code example #10
File: evaluation.py Project: feuerchop/increOCSVM
def grid_search_incre(
        X,
        label,
        split=0.8,
        nu_range=[0.01 * i for i in range(1, 100)],
        gamma_range=[0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30],
        novelty=novelty,
        verbose=True):
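    # 5-fold grid search over (nu, gamma) for the incremental OCSVM: warm-start
    # on a small chunk, increment the remaining training points, average
    # precision and recall over the folds, and stop early once the averaged F1
    # exceeds 0.85.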
    print "grid search incremental"
    all_data = []
    train_split = int(floor(X.shape[0] * split))
    nu_gamma_f1 = [0, 0, 0]
    nu_start = 0.975
    kfold = get_kfold_cv(X.shape[0], 5)
    #kfold = pickle.load(open('datasets/kddcup99/best_kfold_pima.p'))
    stop = False
    for nu in nu_range:

        start_train_size = int(len(X) * split * nu / nu_start)
        for gamma in gamma_range:
            #if nu == 0.03 and gamma == 0.3:
            print nu, gamma
            precision_5fold = {'min': 1, 'max': 0, 'avg': 0}
            recall_5fold = {'min': 1, 'max': 0, 'avg': 0}
            i = 0
            for train_index, test_index in kfold:
                #try:

                X_train = X[train_index]
                label_train = label[train_index]
                clf = ocsvm.OCSVM("rbf", nu=nu_start, gamma=gamma)
                clf.fit(X_train[:start_train_size],
                        scale=nu_start * len(X_train[:start_train_size]))
                success = clf.increment(X_train[start_train_size:])
                if not success: continue
                #split_1 = int(X_train[start_train_size:].shape[0] * 0.95)
                #clf.increment(X_train[start_train_size:][:split_1], init_ac=0)
                #success = clf.increment_supervised(X_train[start_train_size:][split_1:],
                #                         label_train[start_train_size:][split_1:],init_ac=0)
                #if not success: continue
                expected = np.asarray(label) * novelty
                predicted = clf.predict(X) * novelty
                precision, recall, f1score, _ = precision_recall_fscore_support(
                    expected, predicted, average='binary')

                #print "precision: %s, recall: %s, f1score: %s" % (precision, recall, f1score)
                precision_5fold['avg'] += precision
                if precision < precision_5fold['min']:
                    precision_5fold['min'] = precision
                if precision > precision_5fold['max']:
                    precision_5fold['max'] = precision

                if recall < recall_5fold['min']:
                    recall_5fold['min'] = recall
                if recall > recall_5fold['max']:
                    recall_5fold['max'] = recall

                recall_5fold['avg'] += recall
                #confusion = output_cf(expected, predicted)
                #print("Confusion matrix:\n%s" % confusion)
                i += 1
                #if f1score > 0.86:
                #stop = True
                #kfold_result = [[train_index, test_index]]
                #kfold_result = kfold
                #break
                #except:
                #    print "train error"
            if i == 0: continue
            precision_5fold['avg'] /= float(i)
            recall_5fold['avg'] /= float(i)
            if precision_5fold['avg'] + recall_5fold['avg'] > 0:
                f1 = 2 * (precision_5fold['avg'] * recall_5fold['avg']) \
                     / (precision_5fold['avg'] + recall_5fold['avg'])
            else:
                f1 = 0
            #print "averages: nu: %s, gamma: %s -> precision: %s, recall: %s, f1: %s" \
            #% (nu, gamma, precision_5fold['avg'], recall_5fold['avg'], f1)
            all_data.append(
                [nu, gamma, precision_5fold['avg'], recall_5fold['avg'], f1])
            if f1 > nu_gamma_f1[2] and recall != 1.0:
                nu_gamma_f1 = [nu, gamma, f1]
                print "averages: nu: %s, gamma: %s -> precision: %s, recall: %s, f1: %s" \
                  % (nu, gamma, precision_5fold['avg'], recall_5fold['avg'], f1)
                if f1 > 0.85:
                    stop = True
            print "--------------------------------------------------------------------"
            if stop: break
        if stop: break

    all_data = sorted(all_data, key=lambda x: -x[4])
    if verbose:
        pd.set_option('display.max_rows', None)
        df = pd.DataFrame(all_data,
                          columns=['nu', 'gamma', 'precision', 'recall', 'f1'])
        print df
    return nu_gamma_f1, kfold
Code example #11
File: evaluation.py Project: feuerchop/increOCSVM
def train_cvxopt_ocsvm(X_train):
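    # Fit a single OCSVM with the cvxopt-based solver (fixed nu=0.2, gamma=1).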
    nu = 0.2
    gamma = 1
    clf = ocsvm.OCSVM("rbf", nu=nu, gamma=gamma)
    cvxopt_ocsvm(clf, X_train, nu * X_train.shape[0], nu)
Code example #12
def incrementExample():
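    # Load pickled train/outlier data and run the gold example; the code below
    # the sys.exit() call compares a batch fit on all points (including the
    # first outlier) with a fit on the training data followed by increment().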
    nu = 0.3
    # Generate train data
    X = 1.5 * np.random.randn(50, 2)
    #X_train = np.r_[X + 2, X-2]
    X_train = X
    #pickle.dump(X_train, open("/Users/LT/Documents/Arbeit/Siemens/increOCSVM/Xtrain.p", "w+"))
    X_train = pickle.load(
        open("/Users/LT/Documents/Arbeit/Siemens/increOCSVM/Xtrain.p", 'r+'))
    #print X_train
    # Generate some regular novel observations
    X = 1.5 * np.random.randn(10, 2)
    #X_test = np.r_[X + 2,X-2]
    X_test = X
    # Generate some abnormal novel observations
    X_outliers = np.random.uniform(low=-4, high=4, size=(5, 2))
    #pickle.dump(X_outliers, open("/Users/LT/Documents/Arbeit/Siemens/increOCSVM/Xoutliers.p", "w+"))

    X_outliers = pickle.load(
        open("//Users/LT/Documents/Arbeit/Siemens/increOCSVM/Xoutliers.p",
             'r+'))
    #print X_outliers
    goldExample(X_train, X_test, X_outliers)
    sys.exit()
    clf1 = ocsvm.OCSVM("rbf", nu=nu, gamma=0.1)

    #clf1.train(X_train[0:1])
    clf1.fit(np.vstack((X_train, X_outliers[0])),
             scale=0.1 * len(np.vstack((X_train, X_outliers[0]))))
    plot(clf1, X_train, X_test, X_outliers, 100, False)
    plt.title("All data trained with SVM")
    #print "sum(alpha): %s" % sum(clf1._data.alpha())
    #print "standard alpha: %s" %clf1._data.alpha()
    #print "standard alpha_s: %s" %clf1._data.alpha_s()

    #goldExample(X_train, X_test, X_outliers)
    #print "standard X_s: %s "%clf1._data.Xs()
    #plt.show()
    #sys.exit()
    # Train the data
    clf = ocsvm.OCSVM("rbf", nu=nu, gamma=0.1, e=1e-6)
    #clf.train(np.vstack((X_train[1:],X_outliers[1:3]))) # testing with outliers when training
    clf.fit(X_train, scale=0.1 * len(X_train))
    plt.figure()
    #plt.title("Leave one out train with SVM")
    plot(clf, X_train, X_test, X_outliers, 100, False)
    #plt.show()
    #plot(clf, X_train[1:], X_test, X_outliers[-1:], 100, False)
    #
    clf.increment(X_outliers[0])
    #clf.increment_norm(X_outliers[0])

    #Plot the data
    plt.figure()
    #plt.title("Incremental training of new variable")
    plot(clf, np.vstack((X_train, X_outliers[0])), X_test, X_outliers[1:], 100,
         False)

    # Train the data
    #clf2 = ocsvm.OCSVM("rbf", nu=0.1, gamma=0.1)
    #clf.train(np.vstack((X_train[1:],X_outliers[1:3]))) # testing with outliers when training
    #clf2.train(np.vstack((X_train[1:],X_outliers[0])))
    #plt.figure()
    #plot(clf, X_train, X_test, X_outliers[1:], 100, False)

    #plot(clf, X_train[1:], X_test, X_outliers[-1:], 100, False)
    #
    #clf2.increment(X_train[0])

    #Plot the data
    #plt.figure()
    #plot(clf2, X_train, X_test, X_outliers, 100, False)

    #plt.draw()
    #print "point to increment"
    #
    #plt.figure()
    #plot(clf, X_train, X_test, X_outliers, 100, True)
    plt.show()