Example #1
    def test_posterior(self):
        """Check the posterior over weights function finds the minimum."""
        clf = RVC()

        x = np.array([[1, 2], [3, 4]])
        y = np.array([[5, 6], [7, 8]])

        clf.phi = clf._apply_kernel(x, y)

        clf.alpha_ = np.ones(3)
        clf.m_ = np.ones(3)
        clf.t = np.array([1, 0])
        clf.beta_ = None

        clf._posterior()

        m_target = np.array([-9.157e-03,  -5.049e-08,   2.794e-05])
        sigma_target = np.array([
            [1, -4.294e-10, -3.052e-03],
            [-4.294e-10, 1, -1.875e-08],
            [-3.052e-03, -1.875e-08, 6.667e-01]
        ])

        np.testing.assert_allclose(clf.m_, m_target, rtol=1e-3)
        np.testing.assert_allclose(clf.sigma_, sigma_target, rtol=1e-3)
Example #2
    def test_fit_two_classes(self):
        """Check that fitting with two classes works directly."""
        clf = RVC()

        X = np.array([[1, 2], [2, 1]])

        y = np.array(['A', 'B'])

        clf.fit(X, y)
        np.testing.assert_array_equal(clf.classes_, np.array(['A', 'B']))
Example #3
    def test_fit_three_classes(self):
        """Check that fitting with three classes uses OneVSOne."""
        clf = RVC()

        X = np.array([[1, 2], [2, 1], [2, 2]])

        y = np.array(['A', 'B', 'C'])

        clf.fit(X, y)
        self.assertIsInstance(clf.multi_, OneVsOneClassifier)
        np.testing.assert_array_equal(clf.classes_, np.array(['A', 'B', 'C']))
Example #4
    def test_classification_three_classes(self):
        """Check classification works with three classes."""
        iris = load_iris()

        X = iris.data
        y = iris.target

        clf = RVC()
        clf.fit(X, y)

        self.assertGreater(clf.score(X, y), 0.95)
Example #5
    def test_predict_three_classes(self):
        """Check predict works with three classes."""
        clf = RVC(kernel='linear')

        X = np.array([[5, 5], [5, -5], [-5, 0]])

        y = np.array(['A', 'B', 'C'])

        clf.fit(X, y)

        prediction = clf.predict(np.array([[10, 10]]))
        np.testing.assert_array_equal(prediction, np.array(['A']))
Example #6
    def test_fit_one_class(self):
        """Check that fitting with only one class raises an exception."""
        clf = RVC()

        X = np.array([[1, 2], [2, 1]])

        y = np.array(['A', 'A'])

        try:
            clf.fit(X, y)
        except ValueError as error:
            self.assertEqual(str(error), "Need 2 or more classes.")
        else:
            self.fail("fit did not raise ValueError for a single class.")
Example #7
    def test_predict_two_classes(self):
        """Check that predict works with two classes."""
        clf = RVC(kernel='linear')

        X = np.array([
            [2, 1],
            [1, 2],
        ])

        y = np.array(['A', 'B'])

        clf.fit(X, y)

        prediction = clf.predict(np.array([[0, 3]]))
        np.testing.assert_array_equal(prediction, np.array(['A']))
Example #8
    def test_fit_two_classes_imbalanced(self):
        """Check that fitting with two classes works with unequal samples."""
        clf = RVC()

        X = np.array([
            [1, 2],
            [1, 4],
            [4, 2],
            [2, 1],
            [3, 1.5],
        ])

        y = np.array(['A', 'A', 'B', 'B', 'B'])
        clf.fit(X, y)
        np.testing.assert_array_equal(clf.classes_, np.array(['A', 'B']))
Example #9
    def test_log_posterior(self):
        """Check _log_posterior returns the correct value and jacobian."""
        clf = RVC()

        x = np.array([[1, 2], [3, 4]])
        y = np.array([[5, 6], [7, 8]])

        phi = clf._apply_kernel(x, y)

        alpha = np.ones(3)
        m = np.ones(3)
        t = np.array([1, 0])

        log_p, jacobian = clf._log_posterior(m, alpha, phi, t)

        j_target = np.array([1.013, 1, 1.466])

        self.assertAlmostEqual(log_p, 3.140, places=3)
        np.testing.assert_allclose(jacobian, j_target, rtol=1e-3)
Example #10
def rvc_param_selection(X, y, nfolds):
    # Adapted from: https://medium.com/@aneesha/svm-parameter-tuning-in-scikit-learn-using-gridsearchcv-2413c02125a0
    gammas = [1e-5]
    #gammas = [1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2]
    param_grid = {'coef1': gammas}
    grid_search = GridSearchCV(RVC(kernel='rbf'), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    return grid_search.best_params_
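
A minimal usage sketch, assuming X and y are already loaded as arrays:

#hypothetical usage: pick the best RBF width via 5-fold CV
best_params = rvc_param_selection(X, y, nfolds=5)
print(best_params)  # e.g. {'coef1': 1e-05}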
Example #11
    def test_hessian(self):
        """Check the hessian function returns the correct values."""
        clf = RVC()

        x = np.array([[1, 2], [3, 4]])
        y = np.array([[5, 6], [7, 8]])

        phi = clf._apply_kernel(x, y)

        alpha = np.ones(3)
        m = np.ones(3)
        t = np.array([1, 0])

        hessian = clf._hessian(m, alpha, phi, t)

        h_target = np.array([[1, 4.018e-10, 3.571e-03],
                             [4.018e-10, 1, 2.194e-08],
                             [3.571e-03, 2.194e-08, 1.392]])

        np.testing.assert_allclose(hessian, h_target, rtol=1e-3)
Example #12
def build_and_run_rvc(X_train_scaled, y_train, X_test_scaled, y_test):
    '''
    Takes: scaled training and testing data with labels.
    Returns: time to fit and time to predict; also prints the model's
    hyperparameters, confusion matrix and classification report.
    '''

    # build RVC
    rvc_model = RVC()
    print("fitting RVM:")
    start = time.time()
    rvc_model.fit(X_train_scaled, y_train)
    delta0 = time.time() - start
    print("time to fit RVM: ", delta0)

    start = time.time()
    rvc_predict = rvc_model.predict(X_test_scaled)
    delta1 = time.time() - start
    print("time to predict with RVM: ", delta1)

    # print parameters
    print("RVM hyperparameters:")
    print(rvc_model.get_params())

    # evaluate RVC
    print(helpers.confusion_matrix(y_test, rvc_predict))
    print(classification_report(y_test, rvc_predict))

    return delta0, delta1
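
A minimal usage sketch; helpers.confusion_matrix is a project-local wrapper (presumably around sklearn.metrics.confusion_matrix), and the scaled splits are assumed to exist already:

#hypothetical call with pre-scaled splits
fit_time, predict_time = build_and_run_rvc(X_train_scaled, y_train,
                                           X_test_scaled, y_test)
print(fit_time, predict_time)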
Example #13
def TrainMyRVM(XEstimate,
               XValidate,
               ClassLabelsEstimate,
               ClassLabelsValidate,
               Parameters=None):

    training_labels = np.int8(np.zeros(ClassLabelsEstimate.shape[0]))
    validate_labels = np.int8(np.zeros(ClassLabelsValidate.shape[0]))
    for i in range(ClassLabelsEstimate.shape[0]):
        training_labels[i] = np.where(ClassLabelsEstimate[i] == 1)[0]
    for i in range(ClassLabelsValidate.shape[0]):
        validate_labels[i] = np.where(ClassLabelsValidate[i] == 1)[0]

    #get 4000 samples of training data
    #this will get the indices and will give the data and labels the same indices
    idx_training = np.random.choice(np.arange(len(training_labels)),
                                    4000,
                                    replace=False)
    training_labels_sampled = training_labels[idx_training]
    XEstimate_sampled = XEstimate[idx_training]

    #get 1000 samples of validation data
    #this will get the indices and will give the data and labels the same indices
    idx_validate = np.random.choice(np.arange(len(validate_labels)),
                                    1000,
                                    replace=False)
    XValidate_sampled = XValidate[idx_validate]
    ClassLabelsValidate_sampled = ClassLabelsValidate[idx_validate]

    #initialize RVM with classification (RVC class)
    rvm = RVC(kernel='rbf', n_iter=1, alpha=1.e-6, beta=1.e-6, verbose=True)
    #fit RVM
    rvm.fit(XEstimate_sampled, training_labels_sampled)
    #predict and return an array of classes for each input
    Yvalidate = rvm.predict(XValidate_sampled)
    EstParameters = rvm
    Rvectors = 1  # placeholder; the fitted relevance vectors live in rvm.relevance_
    return Yvalidate, EstParameters, Rvectors, ClassLabelsValidate_sampled, idx_validate
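
The two decoding loops at the top of this function can be collapsed into a single argmax per array; a minimal equivalent sketch, assuming the labels are one-hot rows:

#vectorized one-hot decoding, equivalent to the two loops above
training_labels = np.argmax(ClassLabelsEstimate, axis=1).astype(np.int8)
validate_labels = np.argmax(ClassLabelsValidate, axis=1).astype(np.int8)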
Example #14
def _get_best_model_rvm(X_train, y_train):

    #Cs = [0.001, 0.01, 0.1, 1, 10, 100]
    #Cs = [1, 2, 4, 8, 16, 32]
    gammas = [0.001, 0.01, 0.1, 1, 10, 100]
    #param_grid = {'kernel':['rbf'], 'C': Cs, 'gamma' : gammas}
    param_grid = {'kernel': ['rbf'], 'gamma': gammas}

    rvc = RVC()
    clf = GridSearchCV(rvc, param_grid, scoring='accuracy', verbose=2)
    #cv=10, scoring='accuracy', verbose=2)

    clf.fit(X_train, y_train)

    model = clf.best_estimator_

    return model
Example #15
def rvc_analysis(random_seed, save_path):
    # Load the data
    # TODO: change the path
    save_path = os.path.join(save_path, 'random_seed_%03d' %random_seed)
    print('Random seed: %03d' %random_seed)
    # Load the saved validation dataset
    project_ukbio_wd, project_data_ukbio, _ = get_paths(debug, dataset)
    with open(os.path.join(save_path, 'splitted_dataset_%s.pickle' % dataset), 'rb') as handle:
        splitted_dataset = pickle.load(handle)

    # Train the model
    model = RVC(kernel='linear')
    model.fit(splitted_dataset['Xtrain_scaled'], splitted_dataset['Ytrain'])

    # make cross validated predictions
    print('Perform prediction in test data')
    y_prediction_test = model.predict(splitted_dataset['Xtest_scaled'])

    y_prediction_validation = model.predict(splitted_dataset['Xvalidate_scaled'])

    # -----------------------------------------------------------------------------
    # Do some statistics. Calculate the confusion matrix

    # Test dataset
    # Look at the confusion matrix for test data
    class_name = np.array(['young', 'old', 'adult'], dtype='U10')
    ax, cm_test = plot_confusion_matrix(splitted_dataset['Ytest'], y_prediction_test,
                          classes=class_name,
                          normalize=True)
    # Look at accuracy
    accuracy_test = accuracy_score(splitted_dataset['Ytest'], y_prediction_test)
    plt.savefig(os.path.join(save_path, 'confusion_matrix_test_rvc.eps'))

    # Predict on the validation dataset
    ax, cm_validation = plot_confusion_matrix(splitted_dataset['Yvalidate'], y_prediction_validation,
                          classes=class_name,
                          normalize=True)
    plt.savefig(os.path.join(save_path, 'confusion_matrix_validation_rvc.eps'))
    # Look at accuracy
    accuracy_val = accuracy_score(splitted_dataset['Yvalidate'],
                                   y_prediction_validation)
    return cm_test, cm_validation, accuracy_test, accuracy_val
Example #16
    def test_classification_two_classes(self):
        """Check classification works with two classes."""
        iris = load_iris()

        X = iris.data[:, 1:]
        y = iris.target

        # Only 2 classes needed
        X = X[y != 0]
        y = y[y != 0]

        clf = RVC()

        clf.fit(X, y)

        self.assertGreater(clf.score(X, y), 0.95)

        prob = clf.predict_proba(X[0, :])
        p_target = np.array([[0.999, 5.538e-4]])
        np.testing.assert_allclose(prob, p_target, rtol=1e-2, atol=1e-2)
Example #17
def RVM(X_hyper, Y_hyper, X_train, Y_train, X_validate, Y_validate, params):
    clf = RVC(n_iter=100, tol=0.1)
    start = time.clock()

    X_train_reduced = X_train
    X_validate_reduced = X_validate

    train_size = params['train_size']
    test_size = params['test_size']
    train = params['train']

    if train:
        clf.fit(X_train_reduced[:train_size, :], Y_train[:train_size])
        print("training took", time.clock() - start, "s")
        writeObj('rvm_model.pkl', clf)
    else:
        clf = readObj('rvm_model.pkl')

    Y_pred = clf.predict(X_validate_reduced[:test_size])
    return Y_pred, clf
Example #18
feature_scaler = StandardScaler()  
X_train = feature_scaler.fit_transform(X_train)  
X_test = feature_scaler.transform(X_test)  
####################################################################################################################
#CROSS VALIDATION SPLIT IN K-folds
######################################################################################################################
kf = KFold(n_splits=5)
print(kf)
#CREATE WIDTHS FOR GRID SEARCH (log-spaced from 1e-5 to 1e4)
width1 = np.linspace(-5, 4, 10)
width = 10 ** width1
#create a matrix to record the summed score and width for each candidate
score_width = np.ones([len(width), 2])
##################
#OUTER loop sweeps the widths, INNER loop does the K-fold validation
for i in range(len(width)):
    score = 0
    for train_index, test_index in kf.split(X_train):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train1, X_test1 = X_train[train_index], X_train[test_index]
        y_train1, y_test1 = y_train[train_index], y_train[test_index]
        clf1 = RVC(kernel='rbf', coef1=width[i])
        clf1.fit(X_train1, y_train1)
        score = score + clf1.score(X_test1, y_test1)
    score_width[i, 0] = score
    score_width[i, 1] = width[i]
#########################################################################################################
idx = np.argmax(score_width[:, 0])
best_width = score_width[idx, 1]
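
A natural follow-up: refit on the full training split with the winning width (assuming y_test holds the matching test labels):

#refit with the best width found by the sweep above
print("best width:", best_width)
clf_best = RVC(kernel='rbf', coef1=best_width)
clf_best.fit(X_train, y_train)
print(clf_best.score(X_test, y_test))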
Example #19
File: rvm.py Project: ckbjimmy/nlp
import os
import numpy as np
import scipy as sp
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# RVM
from skrvm import RVC
clf = RVC()

### read data from single csv file and transform to a list
dir = '/Users/weng/Downloads/'
# raw = pd.read_csv(os.path.join(dir, 'test.txt'), header=0, delimiter=';')
raw_list = pd.read_csv(os.path.join(dir, 'test.txt'), sep="\t", header=None)
# array format is needed for further processing (df -> list -> matrix(array) )
raw_X = raw_list[0].values.tolist()
raw_X = np.asarray(raw_X)

raw_y = raw_list[1].values.tolist()
raw_y = np.asarray(raw_y)

print "Read %d rows of data\n" % len(raw)

clf.fit(raw_X, raw_y)
Example #20
def TrainMyClassifier(XEstimate, ClassLabels, XValidate, Parameters):
    # RVM
    if Parameters['algorithm'] == 'RVM':
        Parameters = Parameters['parameters']

        clf = RVC(alpha=Parameters.get('alpha'),
                  beta=Parameters.get('beta'),
                  n_iter=Parameters.get('n_iter'))
        clf.fit(XEstimate, ClassLabels)
        if np.shape(clf.classes_)[0] == 2:
            Yvalidate = clf.predict_proba(XValidate)
        else:
            Yvalidate = predict_proba(clf, XValidate)
        EstParameters = get_params(clf)

        return Yvalidate, EstParameters
#SVM

    elif Parameters['algorithm'] == 'SVM':

        svc = get_svc(Parameters)
        svc_train(svc, XEstimate, ClassLabels)
        prob = svc_probability(svc, XValidate)
        EstParameters = svc_get_para(svc)

        prob_std = np.ndarray.std(prob, axis=1)[:, np.newaxis]
        sigmoid = 1 - expit(prob_std)
        Yvalidate = np.concatenate([prob, sigmoid], axis=1)
        Yvalidate = Yvalidate / np.repeat(
            (sigmoid + 1), axis=1, repeats=len(svc.classes_) + 1)

        return Yvalidate, EstParameters
#GPR
    elif Parameters["algorithm"] == "GPR":
        # get the classes from the labels
        classes = np.unique(ClassLabels, axis=0)
        classes = sorted(classes, reverse=True)
        num_class = len(classes)

        # get data and label based on classes
        data = []
        for cla in classes:
            data.append(XEstimate[ClassLabels == cla])

        target = []
        for cla in classes:
            target.append(ClassLabels[ClassLabels == cla])

        # put data and labels into a matrix so that the probability
        # calculation below is easier; everything that follows uses it
        data_matrix = []
        for i in range(num_class - 1):
            data_matrix.append([])
            for j in range(num_class - 1):
                data_matrix[i].append(None)

        target_matrix = []
        for i in range(num_class - 1):
            target_matrix.append([])
            for j in range(num_class - 1):
                target_matrix[i].append(None)

        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                data_matrix[i][j] = np.concatenate([data[i], data[j + 1]],
                                                   axis=0)
                target_matrix[i][j] = np.concatenate(
                    [target[i], target[j + 1]], axis=0)

        classifier_matrix = []
        for i in range(num_class - 1):
            classifier_matrix.append([])
            for j in range(num_class - 1):
                classifier_matrix[i].append(None)

        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                gpc_classifier = GaussianProcessClassifier(
                    kernel=Parameters["parameters"]["kernel"],
                    optimizer=Parameters["parameters"]["optimizer"],
                    n_restarts_optimizer=Parameters["parameters"]
                    ["n_restarts_optimizer"],
                    max_iter_predict=Parameters["parameters"]
                    ["max_iter_predict"],
                    warm_start=Parameters["parameters"]["warm_start"],
                    copy_X_train=Parameters["parameters"]["copy_X_train"],
                    random_state=Parameters["parameters"]["random_state"],
                    multi_class="one_vs_rest",
                    n_jobs=Parameters["parameters"]["n_jobs"])
                gpc_classifier.fit(data_matrix[i][j], target_matrix[i][j])
                classifier_matrix[i][j] = gpc_classifier

        out_matrix = []
        for i in range(num_class - 1):
            out_matrix.append([])
            for j in range(num_class - 1):
                out_matrix[i].append(None)

        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                out_matrix[i][j] = classifier_matrix[i][j].predict_proba(
                    XValidate)

        # calculate the whole prediction prob
        val_shape = XValidate.shape[0]
        predict_prob_list = []
        for i in range(num_class):
            predict_prob_list.append(np.zeros(shape=[val_shape, 1]))

        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                predict_prob_list[i] += out_matrix[i][j][:,
                                                         0][:, np.newaxis] / (
                                                             num_class * 2)
                predict_prob_list[
                    j +
                    1] += out_matrix[i][j][:, 1][:,
                                                 np.newaxis] / (num_class * 2)

        # get the result of num_class probability
        result = np.concatenate(predict_prob_list, axis=1)

        # calculate the probability for the one more class
        std = np.std(result, axis=1)[:, np.newaxis]
        other_prob = np.exp(-std) / (1 + np.exp(std * 5))
        result = np.concatenate([result, other_prob], axis=1)
        result = result / np.repeat(
            (other_prob + 1), axis=1, repeats=num_class + 1)

        # put all the parameters into a dict
        estParameters = {}
        estParameters["class_num"] = num_class
        estParameters["parameters"] = []
        for i in range(num_class - 1):
            for j in range(i, num_class - 1):
                estParameters["parameters"].append({
                    "log_marginal_likelihood_value_":
                    classifier_matrix[i][j].log_marginal_likelihood_value_,
                    "classes_":
                    classifier_matrix[i][j].classes_,
                    "n_classes_":
                    classifier_matrix[i][j].n_classes_,
                    "base_estimator_":
                    classifier_matrix[i][j].base_estimator_
                })

        return result, estParameters
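
The module-level predict_proba and get_params helpers used in the RVM branch above are not shown; a plausible sketch, assuming they average the pairwise probabilities of the OneVsOneClassifier that skrvm builds for more than two classes (the names and behavior are guesses, not the project's confirmed code):

import numpy as np

def get_params(clf):
    #hypothetical helper: simply forwards to the sklearn API
    return clf.get_params()

def predict_proba(clf, X):
    #hypothetical helper: average pairwise one-vs-one probabilities;
    #sklearn's OneVsOneClassifier fits estimators in (0,1), (0,2), ... order
    n_classes = len(clf.classes_)
    pairs = [(i, j) for i in range(n_classes) for j in range(i + 1, n_classes)]
    proba = np.zeros((X.shape[0], n_classes))
    for est, (i, j) in zip(clf.multi_.estimators_, pairs):
        p = est.predict_proba(X)
        proba[:, i] += p[:, 0]
        proba[:, j] += p[:, 1]
    return proba / len(pairs)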
Example #21
File: rvm.py Project: abhven/ML-Project
"""
Authors: Mrunmayee Deshpande, Lu Gan, Bruce Huang, Abhishek Venkataraman 

"""
import timeit
from skrvm import RVC
import numpy as np
import os.path
import scipy.io

from import_data import import_data

## Set data path
parsed_data_path = 'parsed_data/'
[X, Y, valX, valY, testX, testY] = import_data(parsed_data_path)

scipy.io.savemat('train.mat', dict(X=X, Y=Y))
scipy.io.savemat('val.mat', dict(valX=valX, valY=valY))
scipy.io.savemat('test.mat', dict(testX=testX, testY=testY))

## Train an RVM
clf = RVC(verbose=True)
print(clf)
clf.fit(valX, valY)
clf.score(testX, testY)
Example #22
p1_data = full_data_matrix[0:690]
p1_classes = full_class_array[0:690]

#normalize
p1_normal_data = preprocessing.scale(p1_data)

#PCA
pca = PCA(10, svd_solver='auto')
pca.fit(p1_normal_data)
p1_pca_data = pca.transform(p1_normal_data)  #transform data to 10 components

p1_pca_data
#classes_numbered, class_numbers_names = class2numbers(p1_classes)

## RVM classification
clf1 = RVC()
clf1.fit(p1_pca_data, p1_classes)

pred = clf1.predict(p1_pca_data)

correct = 0
for i in range(np.size(pred, 0)):
    if pred[i] == p1_classes[i]:
        correct += 1

params = clf1.get_params()
print(clf1.alpha_)

probs = clf1.predict_proba(p1_pca_data)
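
The counting loop in this example is equivalent to a one-liner:

#training accuracy without the explicit counting loop
accuracy = np.mean(pred == p1_classes)
print(accuracy)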
Example #23
#Running RVC - HCvMCI

#Reintegrate the validation set into the train/test sets for RVC

RXTrainHvM = HCvMCI[train_inds,]
RYTrainHvM = YHvM[train_inds]
RXTestHvM = HCvMCI[test_inds,]
RYTestHvM = YHvM[test_inds]

#resampling w/ SMOTE to account for uneven sampling

[XTrainResHvM,YTrainResHvM] = SMOTE(random_state = 100,k_neighbors = 3).fit_resample(RXTrainHvM,RYTrainHvM)
[XTestResHvM,YTestResHvM] = SMOTE(random_state = 100,k_neighbors = 3).fit_resample(RXTestHvM,RYTestHvM)

from skrvm import RVC
RVCMod = RVC(kernel = 'linear',
             verbose = True)
RVCMod.fit(XTrainResHvM,YTrainResHvM)

#create feature importance evaluation function

def RVMFeatImp(RVs):
    NumRVs = RVs.shape[0]
    SumD = 0
    for RVNum in range(1,NumRVs):
        d1 = RVs[RVNum-1,]
        d2 = sum(numpy.ndarray.flatten(
                RVs[numpy.int8(
                        numpy.setdiff1d(numpy.linspace(0,NumRVs-1,NumRVs),RVNum))]))
        SumD = SumD + (d1/d2)
    SumD = SumD/NumRVs
    return SumD
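
A possible call for the function above, assuming the fitted model's relevance vectors are what it expects (skrvm stores them as relevance_):

#feature importance over the fitted model's relevance vectors
FeatImp = RVMFeatImp(RVCMod.relevance_)
print(FeatImp)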
Example #24
def TrainMyClassifier(XEstimate,
                      YEstimate,
                      XValidate,
                      YValidate,
                      Parameters=[]):
    """
    INPUTS:
          XEstimate - Feature vectors on which the model has to be trained
          YEstimate - Target estimates for the XEstimates
          XValidate - Feature vectors which are used to tune the hyperparameters
          Parameters - Hyperparameters of the respective algorithm

    OUTPUTS:
          classLabels - The estimated label values for each XValidate entry
          EstParams - The estimated parameters corresponding to each algorithm
    """

    threshold = 0.5  # probability cutoff below which a sample gets the extra non-class label
    Algorithm = Parameters[0]

    # extract true labels estimate
    Y_E = []
    Labels = YEstimate.tolist()
    for lis in Labels:
        if 1 in lis:
            Y_E.append(lis.index(1))
        else:
            Y_E.append(5)

    # extract true labels validate
    Y_V = []
    Labels = YValidate.tolist()
    for lis in Labels:
        if 1 in lis:
            Y_V.append(lis.index(1))
        else:
            Y_V.append(5)

    Y_V = np.array(Y_V)

    if Algorithm == "SVM":
        model = SVC(decision_function_shape='ovo', probability=True)
        clf = GridSearchCV(model, Parameters[1], cv=2)
        clf.fit(XEstimate, Y_E)
        proba = clf.predict_proba(XValidate)
        accuracy = clf.score(XValidate, Y_V)

        estParams = {
            'hyper': clf.best_params_,
            'model': clf.best_estimator_,
            'dual_coef': clf.best_estimator_.dual_coef_,
            'intercept': clf.best_estimator_.intercept_,
            'support_vectors': clf.best_estimator_.n_support_
        }

    elif Algorithm == "RVM":
        # perform PCA on data to reduce time
        Y_E = np.array(Y_E)
        #pca = PCA(n_components=8)
        #XEstimate = pca.fit_transform(XEstimate)
        #XValidate = pca.fit_transform(XValidate)

        threshold = 0.3

        posn, XEstimate_Fraction, Y_E_Fraction = {}, {}, {}

        Nc = 5

        for i in range(Nc):
            posn[i] = np.where(Y_E == i)

        for i in range(Nc):
            XEstimate_Fraction[i] = XEstimate[posn[i]]
            Y_E_Fraction[i] = Y_E[posn[i]]

        size = np.shape(XValidate)[0]
        predict_proba = np.zeros((size, 5))
        num_rvectors = []

        classifierObjs = []

        for i in range(Nc):
            for j in range(i + 1, Nc):
                classifierObjs.append(RVC(n_iter=1, kernel='linear'))
                classifierObjs[-1].fit(
                    np.concatenate(
                        (XEstimate_Fraction[i], XEstimate_Fraction[j]),
                        axis=0),
                    np.concatenate((Y_E_Fraction[i], Y_E_Fraction[j]), axis=0))
                sc_proba = classifierObjs[-1].predict_proba(XValidate)

                predict_proba[:, i] += sc_proba[:, 0]
                predict_proba[:, j] += sc_proba[:, 1]

                num_rvectors.append(classifierObjs[-1].relevance_.shape[0])

        proba = predict_proba / 10  # 10 = C(5, 2) pairwise classifiers

        count1 = count2 = 0

        for i in range(len(predict_proba)):
            pos = predict_proba[i].argmax(axis=0)
            if pos == Y_V[i]:
                if predict_proba[i][pos] > 0.3:
                    count1 += 1
                count2 += 1

        # accuracy counting only confident (probability > 0.3) hits
        accuracy_confident = float(count1) / len(predict_proba)

        # plain accuracy over all validation points
        accuracy = float(count2) / len(predict_proba)

        avg_rvectors = np.average(num_rvectors)

        print("Average number of relevance vectors: " + str(avg_rvectors))

        estParams = {'model': classifierObjs, 'avg_rel_vectors': avg_rvectors}

    elif Algorithm == "GPR":
        # perform PCA on data to reduce time
        # pca = PCA(n_components=8)
        # XEstimate = pca.fit_transform(XEstimate[:1000,:])
        # XValidate = pca.fit_transform(XValidate)
        #print XEstimate.shape
        #print len(Y_E)

        kernel_rbf = 1 * RBF(length_scale=1.0,
                             length_scale_bounds=(1e-05, 100000.0))
        #clf = OneVsRestClassifier(GaussianProcessClassifier(kernel = kernel_rbf))
        clf = GaussianProcessClassifier(kernel=kernel_rbf,
                                        multi_class='one_vs_rest')
        print('fitting')
        clf.fit(XEstimate, Y_E)
        print('predicting')
        proba = clf.predict_proba(XValidate)
        print('scoring')
        accuracy = clf.score(XValidate, Y_V)
        print('accuracy')
        print(accuracy)
        estParams = {'model': clf}

    classLabels = np.full((len(YValidate), 6), -1, dtype=int)

    for i, p in enumerate(proba):
        idx = np.argmax(p)
        if p[idx] < threshold:
            classLabels[i][-1] = 1
        else:
            # print p
            classLabels[i][idx] = 1

    estParams['classLabels'] = classLabels
    estParams['accuracy'] = accuracy
    print("Accuracy is: " + str(accuracy))

    return classLabels, estParams
Example #25
File: rvm.py Project: TrinaZ/MixGenotype
from skrvm import RVC
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split


def load_data():
    f = open('F:/importantfilecopy/tandomrepeat/2019.121test/5results', 'r')
    # f = open('F:/importantfilecopy/tandomrepeat/3/1x/total.txt', 'r')
    data_set = []
    label_set = []
    for line in f:
        line = line.strip().split("\t")
        line_list = line[0].strip().split(" ")
        data_set.append(line_list)
        label_set.append(line[1])
    return data_set, label_set


data, label = load_data()
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    label,
                                                    test_size=0.3,
                                                    random_state=0)
clf = RVC()
# clf.fit(rvm_data, rvm_target)
# print(clf.score(rvm_data, rvm_target))
clf.fit(X_train, y_train)
scoring = 'accuracy'
scores = cross_val_score(clf, X_test, y_test, cv=7)
print(scores.mean())
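
Worth noting: cross_val_score clones and refits the estimator for each fold, so the clf.fit call above does not affect the reported scores.

print(scores)                      # per-fold accuracies
print(scores.mean(), scores.std())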
Example #26
File: untitled0.py Project: notepi/mywork
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 20 21:26:07 2018

@author: pan
"""

from skrvm import RVC
from sklearn.datasets import load_iris

clf = RVC()
iris = load_iris()
clf.fit(iris.data, iris.target)
#RVC(alpha=1e-06, beta=1e-06, beta_fixed=False, bias_used=True, coef0=0.0,
#coef1=None, degree=3, kernel='rbf', n_iter=3000, n_iter_posterior=50,
#threshold_alpha=1000000000.0, tol=0.001, verbose=False)
clf.score(iris.data, iris.target)
Example #27
    New[i, :] = n.params
#mdl = smt.AR(d_tot[100,:]).fit(maxlag=30, ic='aic', trend='nc')
#est_order=smt.AR(d_tot[1,:]).select_order(maxlag=30, ic='aic', trend='nc')
#print(est_order)
#p_orders=np.zeros([1380,1])
#for i in range(1380):
#   X=AR(d_tot[i,:])
#  n=X.fit(maxlag=4,ic='aic')
# p_orders[i,0]=len(n.params)

#np.mean(p_orders)

#plt.scatter(New[:,1],New[:,3])

An_lab = Animal_label(tot)

#shuffle data to take them in random order
indx = random.sample(range(10350), 10350)
a = New[indx, :]
b = An_lab[indx]
#define training and test sets
x_train = a[0:8000, :]
y_train = b[0:8000]
x_test = a[8000:, :]
y_test = b[8000:]
#RVM with 8000 out of 10350 samples as training data
from skrvm import RVC
clf1 = RVC(kernel='rbf')
clf1.fit(x_train, y_train)
clf1.score(x_test, y_test)
Example #28
from skrvm import RVC
from sklearn.datasets import load_iris

if __name__ == "__main__":
    clf = RVC()
    clf.verbose = True  # Print iteration, alpha, beta, gamma, m, Relevance vectors
    data = load_iris()
    trainData = data.data
    trainTargets = data.target
    print(clf.fit(trainData, trainTargets))
    #print(clf.score(trainData, trainTargets))
Example #29
    def test__init__(self):
        """Check the additional params on the classifier are initialized."""
        clf = RVC(n_iter_posterior=30)
        self.assertEqual(clf.n_iter_posterior, 30)
Example #30
X = full_normPCA123_array[train_indicies]
Y = full_isAnimal_array[train_indicies]

params = rvc_param_selection(X, Y, 5)
#
#params = rvc_param_selection2(X,Y,5)

####################TO TEST
#NOTE: despite the names, these arrays hold accuracy scores, not errors
test_err = np.zeros((15))
train_err = np.zeros((15))
coef = np.multiply(
    [1000, 500, 200, 150, 100, 70, 60, 50, 40, 30, 20, 10, 5, 2, 1], 1e-6)
for i in range(15):
    #coef[i] = 5e-5-(i+1)*1e-5
    clf = RVC(kernel="rbf", coef1=coef[i])  # coef1:  1=46 0.1same
    clf.fit(full_normPCA123_array[train_indicies],
            full_subClass_array[train_indicies])
    train_err[i] = clf.score(full_normPCA123_array[train_indicies],
                             full_subClass_array[train_indicies])
    test_err[i] = clf.score(full_normPCA123_array[test_indicies],
                            full_subClass_array[test_indicies])

    print(coef[i])
    print(train_err[i])
    print(test_err[i])
    print("\n\n")

#####################################################################################################################
rvm_tested_coef1 = np.load("rvm_tested_coef1.npy")
rvm_tested_coef1_again = np.load("rvm_tested_coef1_again.npy")
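
A possible way to close the sweep above, assuming the aim is the coef1 value with the best held-out accuracy:

#pick the width whose test accuracy was highest in the sweep above
best_i = np.argmax(test_err)
print("best coef1:", coef[best_i], "test accuracy:", test_err[best_i])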