Code example #1
File: test_qda.py Project: youngstone/scikit-learn
def test_qda_priors():
    clf = qda.QDA()
    y_pred = clf.fit(X, y).predict(X)
    n_pos = np.sum(y_pred == 2)

    neg = 1e-10
    clf = qda.QDA(priors=np.array([neg, 1 - neg]))
    y_pred = clf.fit(X, y).predict(X)
    n_pos2 = np.sum(y_pred == 2)

    assert_greater(n_pos2, n_pos)
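A note on API versions: the sklearn.qda module used throughout these examples was deprecated in scikit-learn 0.17 and removed in 0.20; the class now lives in sklearn.discriminant_analysis as QuadraticDiscriminantAnalysis. A self-contained sketch of the same priors check on a modern install (the six-point fixture is borrowed from code example #11 below, not from test_qda.py):

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

clf = QuadraticDiscriminantAnalysis()
n_pos = np.sum(clf.fit(X, y).predict(X) == 2)

# An almost-degenerate prior pushes every prediction toward class 2.
neg = 1e-10
clf = QuadraticDiscriminantAnalysis(priors=np.array([neg, 1 - neg]))
n_pos2 = np.sum(clf.fit(X, y).predict(X) == 2)
assert n_pos2 > n_pos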
Code example #2
def test_qda_regularization():
    # the default is reg_param=0. and will cause issues
    # when there is a constant variable
    clf = qda.QDA()
    y_pred = clf.fit(X2, y).predict(X2)
    assert_true(np.any(y_pred != y))

    # adding a little regularization fixes the problem
    clf = qda.QDA(reg_param=0.01)
    y_pred = clf.fit(X2, y).predict(X2)
    assert_array_equal(y_pred, y)
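The comment above is worth unpacking: a feature with zero variance makes the per-class covariance matrix singular, so with reg_param=0 QDA cannot invert it. A self-contained illustration on modern scikit-learn (this fixture is made up for the sketch; it is not the X2 defined in the test module):

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

# The second feature is constant, so each class covariance is singular.
X2 = np.array([[-3., 0.], [-2., 0.], [-1., 0.], [-1., 0.],
               [1., 0.], [1., 0.], [2., 0.], [3., 0.]])
y = np.array([1, 1, 1, 1, 2, 2, 2, 2])

# reg_param shrinks the covariance estimate toward a well-conditioned
# matrix, so the fit succeeds and the classes separate on feature 0
# (scikit-learn may still warn that variables are collinear).
clf = QuadraticDiscriminantAnalysis(reg_param=0.01).fit(X2, y)
print(clf.predict(X2))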
Code example #3
def test_qda_store_covariances():
    # The default is to not set the covariances_ attribute
    clf = qda.QDA().fit(X, y)
    assert_true(not hasattr(clf, 'covariances_'))

    # Test the actual attribute:
    clf = qda.QDA().fit(X, y, store_covariances=True)
    assert_true(hasattr(clf, 'covariances_'))

    assert_array_almost_equal(clf.covariances_[0],
                              np.array([[0.7, 0.45], [0.45, 0.7]]))

    assert_array_almost_equal(
        clf.covariances_[1],
        np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]))
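On current scikit-learn releases the equivalent switch is a constructor argument rather than a fit parameter: pass store_covariance=True to QuadraticDiscriminantAnalysis and read the covariance_ attribute (one matrix per class). A minimal sketch, assuming the same X, y fixtures as above:

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

clf = QuadraticDiscriminantAnalysis(store_covariance=True).fit(X, y)
print(clf.covariance_[0])  # per-class covariance estimates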
Code example #4
def getPredictionAcc(classifier, components, tr_x, tr_y, te_x, te_y):
    """
    given a classifier choice, a desired dimensionality reduction, and test and training data,
    train a model and make predictions on the test set
    return the accuracy of the generated model

    Classifier Choices: 'SGD', 'Linear-SVC', 'SVC-rbf', 'Perceptron-L1', 'Perceptron-L2', 'kNN', 'QDA'
    """
    choices = {
        'SGD': linear_model.SGDClassifier(),
        'Linear-SVC': svm.LinearSVC(),
        'SVC-rbf': svm.SVC(kernel='rbf'),
        'Perceptron-L1': linear_model.Perceptron(penalty='l1'),
        'Perceptron-L2': linear_model.Perceptron(penalty='l2', n_iter=25),
        'kNN': neighbors.KNeighborsClassifier(),
        'QDA': qda.QDA(),
    }
    # clf = Pipeline([('vect', CountVectorizer(stop_words='english', encoding='latin-1')),
    clf = Pipeline([
        ('vect', CountVectorizer(encoding='latin-1')),
        # 5a - this strongly affects the quality of the result ...
        # ('GRP', GaussianRandomProjection(n_components=components)),
        ('GRP',
         SparseRandomProjection(n_components=components, dense_output=True)),
        # 5b
        ('Scaler', StandardScaler()),
        # 5c
        (classifier, choices[classifier])
    ])
    clf = clf.fit(tr_x, tr_y)
    predicted = clf.predict(te_x)
    return np.mean(predicted == te_y)
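Since the pipeline starts with CountVectorizer, tr_x and te_x are raw text documents. A hypothetical call (the 20 newsgroups fetch here is an assumption for illustration, not part of the original project; any list of text documents plus integer labels works):

from sklearn.datasets import fetch_20newsgroups

cats = ['rec.autos', 'sci.space']
train = fetch_20newsgroups(subset='train', categories=cats)
test = fetch_20newsgroups(subset='test', categories=cats)

acc = getPredictionAcc('Linear-SVC', 100, train.data, train.target,
                       test.data, test.target)
print(acc)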
Code example #5
File: test_qda.py Project: youngstone/scikit-learn
def test_qda():
    """
    QDA classification.

    This checks that QDA implements fit and predict and returns
    correct values for a simple toy dataset.
    """
    clf = qda.QDA()
    y_pred = clf.fit(X, y).predict(X)
    assert_array_equal(y_pred, y)

    # Ensure that it works with 1D data
    y_pred1 = clf.fit(X1, y).predict(X1)
    assert_array_equal(y_pred1, y)

    # Test probability estimates
    y_proba_pred1 = clf.predict_proba(X1)
    assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y)
    y_log_proba_pred1 = clf.predict_log_proba(X1)
    assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, 8)

    y_pred3 = clf.fit(X, y3).predict(X)
    # QDA shouldn't be able to separate those
    assert_true(np.any(y_pred3 != y3))

    # Classes should have at least 2 elements
    assert_raises(ValueError, clf.fit, X, y4)
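The fixtures X, X1, y, y3, y4 are module-level globals in test_qda.py and are not shown here. A self-contained version of the core fit/predict check, reusing the separable six-point dataset from code example #11 on a modern install:

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

clf = QuadraticDiscriminantAnalysis()
assert (clf.fit(X, y).predict(X) == y).all()

# predict_log_proba is consistent with predict_proba up to exp().
proba = clf.predict_proba(X)
assert np.allclose(np.exp(clf.predict_log_proba(X)), proba)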
Code example #6
 def __init__(self, classifier):
     if classifier == 'svm':
         self.clf = svm.SVC()
     elif classifier == 'lda':
         self.clf = lda.LDA()
     elif classifier == 'qda':
         self.clf = qda.QDA()
Code example #7
File: Model.py Project: MrTyton/GoLD
 def buildModelLDA(self, outputFile, priorProbs=[0.5, 0.5]):
     # Note: despite the method name, this trains a QDA model.
     classifier = qda.QDA(priors=priorProbs)
     classifier.fit(self.instances, self.classes)
     modelData = pickle.dumps(classifier)
     f = open(outputFile, "wb")  # binary mode, since pickled data is bytes
     f.write(modelData)
     f.close()
Code example #8
def random_forest(X, t):
    # Note: despite its name, this helper fits QDA, not a random forest.
    clf = qda.QDA()
    clf.fit(X, t)

    def random_forest_predict(x):
        return clf.predict_proba(x)[:, 1]

    return random_forest_predict
Code example #9
File: classification.py Project: maggishaggy/spice
def get_classifier(classifier_str):
    '''
    This functions maps the classifier string classifier_str to the
    corresponding classifier object with the default parameters set.
    '''

    # SVC
    if (classifier_str == 'linearsvc'):
        cl = svm.LinearSVC(**svm_default_param)
    elif (classifier_str == 'svc_linear'):
        libsvm_default_param['kernel'] = 'linear'
        cl = svm.SVC(**libsvm_default_param)
    elif (classifier_str == 'svc_rbf'):
        libsvm_default_param['kernel'] = 'rbf'
        cl = svm.SVC(**libsvm_default_param)
    # polynomial, sigmoid kernel
    # nuSVC
    # Nearest Neighbors (Euclidean distance used by default)
    elif (classifier_str == 'kn_uniform'):
        kn_default_param['weights'] = 'uniform'
        cl = neighbors.KNeighborsClassifier(**kn_default_param)
    elif (classifier_str == 'kn_distance'):
        kn_default_param['weights'] = 'distance'
        cl = neighbors.KNeighborsClassifier(**kn_default_param)
    elif (classifier_str == 'rn_uniform'):
        rn_default_param['weights'] = 'uniform'
        cl = neighbors.RadiusNeighborsClassifier(**rn_default_param)
    elif (classifier_str == 'rn_distance'):
        rn_default_param['weights'] = 'distance'
        cl = neighbors.RadiusNeighborsClassifier(**rn_default_param)
    elif (classifier_str == 'nc'):
        cl = neighbors.NearestCentroid()
    # LDA and QDA, priors are by default set to 1/len(class) for each class
    elif (classifier_str == 'lda'):
        cl = lda.LDA()
    elif (classifier_str == 'qda'):
        cl = qda.QDA()
    # Gaussian naive Bayes
    # from the code it is unclear how priors are set
    elif (classifier_str == 'gnb'):
        cl = naive_bayes.GaussianNB()
    elif (classifier_str == 'mnb'):
        cl = naive_bayes.MultinomialNB()
    elif (classifier_str == 'bnb'):
        cl = naive_bayes.BernoulliNB()
    # Decision tree
    elif (classifier_str == 'dtree'):
        cl = tree.DecisionTreeClassifier()
    elif (classifier_str == 'rforest'):
        cl = ensemble.RandomForestClassifier()
    else:
        # raise error if classifier not found
        raise ValueError('Classifier not implemented: %s' % (classifier_str))

    return cl
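One design wrinkle in the chain above: branches like svc_linear mutate the shared libsvm_default_param dict, so the chosen kernel leaks into later calls. A dict-based sketch that builds the kwargs per call instead (an illustrative refactor, not project code; only a few branches shown):

def get_classifier2(classifier_str):
    # Factories are zero-argument callables; dict(base, **overrides) copies
    # the default-parameter dicts instead of mutating module-level state.
    factories = {
        'linearsvc': lambda: svm.LinearSVC(**svm_default_param),
        'svc_linear': lambda: svm.SVC(**dict(libsvm_default_param, kernel='linear')),
        'svc_rbf': lambda: svm.SVC(**dict(libsvm_default_param, kernel='rbf')),
        'kn_uniform': lambda: neighbors.KNeighborsClassifier(
            **dict(kn_default_param, weights='uniform')),
        'qda': lambda: qda.QDA(),
    }
    try:
        return factories[classifier_str]()
    except KeyError:
        raise ValueError('Classifier not implemented: %s' % classifier_str)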
Code example #10
File: test_qda.py Project: youngstone/scikit-learn
def test_qda_regularization():
    # the default is reg_param=0. and will cause issues
    # when there is a constant variable
    clf = qda.QDA()
    with ignore_warnings():
        y_pred = clf.fit(X2, y).predict(X2)
    assert_true(np.any(y_pred != y))

    # adding a little regularization fixes the problem
    clf = qda.QDA(reg_param=0.01)
    with ignore_warnings():
        clf.fit(X2, y)
    y_pred = clf.predict(X2)
    assert_array_equal(y_pred, y)

    # Case n_samples_in_a_class < n_features
    clf = qda.QDA(reg_param=0.1)
    with ignore_warnings():
        clf.fit(X5, y5)
    y_pred5 = clf.predict(X5)
    assert_array_equal(y_pred5, y5)
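The last block covers a second failure mode: when a class has fewer samples than features, its covariance estimate is likewise rank-deficient. An illustrative sketch on modern scikit-learn (X5/y5 here are made up; the test module defines its own fixtures):

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

X5 = np.c_[np.arange(8.0), np.zeros((8, 3))]  # 8 samples, 4 features
y5 = np.array([0, 0, 0, 0, 0, 1, 1, 1])       # class 1: 3 samples < 4 features

clf = QuadraticDiscriminantAnalysis(reg_param=0.1).fit(X5, y5)
print(clf.predict(X5))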
Code example #11
File: test_qda.py Project: the872/pandas-ml
    def test_QDA(self):
        X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
        y = np.array([1, 1, 1, 2, 2, 2])

        df = pdml.ModelFrame(X, target=y)

        mod1 = df.qda.QDA()
        mod2 = qda.QDA()

        df.fit(mod1)
        mod2.fit(X, y)

        result = df.predict(mod1)
        expected = mod2.predict(X)

        self.assertTrue(isinstance(result, pdml.ModelSeries))
        self.assert_numpy_array_equal(result.values, expected)
Code example #12
    #     pca.fit()
    #
    #
    #     pca = grid.best_estimator_



if __name__ == '__main__':
    REGEX = re.compile(sys.argv[1])
    INPUT = sys.argv[2]
    NFOLDS = 10

    classifiers = [
        ('SVC',   '#00995C', svm.SVC(kernel='linear', class_weight='auto', random_state=1)),
        ('LSVC',  '#5C991F', svm.LinearSVC(class_weight='auto', random_state=1)),
        ('QDA',   '#995C1F', qda.QDA()),
        ('LDA',   '#9966FF', lda.LDA()),
        ('RF',    '#991F5C', RandomForestClassifier(class_weight='auto', random_state=1)),
    ]

    X, Y = load_data(INPUT, REGEX)
    Xscaled = preprocessing.scale(X)

    N = X.shape[0]
    attr_range = range(1, N // 2, 5)  # integer division so range() gets an int

    def save(fname):
        savefig(fname, bbox_inches='tight', transparent=True)

    plt.figure()
    figure0(X)
Code example #13
def test_qda_priors():
    clf = qda.QDA(priors=np.array([0.0, 1.0]))
    y_pred = clf.fit(X, y).predict(X)
    assert (y_pred == 2).all()
Code example #14
#! /usr/bin/env python
from utils import *
from sklearn import lda
from sklearn import qda

classifier = lda.LDA()
classifier.fit(train[['x', 'y']].values, train['cls'].values)
prediction = classifier.predict_proba(test[['x', 'y']].values)[:, 1]
plotData(test)
plotContour(classifier.predict_proba)
savePlot('lda_classifier.png')
print("LDA", score(train, classifier.predict_proba),
      score(test, classifier.predict_proba),
      score(full, classifier.predict_proba))

classifier = qda.QDA()
classifier.fit(train[['x', 'y']].values, train['cls'].values)
prediction = classifier.predict_proba(test[['x', 'y']].values)[:, 1]
plotData(test)
plotContour(classifier.predict_proba)
savePlot('qda_classifier.png')
print("LDA", score(train, classifier.predict_proba),
      score(test, classifier.predict_proba),
      score(full, classifier.predict_proba))
Code example #15
File: lda.py Project: harrylclc/ist557
is_lda = 0

x, y = load_data(k=2)
pca = PCA(n_components=10)
pca.fit(x)
# print pca.explained_variance_ratio_
x = pca.transform(x)
# x = pca.fit_transform(x)
# exit()

kf = cross_validation.KFold(x.shape[0], n_fold)
acc, prec, recall = [], [], []
if is_lda:
    clf = lda.LDA()
else:
    clf = qda.QDA()

scaler = preprocessing.StandardScaler()
for train, test in kf:
    print 'iter {}'.format(len(acc))
    x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
    scaler.fit(x_train)
    clf.fit(scaler.transform(x_train), y_train)
    y_pred = clf.predict(scaler.transform(x_test))
    #     clf.fit(x_train, y_train)
    #     y_pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, y_pred))
    prec.append(precision_score(y_test, y_pred))
    recall.append(recall_score(y_test, y_pred))
    print acc
a = np.mean(acc)
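The manual loop is careful to fit the scaler on each training fold only, which is exactly what a Pipeline automates. An equivalent sketch on modern scikit-learn (sklearn.cross_validation became sklearn.model_selection; x, y, and n_fold as in the snippet above):

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# The scaler is re-fit on each training fold inside cross_val_score.
pipe = make_pipeline(StandardScaler(), QuadraticDiscriminantAnalysis())
scores = cross_val_score(pipe, x, y, cv=n_fold)
print(scores.mean())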
Code example #16

def LDApredict(x):
    """
	Input: x
	x (Array): An array of a data point to predict the value of.
	Returns: The predicted value of the data point.
	Description: Uses a lda to predict the value of the input data point.
	"""
    return LDA.predict(x)


#This is the Quadratic Discriminant Analysis Section

from sklearn import qda
QDA = qda.QDA()


def QDAfit(x, y):
    """
	Input: x, y
	x (Array): An array of training points for the svm to set up an algorithm.
	y (Array): An array of values for their corresponding training point.
	Returns: NA
	Description: Sets the qda with an algorithm to predict the input data values.
	"""
    QDA.fit(x, y)


def QDApredict(x):
    """
	Input: x
	x (Array): An array of a data point to predict the value of.
	Returns: The predicted value of the data point.
	Description: Uses a qda to predict the value of the input data point.
	"""
    return QDA.predict(x)