def test_qda_priors():
    clf = qda.QDA()
    y_pred = clf.fit(X, y).predict(X)
    n_pos = np.sum(y_pred == 2)

    neg = 1e-10
    clf = qda.QDA(priors=np.array([neg, 1 - neg]))
    y_pred = clf.fit(X, y).predict(X)
    n_pos2 = np.sum(y_pred == 2)

    assert_greater(n_pos2, n_pos)
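# The QDA tests in this section reference module-level fixtures (X, y, X1,
# X2, y3, y4, X5, y5) defined elsewhere in the original test module. A
# sketch of two of them, with illustrative (not original) values, just to
# make the expected shapes concrete:
#
# import numpy as np
# X = np.array([[0, 0], [-2, -2], [-2, -1], [-1, -1], [-1, -2],
#               [1, 3], [1, 2], [2, 1], [2, 2]])  # two 2-D classes
# y = np.array([1, 1, 1, 1, 1, 2, 2, 2, 2])       # class labels are 1 and 2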
def test_qda_regularization():
    # The default is reg_param=0. and will cause issues
    # when there is a constant variable.
    clf = qda.QDA()
    y_pred = clf.fit(X2, y).predict(X2)
    assert_true(np.any(y_pred != y))

    # Adding a little regularization fixes the problem.
    clf = qda.QDA(reg_param=0.01)
    y_pred = clf.fit(X2, y).predict(X2)
    assert_array_equal(y_pred, y)
def test_qda_store_covariances():
    # The default is to not set the covariances_ attribute.
    clf = qda.QDA().fit(X, y)
    assert_true(not hasattr(clf, 'covariances_'))

    # Test the actual attribute:
    clf = qda.QDA().fit(X, y, store_covariances=True)
    assert_true(hasattr(clf, 'covariances_'))

    assert_array_almost_equal(
        clf.covariances_[0],
        np.array([[0.7, 0.45], [0.45, 0.7]]))

    assert_array_almost_equal(
        clf.covariances_[1],
        np.array([[0.33333333, -0.33333333], [-0.33333333, 0.66666667]]))
def getPredictionAcc(classifier, components, tr_x, tr_y, te_x, te_y):
    """
    Given a classifier choice, a desired dimensionality reduction, and
    training and test data, train a model and make predictions on the
    test set; return the accuracy of the generated model.

    Classifier choices: 'SGD', 'Linear-SVC', 'SVC-rbf', 'Perceptron-L1',
    'Perceptron-L2', 'kNN', 'QDA'
    """
    choices = {
        'SGD': linear_model.SGDClassifier(),
        'Linear-SVC': svm.LinearSVC(),
        'SVC-rbf': svm.SVC(kernel='rbf'),
        'Perceptron-L1': linear_model.Perceptron(penalty='l1'),
        'Perceptron-L2': linear_model.Perceptron(penalty='l2', n_iter=25),
        'kNN': neighbors.KNeighborsClassifier(),
        'QDA': qda.QDA(),
    }

    # Alternative: CountVectorizer(stop_words='english', encoding='latin-1')
    clf = Pipeline([
        ('vect', CountVectorizer(encoding='latin-1')),
        # 5a - the choice of projection strongly affects the quality of the
        # result; GaussianRandomProjection(n_components=components) is an
        # alternative.
        ('GRP', SparseRandomProjection(n_components=components,
                                       dense_output=True)),  # 5b
        ('Scaler', StandardScaler()),  # 5c
        (classifier, choices[classifier]),
    ])

    clf = clf.fit(tr_x, tr_y)
    predicted = clf.predict(te_x)
    return np.mean(predicted == te_y)
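# A hypothetical call, assuming tr_x/te_x are lists of raw text documents
# (as CountVectorizer requires) and tr_y/te_y are label arrays:
#
# acc = getPredictionAcc('QDA', 100, tr_x, tr_y, te_x, te_y)
# print("accuracy: %.3f" % acc)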
def test_qda(): """ QDA classification. This checks that QDA implements fit and predict and returns correct values for a simple toy dataset. """ clf = qda.QDA() y_pred = clf.fit(X, y).predict(X) assert_array_equal(y_pred, y) # Assure that it works with 1D data y_pred1 = clf.fit(X1, y).predict(X1) assert_array_equal(y_pred1, y) # Test probas estimates y_proba_pred1 = clf.predict_proba(X1) assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y) y_log_proba_pred1 = clf.predict_log_proba(X1) assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, 8) y_pred3 = clf.fit(X, y3).predict(X) # QDA shouldn't be able to separate those assert_true(np.any(y_pred3 != y3)) # Classes should have at least 2 elements assert_raises(ValueError, clf.fit, X, y4)
def __init__(self, classifier):
    if classifier == 'svm':
        self.clf = svm.SVC()
    elif classifier == 'lda':
        self.clf = lda.LDA()
    elif classifier == 'qda':
        self.clf = qda.QDA()
    else:
        # Fail loudly instead of leaving self.clf unset.
        raise ValueError('unknown classifier: %s' % classifier)
def buildModelQDA(self, outputFile, priorProbs=[0.5, 0.5]):
    classifier = qda.QDA(priors=priorProbs)
    classifier.fit(self.instances, self.classes)
    modelData = pickle.dumps(classifier)
    # Pickled data must be written in binary mode.
    f = open(outputFile, "wb")
    f.write(modelData)
    f.close()
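# A hypothetical counterpart for reading the model back, assuming the file
# written by buildModelQDA above (this loader is not part of the original
# source):
def loadModelQDA(self, inputFile):
    f = open(inputFile, "rb")
    classifier = pickle.loads(f.read())
    f.close()
    return classifier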
def random_forest(X, t):
    # Note: despite its name, this factory fits a QDA model, not a random
    # forest; the name is kept to avoid breaking callers.
    clf = qda.QDA()
    clf.fit(X, t)

    def random_forest_predict(x):
        # Probability of the positive class.
        return clf.predict_proba(x)[:, 1]

    return random_forest_predict
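# A minimal usage sketch (X_train, t_train, X_new are hypothetical):
#
# predict = random_forest(X_train, t_train)
# positive_probs = predict(X_new)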
def get_classifier(classifier_str):
    '''
    This function maps the classifier string classifier_str to the
    corresponding classifier object with the default parameters set.
    '''
    # SVC
    if (classifier_str == 'linearsvc'):
        cl = svm.LinearSVC(**svm_default_param)
    elif (classifier_str == 'svc_linear'):
        libsvm_default_param['kernel'] = 'linear'
        cl = svm.SVC(**libsvm_default_param)
    elif (classifier_str == 'svc_rbf'):
        libsvm_default_param['kernel'] = 'rbf'
        cl = svm.SVC(**libsvm_default_param)
    # polynomial and sigmoid kernels, as well as nuSVC, are not implemented
    # Nearest neighbors (Euclidean distance used by default)
    elif (classifier_str == 'kn_uniform'):
        kn_default_param['weights'] = 'uniform'
        cl = neighbors.KNeighborsClassifier(**kn_default_param)
    elif (classifier_str == 'kn_distance'):
        kn_default_param['weights'] = 'distance'
        cl = neighbors.KNeighborsClassifier(**kn_default_param)
    elif (classifier_str == 'rn_uniform'):
        rn_default_param['weights'] = 'uniform'
        cl = neighbors.RadiusNeighborsClassifier(**rn_default_param)
    elif (classifier_str == 'rn_distance'):
        rn_default_param['weights'] = 'distance'
        cl = neighbors.RadiusNeighborsClassifier(**rn_default_param)
    elif (classifier_str == 'nc'):
        cl = neighbors.NearestCentroid()
    # LDA and QDA; priors are by default set to 1/len(class) for each class
    elif (classifier_str == 'lda'):
        cl = lda.LDA()
    elif (classifier_str == 'qda'):
        cl = qda.QDA()
    # Gaussian naive Bayes
    # (from the code it is unclear how priors are set)
    elif (classifier_str == 'gnb'):
        cl = naive_bayes.GaussianNB()
    elif (classifier_str == 'mnb'):
        cl = naive_bayes.MultinomialNB()
    elif (classifier_str == 'bnb'):
        cl = naive_bayes.BernoulliNB()
    # Decision tree
    elif (classifier_str == 'dtree'):
        cl = tree.DecisionTreeClassifier()
    elif (classifier_str == 'rforest'):
        cl = ensemble.RandomForestClassifier()
    else:
        # raise an error if the classifier is not found
        raise ValueError('Classifier not implemented: %s' % (classifier_str))
    return (cl)
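# A minimal usage sketch, assuming the *_default_param dicts are defined
# elsewhere in the original module:
#
# cl = get_classifier('qda')
# cl.fit(X_train, y_train)
# y_pred = cl.predict(X_test)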
def test_qda_regularization():
    # The default is reg_param=0. and will cause issues
    # when there is a constant variable.
    clf = qda.QDA()
    with ignore_warnings():
        y_pred = clf.fit(X2, y).predict(X2)
    assert_true(np.any(y_pred != y))

    # Adding a little regularization fixes the problem.
    clf = qda.QDA(reg_param=0.01)
    with ignore_warnings():
        clf.fit(X2, y)
    y_pred = clf.predict(X2)
    assert_array_equal(y_pred, y)

    # Case n_samples_in_a_class < n_features
    clf = qda.QDA(reg_param=0.1)
    with ignore_warnings():
        clf.fit(X5, y5)
    y_pred5 = clf.predict(X5)
    assert_array_equal(y_pred5, y5)
def test_QDA(self):
    X = np.array([[-1, -1], [-2, -1], [-3, -2],
                  [1, 1], [2, 1], [3, 2]])
    y = np.array([1, 1, 1, 2, 2, 2])
    df = pdml.ModelFrame(X, target=y)

    mod1 = df.qda.QDA()
    mod2 = qda.QDA()

    df.fit(mod1)
    mod2.fit(X, y)

    result = df.predict(mod1)
    expected = mod2.predict(X)

    self.assertTrue(isinstance(result, pdml.ModelSeries))
    self.assert_numpy_array_equal(result.values, expected)
# pca.fit()
# pca = grid.best_estimator_

if __name__ == '__main__':
    REGEX = re.compile(sys.argv[1])
    INPUT = sys.argv[2]
    NFOLDS = 10

    classifiers = [
        ('SVC', '#00995C',
         svm.SVC(kernel='linear', class_weight='auto', random_state=1)),
        ('LSVC', '#5C991F',
         svm.LinearSVC(class_weight='auto', random_state=1)),
        ('QDA', '#995C1F', qda.QDA()),
        ('LDA', '#9966FF', lda.LDA()),
        ('RF', '#991F5C',
         RandomForestClassifier(class_weight='auto', random_state=1)),
    ]

    X, Y = load_data(INPUT, REGEX)
    Xscaled = preprocessing.scale(X)
    N = X.shape[0]
    attr_range = range(1, N // 2, 5)

    def save(fname):
        savefig(fname, bbox_inches='tight', transparent=True)

    plt.figure()
    figure0(X)
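# The script above takes a feature-name regex and an input path on the
# command line, e.g. (script and file names are hypothetical):
#
#   python benchmark.py 'feature_.*' data/input.csv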
def test_qda_priors():
    clf = qda.QDA(priors=np.array([0.0, 1.0]))
    y_pred = clf.fit(X, y).predict(X)
    assert (y_pred == 2).all()
#! /usr/bin/env python

from utils import *
from sklearn import lda
from sklearn import qda

classifier = lda.LDA()
classifier.fit(train[['x', 'y']].values, train['cls'].values)
prediction = classifier.predict_proba(test[['x', 'y']].values)[:, 1]
plotData(test)
plotContour(classifier.predict_proba)
savePlot('lda_classifier.png')
print("LDA",
      score(train, classifier.predict_proba),
      score(test, classifier.predict_proba),
      score(full, classifier.predict_proba))

classifier = qda.QDA()
classifier.fit(train[['x', 'y']].values, train['cls'].values)
prediction = classifier.predict_proba(test[['x', 'y']].values)[:, 1]
plotData(test)
plotContour(classifier.predict_proba)
savePlot('qda_classifier.png')
print("QDA",
      score(train, classifier.predict_proba),
      score(test, classifier.predict_proba),
      score(full, classifier.predict_proba))
is_lda = 0
n_fold = 10  # number of CV folds; not defined in the original snippet (assumed value)

x, y = load_data(k=2)

pca = PCA(n_components=10)
pca.fit(x)
# print pca.explained_variance_ratio_
x = pca.transform(x)
# x = pca.fit_transform(x)

kf = cross_validation.KFold(x.shape[0], n_fold)
acc, prec, recall = [], [], []

if is_lda:
    clf = lda.LDA()
else:
    clf = qda.QDA()

scaler = preprocessing.StandardScaler()
for train, test in kf:
    print 'iter {}'.format(len(acc))
    x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test]
    scaler.fit(x_train)
    clf.fit(scaler.transform(x_train), y_train)
    y_pred = clf.predict(scaler.transform(x_test))
    # clf.fit(x_train, y_train)
    # y_pred = clf.predict(x_test)
    acc.append(accuracy_score(y_test, y_pred))
    prec.append(precision_score(y_test, y_pred))
    recall.append(recall_score(y_test, y_pred))

print acc
a = np.mean(acc)
def LDApredict(x):
    """
    Input: x
        x (Array): An array of a data point to predict the value of.

    Returns: The predicted value of the data point.

    Description: Uses an LDA to predict the value of the input data point.
    """
    return LDA.predict(x)


# This is the Quadratic Discriminant Analysis section.
from sklearn import qda

QDA = qda.QDA()


def QDAfit(x, y):
    """
    Input: x, y
        x (Array): An array of training points for the QDA to set up an
            algorithm.
        y (Array): An array of values for their corresponding training
            points.

    Returns: NA

    Description: Sets the QDA with an algorithm to predict the input data
        values.
    """
    QDA.fit(x, y)


def QDApredict(x):
    """
    Input: x
        x (Array): An array of a data point to predict the value of.

    Returns: The predicted value of the data point.

    Description: Uses a QDA to predict the value of the input data point.
    """
    return QDA.predict(x)
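# A hypothetical end-to-end use of the module-level QDA wrapper above
# (x_train, y_train, x_new are placeholders):
#
# QDAfit(x_train, y_train)
# labels = QDApredict(x_new)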