Esempio n. 1
0
class myQDABinary(myModel):
    """Binary-classification wrapper around sklearn's QuadraticDiscriminantAnalysis.

    DataFrame inputs are cast to float32 before being handed to sklearn;
    array inputs are passed through unchanged.
    """

    def make(self, make_params):
        """Instantiate the underlying QDA estimator with **make_params; returns self (fluent)."""
        self.model = QuadraticDiscriminantAnalysis(**make_params)
        return self

    def fit(self, xtrain, ytrain, xtest=None, ytest=None, fit_params=None):
        """Fit the model on (xtrain, ytrain).

        xtest/ytest are accepted for interface compatibility but unused.
        fit_params defaults to None instead of a mutable {} so the default is
        not shared across calls.
        """
        fit_params = {} if fit_params is None else fit_params
        if isinstance(xtrain, pd.DataFrame):
            self.model.fit(xtrain.astype('float32'), ytrain.astype('float32'), **fit_params)
        else:
            self.model.fit(xtrain, ytrain, **fit_params)

    def predict(self, xs, threshold=0.5):
        """Predict class labels; `threshold` is kept for interface compatibility (unused)."""
        if isinstance(xs, pd.DataFrame):
            return self.model.predict(xs.astype('float32'))
        return self.model.predict(xs)

    def predict_proba(self, xs):
        """Return probabilities.

        NOTE(review): for DataFrames this returns only the positive-class
        column ([:, 1]); for arrays it returns the full proba matrix — an
        inconsistency preserved from the original; confirm callers expect it.
        """
        if isinstance(xs, pd.DataFrame):
            return self.model.predict_proba(xs.astype('float32'))[:, 1]
        if len(xs.shape) == 1:
            # Single sample: sklearn expects a 2-D array.
            return self.model.predict_proba(xs.reshape(1, -1))
        return self.model.predict_proba(xs)
Esempio n. 2
0
def plot_qda(X, y):
    """Fit a QDA classifier on (X, y), report train/test error rates via
    compare_y, and scatter-plot the first two features coloured by genre."""
    print('QDA: ')
    classifier = QuadraticDiscriminantAnalysis()
    classifier.fit(X, y)

    print("Train error rate: " + str(compare_y(classifier.predict(X), y)))

    test_pred = classifier.predict(genre_x_test)  # module-level test split
    print("Test error rate: " + str(compare_y(test_pred, genre_y_test)))

    plot_errors(test_pred, "QDA")

    class_styles = zip(['navy', 'turquoise', 'darkorange', 'red'],
                       [0, 1, 2, 3],
                       ['jazz', 'rock', 'hip_hop', 'classical'])

    plt.figure()
    for color, label_idx, target_name in class_styles:
        mask = y == label_idx
        plt.scatter(X[mask, 0], X[mask, 1],
                    alpha=.8, color=color, label=target_name, s=.5)
    plt.legend(loc='best', shadow=False, scatterpoints=1)
    plt.title('QDA of music dataset')
    plt.xlabel("Dancebility")
    plt.ylabel("Energy")
    plt.show()
def test_qda_regularization():
    """QDA with the default reg_param=0 mispredicts on data with a constant
    feature; a small reg_param restores exact predictions.

    Fixtures X2/y6 and X5/y5 are defined elsewhere in the test module.
    """
    # The default is reg_param=0. and will cause issues when there is a
    # constant variable.

    # Fitting on data with constant variable triggers an UserWarning.
    collinear_msg = "Variables are collinear"
    clf = QuadraticDiscriminantAnalysis()
    with pytest.warns(UserWarning, match=collinear_msg):
        y_pred = clf.fit(X2, y6)

    # XXX: RuntimeWarning is also raised at predict time because of divisions
    # by zero when the model is fit with a constant feature and without
    # regularization: should this be considered a bug? Either by the fit-time
    # message more informative, raising and exception instead of a warning in
    # this case or somehow changing predict to avoid division by zero.
    with pytest.warns(RuntimeWarning, match="divide by zero"):
        y_pred = clf.predict(X2)
    assert np.any(y_pred != y6)

    # Adding a little regularization fixes the division by zero at predict
    # time. But UserWarning will persist at fit time.
    clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with pytest.warns(UserWarning, match=collinear_msg):
        clf.fit(X2, y6)
    y_pred = clf.predict(X2)
    assert_array_equal(y_pred, y6)

    # UserWarning should also be there for the n_samples_in_a_class <
    # n_features case.
    clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with pytest.warns(UserWarning, match=collinear_msg):
        clf.fit(X5, y5)
    y_pred5 = clf.predict(X5)
    assert_array_equal(y_pred5, y5)
Esempio n. 4
0
def m2_QDA(X, y, score_method='default', verbose=False):
    """Hold out 20% of (X, y), fit QDA on each track's first genre label, and
    return the validation score (both scores are printed when verbose)."""
    # 80/20 random split.
    Xt, Xv, yt, yv = train_test_split(X, y, test_size=0.2)
    yt1 = yt.apply(lambda g: g[0])  # primary genre only

    # Discard tracks whose primary genre occurs exactly once in the train split.
    singletons = yt1.drop_duplicates(False).index
    Xt = Xt.drop(singletons)
    yt = yt.drop(singletons)
    yt1 = yt1.drop(singletons)

    model = QuadraticDiscriminantAnalysis(tol=10**-10)
    model.fit(Xt, yt1)

    train_err = score(model.predict(Xt), yt)
    val_err = score(model.predict(Xv), yv)

    if verbose:
        print_scores(train_err, val_err, 'QDA')

    return val_err
Esempio n. 5
0
def crossValidate(attributes, outcomes, foldCount, ownFunction=True):
    """k-fold cross-validation of a Gaussian discriminant classifier.

    Splits attributes/outcomes into foldCount folds; each fold serves once as
    the test set. With ownFunction=True the hand-rolled GDA (getParams /
    gdaNDEstimate, defined elsewhere) is used; otherwise sklearn's
    QuadraticDiscriminantAnalysis.

    Returns five per-fold metric lists: accuracy, precision, recall,
    F-measure, AUC (in that order, as produced by getMetrics).
    """
    presList = []
    recallList = []
    accrList = []
    fMeasList = []
    aucList = []
    testingEstimate = []

    otcmVal = list(set(outcomes))
    params = {}
    featLen = 4  # number of features per sample (fixed for this dataset)

    attrFolds = getFolds(attributes, foldCount)
    otcmFolds = getFolds(outcomes, foldCount)

    testDataList = copy.copy(attrFolds)
    testOtcmList = copy.copy(otcmFolds)

    for itr in range(foldCount):
        # Training data = every fold except the current test fold.
        trainDataList = []
        trainOtcmList = []
        for intitr in range(foldCount):
            if intitr != itr:
                trainDataList.append(attrFolds[intitr])
                trainOtcmList.append(otcmFolds[intitr])

        trainDataArr = np.array(trainDataList).reshape(-1, featLen)
        trainOtcmArr = np.array(trainOtcmList).reshape(-1)
        testDataArr = np.array(testDataList[itr]).reshape(-1, featLen)
        testOtcmArr = np.array(testOtcmList[itr]).reshape(-1)

        if ownFunction:
            params = getParams(trainDataArr, trainOtcmArr, otcmVal, featLen)
            testingEstimate = gdaNDEstimate(testDataArr, params, otcmVal)
        else:
            #clf = LinearDiscriminantAnalysis()
            clf = QuadraticDiscriminantAnalysis()
            clf.fit(trainDataArr, trainOtcmArr)
            trainingEstimate = clf.predict(trainDataArr)
            testingEstimate = clf.predict(testDataArr)

        # Plot only for the first fold of a binary problem.
        if itr == 0 and len(otcmVal) == 2:
            addTitle = "Own" if ownFunction else "Inbuilt"
            metric = getMetrics(testOtcmArr,
                                testingEstimate,
                                otcmVal,
                                showPlot=True,
                                title="GDA2D Versicolor,Virginica - %s" %
                                addTitle)
        else:
            metric = getMetrics(testOtcmArr, testingEstimate, otcmVal)
        accrList.append(metric[0])
        presList.append(metric[1])
        recallList.append(metric[2])
        fMeasList.append(metric[3])
        aucList.append(metric[4])

    return accrList, presList, recallList, fMeasList, aucList
Esempio n. 6
0
def qda_predictor(x_train, y_train, x_test, y_test, give_clf=False):
    """Fit QDA and report test accuracy plus weighted F1.

    Returns (accuracy, f1) by default, or the fitted classifier when
    give_clf is True.
    """
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(x_train, y_train)
    accuracy = clf.score(x_test, y_test)
    # The original predicted and computed precision/recall/F1 twice; do it once.
    prfs = precision_recall_fscore_support(y_test, clf.predict(x_test), average='weighted')
    f1 = prfs[2]
    print(prfs)
    if not give_clf:
        return (accuracy, f1)
    else:
        return clf
Esempio n. 7
0
def train_l1_qda(x_train, x_test, y_train, y_test):
    """Fit QDA, print a score, and return predictions as column vectors.

    Returns [train_predictions, test_predictions] — the same values and order
    as the original, whose local names (`test_res` for train predictions and
    vice versa) were swapped and misleading.
    """
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(x_train, y_train)

    # Score on the test split when its labels are available, else on train.
    if y_test is not None:
        print('QuadraticDiscriminantAnalysis:', clf.score(x_test, y_test))
    else:
        print('QuadraticDiscriminantAnalysis:', clf.score(x_train, y_train))
    train_res = np.reshape(clf.predict(x_train), (-1, 1))
    test_res = np.reshape(clf.predict(x_test), (-1, 1))
    return [train_res, test_res]
Esempio n. 8
0
def Quadratic_Discriminant_Analysis():
    """Fit QDA on the module-level train split and print accuracy plus a
    classification report for both the train and test splits."""
    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)

    for tag, feats, labels in (('train: ', X_train, y_train),
                               ('test: ', X_test, y_test)):
        preds = model.predict(feats)
        print(tag, accuracy_score(labels, preds))
        print(tag, classification_report(labels, preds))
Esempio n. 9
0
def qda(train_size=None):
    """Fit QDA on the dataset's train split (optionally subsampled to
    train_size) and report MAE and a confusion matrix on the test split."""
    _, _, X_train, X_test, y_train, y_test = dataset()
    if train_size:
        # Keep only a fraction of the training data; discard the remainder.
        split = train_test_split(X_train, y_train, train_size=train_size)
        X_train, y_train = split[0], split[2]

    model = QDA()
    model.fit(X_train, y_train)
    mae(y_test, model.predict(X_test))
    confusion_matrix(y_test, model.predict(X_test), model.score(X_test, y_test))
Esempio n. 10
0
def test_qda(data):
    """Fit QDA on data.train_* and print classification accuracy for the
    training and test sets."""
    model = QDA()
    model.fit(data.train_x, data.train_y)

    print('QDA')
    train_acc = metrics.accuracy_score(data.train_y, model.predict(data.train_x))
    print('Classification accuracy for train data = {:.2%}'.format(train_acc))

    test_acc = metrics.accuracy_score(data.test_y, model.predict(data.test_x))
    print('Classification accuracy for test data = {:.2%}'.format(test_acc))
Esempio n. 11
0
def test_quadratic_discriminant_analysis(data):
    """Train QDA on data.train_* and report accuracy on both splits."""
    classifier = QDA()
    classifier.fit(data.train_x, data.train_y)

    print('QDA')
    train_accuracy = metrics.accuracy_score(data.train_y,
                                            classifier.predict(data.train_x))
    print('Classification accuracy for train data = {:.2%}'.format(train_accuracy))

    test_accuracy = metrics.accuracy_score(data.test_y,
                                           classifier.predict(data.test_x))
    print('Classification accuracy for test data = {:.2%}'.format(test_accuracy))
Esempio n. 12
0
def lda():
    """Train a discriminant-analysis classifier (QDA, despite the function
    name) on precomputed features and report train and test results."""
    X_train_feature, X_test_feature, y_train, y_test = train_test_data()

    print('Start training')
    # clf = LinearDiscriminantAnalysis()
    classifier = QuadraticDiscriminantAnalysis()
    classifier.fit(X_train_feature, y_train)
    result_analysis(classifier.predict(X_train_feature), y_train)

    print('Start predicting')
    result_analysis(classifier.predict(X_test_feature), y_test)
Esempio n. 13
0
def crossValidate(attributes, outcomes, foldCount, ownFunction=True):
    """k-fold cross-validation of a Gaussian discriminant classifier.

    Each of the foldCount folds serves once as the test set. With
    ownFunction=True a hand-rolled GDA (getParams / gdaNDEstimate) is used;
    otherwise sklearn's QuadraticDiscriminantAnalysis.

    Returns per-fold lists: accuracy, precision, recall, F-measure, AUC.

    Fix: the original mixed spaces and tabs on its first body line, which is
    a TabError under Python 3; indentation is normalized to 4 spaces.
    """
    presList = []
    recallList = []
    accrList = []
    fMeasList = []
    aucList = []
    testingEstimate = []

    otcmVal = list(set(outcomes))
    params = {}
    featLen = 4  # number of features per sample

    attrFolds = getFolds(attributes, foldCount)
    otcmFolds = getFolds(outcomes, foldCount)

    testDataList = copy.copy(attrFolds)
    testOtcmList = copy.copy(otcmFolds)

    for itr in range(foldCount):
        # Training data = every fold except the current test fold.
        trainDataList = []
        trainOtcmList = []
        for intitr in range(foldCount):
            if intitr != itr:
                trainDataList.append(attrFolds[intitr])
                trainOtcmList.append(otcmFolds[intitr])

        trainDataArr = np.array(trainDataList).reshape(-1, featLen)
        trainOtcmArr = np.array(trainOtcmList).reshape(-1)
        testDataArr = np.array(testDataList[itr]).reshape(-1, featLen)
        testOtcmArr = np.array(testOtcmList[itr]).reshape(-1)

        if ownFunction:
            params = getParams(trainDataArr, trainOtcmArr, otcmVal, featLen)
            testingEstimate = gdaNDEstimate(testDataArr, params, otcmVal)
        else:
            #clf = LinearDiscriminantAnalysis()
            clf = QuadraticDiscriminantAnalysis()
            clf.fit(trainDataArr, trainOtcmArr)
            trainingEstimate = clf.predict(trainDataArr)
            testingEstimate = clf.predict(testDataArr)

        # Plot only for the first fold of a binary problem.
        if itr == 0 and len(otcmVal) == 2:
            addTitle = "Own" if ownFunction else "Inbuilt"
            metric = getMetrics(testOtcmArr, testingEstimate, otcmVal,
                                showPlot=True,
                                title="GDA2D Versicolor,Virginica - %s" % addTitle)
        else:
            metric = getMetrics(testOtcmArr, testingEstimate, otcmVal)
        accrList.append(metric[0])
        presList.append(metric[1])
        recallList.append(metric[2])
        fMeasList.append(metric[3])
        aucList.append(metric[4])

    return accrList, presList, recallList, fMeasList, aucList
Esempio n. 14
0
def train_quadratic_discriminant_analysis(data_train, data_test, class_train,
                                          class_test):
    """Fit QDA on the training split and print accuracy for both splits."""
    classifier = QDA()
    classifier.fit(data_train, class_train)

    train_acc = metrics.accuracy_score(class_train, classifier.predict(data_train))
    print('Quadratic discriminant analysis')
    print('The accuracy of the classification on the training set of data')
    print('{:.2%}'.format(train_acc))

    test_acc = metrics.accuracy_score(class_test, classifier.predict(data_test))
    print('The accuracy of classification on the test data set')
    print('{:.2%}'.format(test_acc))
Esempio n. 15
0
def QDA_classify(params, dataset, seed, classify):
    """Evaluate a QDA classifier for a hyperparameter-search objective.

    classify == "test": fit on the train split and score accuracy on test.
    classify == "cv":   mean cross-validated accuracy on the train split.

    Returns {'auc': accuracy, 'loss': 1 - accuracy, 'status': STATUS_OK}.
    Raises ValueError for any other `classify` value (the original fell
    through to a NameError on the unbound `auc`).
    """
    model_name = "QDA"
    print(model_name, params, dataset, seed)
    # NOTE(review): fixed seed; the `seed` argument only affects the data split.
    np.random.seed(108)
    start_time = timeit.default_timer()
    train_X, train_y, test_X, test_y = gen_train_test_data(dataset, seed)
    # build a classifier based on selected parameters
    # reg_param = UniformFloatHyperparameter('reg_param', 0.0, 1.0, default_value=0.0)
    model = QuadraticDiscriminantAnalysis(
        reg_param=round(params["reg_param"], 4))
    if classify == "test":
        model.fit(train_X, train_y)
        pred_y = model.predict(test_X)
        # maximize accuracy
        auc = accuracy_score(test_y, pred_y)
    elif classify == "cv":
        scores = cross_val_score(model, train_X, train_y, cv=cv_train)
        auc = np.mean(scores)
    else:
        raise ValueError("classify must be 'test' or 'cv', got %r" % (classify,))
    # minimize loss
    loss = 1.0 - auc
    end_time = timeit.default_timer()
    print("{}_runtime: {}(s)".format(model_name, round(end_time - start_time,
                                                       2)))
    del model

    # dictionary with information for evaluation
    return {'auc': auc, 'loss': loss, 'status': STATUS_OK}
Esempio n. 16
0
def qda(X, y, X_train, y_train, X_test, y_test):
    """Fit QDA on the train split, print its mean cross-validated score on
    the full data, and print the test-split confusion matrix."""
    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)
    mean_cv_score = cross_val_score(model, X, y).mean()
    print('Score: QDA {}'.format(mean_cv_score))
    print(confusion_matrix(y_test, model.predict(X_test)))
Esempio n. 17
0
def qda_lol(X, y, normalize=True, n_components=None, n_splits=5, n_repeats=5):
    """Repeated stratified k-fold evaluation of a LOL projection followed by
    QDA; returns the aggregated confusion matrix from compute_cm."""
    folds = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats)
    matrices = []

    for train_idx, test_idx in folds.split(X, y):
        y_train, y_test = y[train_idx], y[test_idx]
        if normalize:
            X_train, X_test = scale(X[train_idx]), scale(X[test_idx])
        else:
            X_train, X_test = X[train_idx], X[test_idx]

        # Project with LOL fitted on the training fold only.
        projector = LOL(n_components=n_components)
        X_train = projector.fit_transform(X_train, y_train)
        X_test = projector.transform(X_test)

        classifier = QuadraticDiscriminantAnalysis()
        classifier.fit(X_train, y_train)
        matrices.append(confusion_matrix(y_test, classifier.predict(X_test)))

    cm, _ = compute_cm(matrices)
    return cm
class QuadraticDiscriminantAnalysisImpl():
    """Thin wrapper that defers construction of the underlying estimator
    (SKLModel) until fit() is called."""

    def __init__(self,
                 priors=None,
                 reg_param=0.0,
                 store_covariance=False,
                 tol=0.0001,
                 store_covariances=None):
        # Stash constructor args; the real estimator is built lazily in fit().
        self._hyperparams = {
            'priors': priors,
            'reg_param': reg_param,
            'store_covariance': store_covariance,
            'tol': tol,
            'store_covariances': store_covariances,
        }

    def fit(self, X, y=None):
        """Build and fit the underlying estimator; returns self (fluent)."""
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is None:
            self._sklearn_model.fit(X)
        else:
            self._sklearn_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the fitted estimator."""
        return self._sklearn_model.predict(X)

    def predict_proba(self, X):
        """Delegate probability prediction to the fitted estimator."""
        return self._sklearn_model.predict_proba(X)
def qda(trainx, trainy, valx, valy):
    """Fit QDA and return (confusion_matrix, accuracy) on the validation split."""
    model = QuadraticDiscriminantAnalysis()
    model.fit(trainx, trainy)
    predictions = model.predict(valx)
    matrix = confusion_matrix(valy, predictions)
    accuracy = sum(valy == predictions) / len(valy)
    return matrix, accuracy
Esempio n. 20
0
def lda_window(df, header, width, title):
    """Fit LDA and QDA on width-sized sliding windows of one column and
    evaluate both on a chronological 80/20 split; returns (lda, qda).

    NOTE(review): training targets come from column 'cho2_b' but test targets
    from 'cho_b' — one of these looks like a typo; confirm against the
    dataframe schema before relying on the reported metrics.
    """
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    qda = QuadraticDiscriminantAnalysis(store_covariance=True)

    # Chronological 80/20 split; NaNs zero-filled.
    df_train = df[:int(len(df)*0.8)].reset_index(drop=True).fillna(0)
    df_test = df[int(len(df)*0.8):].reset_index(drop=True).fillna(0)

    # df_train['product'] = np.ones(len(df_train))
    # df_test['product'] = np.ones(len(df_test))
    # for i, col in enumerate(headers):
    #     df_train['product'] = df_train['product'] * df_train[col]    
    #     df_test['product'] = df_test['product'] * df_test[col]

    # Stack `width` consecutive values of the chosen column into each row.
    X = window_stack(df_train[[header]], width=width)    
    # X = window_stack(df_train[['product']], width=width)
    y = df_train['cho2_b'][width-1:]  # targets aligned to window end positions
    print("Input shape" + str(X.shape))

    lda.fit(X, y)
    qda.fit(X, y)

    X = window_stack(df_test[[header]], width=width)
    # X = window_stack(df_test[['product']], width=width)
    y = df_test['cho_b'][width-1:]
    y_pred=lda.predict(X)
    utils.evaluate(y, y_pred, 0, f'LDA window of {header}')
    utils.plot_eval(df_test, y, y_pred, title=f'LDA window of {header}')
    y_pred=qda.predict(X)
    utils.evaluate(y, y_pred, 0, f'QDA window of {header}')
    utils.plot_eval(df_test, y, y_pred, title=f'QDA window of {header}')

    return lda, qda
Esempio n. 21
0
    def create_symbol_forecast_model(self):
        """Fit a QDA direction-forecasting model on lagged S&P500 returns,
        print the hold-out error rate, and return the fitted model."""
        # Create a lagged series of the S&P500 US stock market index
        snpret = create_lagged_series(
            self.symbol_list[0], self.model_start_date,
            self.model_end_date, lags=5
        )

        # Two days of lagged returns predict the market direction.
        x = snpret[["Lag1", "Lag2"]]
        y = snpret["Direction"]

        # Chronological train/test split at model_start_test_date.
        start_test = self.model_start_test_date
        x_train = x[x.index < start_test]
        y_train = y[y.index < start_test]
        x_test = x[x.index >= start_test]
        y_test = y[y.index >= start_test]

        model = QuadraticDiscriminantAnalysis()
        model.fit(x_train, y_train)

        pred_test = model.predict(x_test)  # ndarray of predicted directions
        error_rate = (y_test != pred_test).sum() * 1. / len(y_test)
        print("Error Rate is {0}".format(error_rate))

        return model
Esempio n. 22
0
def classifier_qda(features, targets):
    """
    Classifier for Quadratic Discriminant analysis. Output the score of the classifier and serialize the model as pickle
    :param features: bag of words representation of our users' tweets
    :param targets: labels of these users
    :return: predictions on the held-out 10% split
    """

    print('---------- QDA ------------')

    # Initialize, split the dataset, train and make predictions.
    # train_test_split returns (X_train, X_test, y_train, y_test); the
    # original names ("train_features" for the training *labels*) were
    # misleading and are fixed here — behavior is unchanged.
    qda = QuadraticDiscriminantAnalysis()
    x_train, x_test, y_train, y_test = train_test_split(
        features, targets, test_size=0.1)
    qda.fit(x_train, y_train)
    prediction = qda.predict(x_test)

    # Display the confusion matrix and the success rate of the prediction.
    A = confusion_matrix(y_test, prediction)
    s = np.sum(A)
    print(A)
    print('Success: %f out of %d points' % (((A[0, 0] + A[1, 1]) / s), s))

    # Refit on the full dataset and serialize that model.
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(features, targets)
    with open("C:/data/serialized/qda.p", "wb") as f:
        pickle.dump(qda, f)

    return prediction
    def trainingModels(self):
        """Train one classifier per call, selected by the self.no step counter.

        Each call fits the next model in a fixed sequence (RandomForest,
        GradientBoosting, AdaBoost, LDA, QDA, DecisionTree), stores its test
        predictions in self.predictiontest, advances self.no, and reports
        accuracy via displayAccuracy. The final step (self.no == 5) prints
        the overall score, writes two CSV files, and exits the process.
        """

        if self.no == -1:
            clf2 = RandomForestClassifier(criterion="gini",
                                          random_state=60,
                                          max_depth=20,
                                          n_estimators=200)
            clf2.fit(self.featuretrain, self.labeltrain)
            self.predictiontest = clf2.predict(self.featuretest)
            self.no += 1
            self.displayAccuracy("RandomForestClassifier")

        elif self.no == 0:
            clf5 = GradientBoostingClassifier()
            clf5.fit(self.featuretrain, self.labeltrain)
            self.predictiontest = clf5.predict(self.featuretest)
            self.no += 1
            self.displayAccuracy("GradientBoostingClassifier")

        elif self.no == 1:
            clf4 = AdaBoostClassifier()
            clf4.fit(self.featuretrain, self.labeltrain)
            self.predictiontest = clf4.predict(self.featuretest)
            self.no += 1
            self.displayAccuracy("AdaBoostClassifier")

        elif self.no == 2:
            clf9 = LinearDiscriminantAnalysis()
            clf9.fit(self.featuretrain, self.labeltrain)
            self.predictiontest = clf9.predict(self.featuretest)
            self.no += 1
            self.displayAccuracy("LinearDiscriminantAnalysis")

        elif self.no == 3:
            clf10 = QuadraticDiscriminantAnalysis()
            clf10.fit(self.featuretrain, self.labeltrain)
            self.predictiontest = clf10.predict(self.featuretest)
            self.no += 1
            self.displayAccuracy("QuadraticDiscriminantAnalysis")

        elif self.no == 4:
            clf3 = DecisionTreeClassifier(criterion="gini",
                                          random_state=50,
                                          max_depth=20,
                                          min_samples_leaf=10)
            clf3.fit(self.featuretrain, self.labeltrain)
            self.predictiontest = clf3.predict(self.featuretest)
            self.no += 1
            self.displayAccuracy("DecisionTreeClassifier")

        elif self.no == 5:
            # Final step: report and persist results, then terminate.
            # NOTE(review): 16281 appears to be the hard-coded test-set size —
            # confirm against the loader.
            print("Score is:")
            print(((self.score) / 16281) * 100)
            self.finalresult.to_csv("Predicted_result.csv",
                                    sep=',',
                                    encoding='utf-8')
            self.richpeople.to_csv("Rich_people.csv",
                                   sep=',',
                                   encoding='utf-8')
            exit()
Esempio n. 24
0
def da_classify(X_train, y_train, X_test, y_test):
    """Fit QDA, print the fit time, and print an accuracy-style score.

    NOTE(review): the denominator uses the module-level `y`
    (float(0.3 * len(y))), not len(y_test) — this is only correct if the test
    split is exactly 30% of `y`; confirm against the caller.
    """
    t0 = time.time()
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(X_train, y_train)
    print("da done in %0.3fs" % (time.time() - t0))
    print(1 -
          np.sum(np.abs(clf.predict(X_test) - y_test)) / float(0.3 * len(y)))
Esempio n. 25
0
def get_qda_oof_prediction(x_train, y_train, x_test):
    """Out-of-fold QDA predictions using the module-level `skf` splitter.

    Returns (oof_train, oof_test) as column vectors; the test prediction is
    the per-column mode over all folds' predictions.
    """
    oof_train = np.zeros((ntrain,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for fold, (train_ind, test_ind) in enumerate(skf.split(x_train, y_train)):
        model = QuadraticDiscriminantAnalysis()
        model.fit(x_train[train_ind], y_train[train_ind])
        oof_train[test_ind] = model.predict(x_train[test_ind])
        oof_test_skf[fold, :] = model.predict(x_test)
        print("Test score {} ".format(f1_score(y_train[test_ind], oof_train[test_ind])))
    oof_test = stats.mode(oof_test_skf, axis=0)[0]
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)
    def QDA(self, ):
        """Fit QDA on self.X_train/y_train and report ROC-AUC, AUC-PRC and
        weighted F1 on the test split; returns the three metrics."""
        model = QuadraticDiscriminantAnalysis()
        model.fit(self.X_train, self.y_train)

        # Predict the test set results
        y_pred = model.predict(self.X_test)

        separator = '------------------------------------------------------------------'

        # Performance: AUC-ROC
        roc_auc_QDA = roc_auc_score(self.y_test, y_pred)
        print('QDA completed: ROC-AUC: {}'.format(roc_auc_QDA) + '\n' + separator)

        # Performance: AUC-PRC
        auc_prc_QDA = average_precision_score(self.y_test, y_pred,
                                              average='weighted')
        print('QDA completed: AUC-PRC: {}'.format(auc_prc_QDA) + '\n' + separator)

        # Performance: F1 metric
        f1_QDA = f1_score(self.y_test, y_pred, average='weighted')
        print('QDA completed: F1 metric: {}'.format(f1_QDA) + '\n' + separator)

        return roc_auc_QDA, auc_prc_QDA, f1_QDA
Esempio n. 27
0
class QuadraticDiscriminantAnalysiscls(object):
    """Defensive wrapper around QuadraticDiscriminantAnalysis that prints
    tracebacks instead of raising."""

    def __init__(self):
        self.qda_cls = QuadraticDiscriminantAnalysis()
        self.prediction = None
        self.train_x = None
        self.train_y = None
        # Initialized here so accuracy_score() called before predict() fails
        # inside its try block instead of with an AttributeError on self.
        self.test_x = None

    def train_model(self, train_x, train_y):
        """Fit the wrapped model; errors are printed, not raised."""
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.qda_cls.fit(train_x, train_y)
        except Exception:  # narrowed from bare except: lets KeyboardInterrupt through
            print(traceback.format_exc())

    def predict(self, test_x):
        """Predict labels for test_x, caching both the input and the output."""
        try:
            self.test_x = test_x
            self.prediction = self.qda_cls.predict(test_x)
            return self.prediction
        except Exception:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        """Mean accuracy of the model on the most recent predict() input."""
        try:
            # return r2_score(test_y, self.prediction)
            return self.qda_cls.score(self.test_x, test_y)
        except Exception:
            print(traceback.format_exc())
Esempio n. 28
0
def quadratic_discriminant_analysis(x_train, y_train, x_test, y_test, compute_threshold=True):
    '''
        Train Quadratic Discriminant Analysis (QDA) classifier on x_train and predict on x_test.

        x_train, x_test: DataFrames of shape data x features.
        compute_threshold: when True, pick the decision threshold via
            get_best_thresh on the training probabilities instead of using
            the default predict().

        Returns (predTest, metricsCV, model).
    '''
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    # classWeights = {defs.posCode: 0.5, defs.negCode: 0.5}
    model = QuadraticDiscriminantAnalysis()
    #X_r2 = model.fit(x_train, y_train).transform(X)
    # Cross-validated metrics computed before the final fit on all of x_train.
    metricsCV = cross_val_analysis(classifier=model, x=x_train, y=y_train, plot=False)


    model.fit(x_train, y_train)


    if compute_threshold is True:
        probTest  = model.predict_proba(x_test)
        probTrain = model.predict_proba(x_train)

        # Threshold tuned on the training probabilities only.
        bestThresh = get_best_thresh(y_train, probTrain)

        predTest    = np.where(probTest[:, 1] >= bestThresh, defs.posCode, defs.negCode)
    else:
        predTest    = model.predict(x_test)

    return predTest, metricsCV, model
Esempio n. 29
0
def k_folds_testing(training_df, label_df, model, splits=10, bestfeatures=None):
    """Shuffled k-fold evaluation of one classifier chosen by the `model` tag.

    Returns [mean accuracy, std accuracy, weights_set], where weights_set is
    the running intersection across folds of top-feature indices (MLP: last
    layer coefficients; other non-QDA models: feature_importances_).

    NOTE(review): model == "xgboost" leaves `reg` unbound (NameError at fit),
    and the feature_importances_ branch will raise AttributeError for
    estimators without that attribute (e.g. KNN, GPC, MNB) — confirm which
    model tags are actually used by callers.
    """
    
    kf = model_selection.KFold(n_splits=splits, shuffle=True)
    results = []
    weights_set = None
    for train_index, test_index in kf.split(training_df):
        
        if model == "QDA":
            reg = QuadraticDiscriminantAnalysis()
            
        elif model == "xgboost":
            pass
        
        elif model == "MLP":
            reg = MLPClassifier()
            
        elif model == "ADA":
            reg = AdaBoostClassifier()
        
        elif model == "DT":
            reg = DecisionTreeClassifier()
            
        elif model == "KNN":
            reg = KNeighborsClassifier()
            
        elif model == "GPC":
            reg = GaussianProcessClassifier(1.0 * RBF(1.0))
            
        elif model == "MNB":
            reg = MultinomialNB()
            
        reg.fit(training_df.iloc[train_index,:], label_df.iloc[train_index, :].values)
        predictions = reg.predict(training_df.iloc[test_index,:])
        if model == "MLP":
            # Rank features by the absolute final-layer coefficients.
            weights = []
            for x in abs(reg.coefs_[-1]):
                weights.append(x[0])
            w = set(np.argsort(weights)[-90:].flat)
            
            if weights_set is None:
                weights_set = w
            else:
                weights_set = w.intersection(weights_set)
                print(weights_set)
        elif model != "QDA":
            print(reg.feature_importances_)
            w = set(np.argsort(reg.feature_importances_)[-15:].flat)
            
            if weights_set is None:
                weights_set = w
            else:
                weights_set = w.intersection(weights_set)
                print(weights_set)
        # determine accuracy
        accuracy = metrics.accuracy_score(label_df.iloc[test_index,:].values, predictions)
        results.append(accuracy)
        
        
    return [np.mean(results), np.std(results), weights_set]
Esempio n. 30
0
class SNPForecastingStrategy(Strategy):
    """    
    Requires:
    symbol - A stock symbol on which to form a strategy on.
    bars - A DataFrame of bars for the above symbol."""

    def __init__(self, symbol, bars):
        self.symbol = symbol
        self.bars = bars
        # Model is fit eagerly at construction time.
        self.create_periods()
        self.fit_model()

    def create_periods(self):
        """Create training/test periods."""
        self.start_train = datetime.datetime(2001,1,10)
        self.start_test = datetime.datetime(2005,1,1)
        self.end_period = datetime.datetime(2005,12,31)

    def fit_model(self):
        """Fits a Quadratic Discriminant Analyser to the
        US stock market index (^GPSC in Yahoo)."""
        # Create a lagged series of the S&P500 US stock market index
        snpret = create_lagged_series(self.symbol, self.start_train, 
                                      self.end_period, lags=5) 

        # Use the prior two days of returns as 
        # predictor values, with direction as the response
        X = snpret[["Lag1","Lag2"]]
        y = snpret["Direction"]

        # Create training and test sets (chronological split at start_test)
        X_train = X[X.index < self.start_test]
        y_train = y[y.index < self.start_test]

        # Create the predicting factors for use 
        # in direction forecasting
        self.predictors = X[X.index >= self.start_test]

        # Create the Quadratic Discriminant Analysis model
        # and the forecasting strategy
        self.model = QuadraticDiscriminantAnalysis()
        self.model.fit(X_train, y_train)

    def generate_signals(self):
        
        """Returns the DataFrame of symbols containing the signals
        to go long, short or hold (1, -1 or 0)."""
        signals = pd.DataFrame(index=self.bars.index)
        signals['signal'] = 0.0       

        # Predict the subsequent period with the QDA model
        signals['signal'] = self.model.predict(self.predictors)

        # Remove the first five signal entries to eliminate
        # NaN issues with the signals DataFrame
        # NOTE(review): chained indexing assignment — may raise
        # SettingWithCopyWarning / behave differently on modern pandas; verify.
        signals['signal'][0:5] = 0.0
        signals['positions'] = signals['signal'].diff() 

        return signals
Esempio n. 31
0
def qda(X, y, plot=False):
    """Fit QDA on (X, y), optionally plot its decision boundary, and return
    the fitted classifier."""
    model = QuadraticDiscriminantAnalysis()
    model.fit(X, y)
    if plot:
        plot_decision_boundary(model.predict, X, y)
        plt.title("QDA")
        plt.show()
    return model
Esempio n. 32
0
def da_classify(X_train, y_train, X_cv, y_cv, X_test, y_test):
    """Fit QDA, print PRF metrics for all three splits plus test accuracy and
    a classification report, and return the fitted classifier."""
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, y_train)

    train_pred = model.predict(X_train)
    cv_pred = model.predict(X_cv)
    test_pred = model.predict(X_test)

    print("da   train Metrics : {0}".format(PRF(y_train, train_pred)))
    print("da   cv Metrics : {0}".format(PRF(y_cv, cv_pred)))
    print("da   test Metrics : {0}".format(PRF(y_test, test_pred)))
    print("Test PRF : {0}".format(
        precision_recall_fscore_support(y_test, test_pred)))
    print('The Accuracy of ' + 'da' + ' is :', model.score(X_test, y_test))
    print(classification_report(y_test, test_pred))

    return model
Esempio n. 33
0
def make_qda(X_train, X_test, y_train, y_test,):
    """Fit QDA on the train split, run classification metrics on the test
    split, and return the fitted model."""
    classifier = QuadraticDiscriminantAnalysis()
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    get_classification_metrics(predictions, y_test)

    return classifier
Esempio n. 34
0
def doQDA(x,digits,s):
    """Classify test digits with LDA (despite the name) trained on the first
    s PCA components; returns the class error rate.

    NOTE(review): the '[email protected]' token below is an email-obfuscation
    artifact from scraping — the original line was almost certainly a matrix
    product assigning the projected test set (e.g. `newtest = newtest @
    np.transpose(x.V[:s,:])`); reconstruct from the original source before use.
    """
    myLDA = LDA()
    myLDA.fit(x.PCA[:,:s],digits.train_Labels)
    # Center the test images with the training means.
    newtest = digits.test_Images -x.centers
    [email protected](x.V[:s,:])
    labels = myLDA.predict(newtest)
    errors = class_error_rate(labels.reshape(1,labels.shape[0]),digits.test_Labels)
    return errors
Esempio n. 35
0
def quadraticdiscriminant(X_train,X_test,Y_train,Y_test):
    """Fit QDA and append its result() score on the test split to the
    module-level `accuracies` list."""
    st = "QDA"
    print("Quadratic Discriminant Analysis")
    # labels.append(st)
    model = QuadraticDiscriminantAnalysis()
    model.fit(X_train, Y_train)
    predictions = model.predict(X_test)
    accuracies.append(result(predictions, Y_test))
Esempio n. 36
0
def confusion(digits):
    """Fit LDA on the first 50 PCA components of the training images and print
    the confusion matrix for the projected test images.

    NOTE(review): the '[email protected]' token below is an email-obfuscation
    artifact from scraping — the original line was almost certainly a matrix
    product projecting the centered test set (e.g. `newtest = newtest @
    np.transpose(x.V[:50,:])`); reconstruct from the original source.
    """
    myLDA = LDA()
    x = center_matrix_SVD(digits.train_Images)
    myLDA.fit(x.PCA[:,:50],digits.train_Labels)
    # Center the test images with the training means.
    newtest = digits.test_Images -x.centers
    [email protected](x.V[:50,:])
    labels = myLDA.predict(newtest)
    import sklearn.metrics as f
    print(f.confusion_matrix(digits.test_Labels,labels))
def test_qda_regularization():
    # With the default reg_param=0., a constant feature makes a class
    # covariance singular, so predictions do not recover the labels.
    est = QuadraticDiscriminantAnalysis()
    with ignore_warnings():
        collapsed = est.fit(X2, y6).predict(X2)
    assert np.any(collapsed != y6)

    # A little regularization restores exact label recovery.
    est = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with ignore_warnings():
        est.fit(X2, y6)
    assert_array_equal(est.predict(X2), y6)

    # Regularization also handles n_samples_in_a_class < n_features.
    est = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with ignore_warnings():
        est.fit(X5, y5)
    assert_array_equal(est.predict(X5), y5)
Esempio n. 38
0
    def train_DA(self, X, y, lda_comp, qda_reg):
        '''
        Input:
            qda_reg - reg_param
            lda_comp - n_components
            X - data matrix (train_num, feat_num)
            y - target labels matrix (train_num, label_num)

        Output:
            best_clf - best classifier trained (QDA/LDA)
            best_score - CV score of best classifier

        Cross-validate LDA and QDA, refit the winner on all data, and
        return it together with its mean CV score.
        '''
        n_samples, n_feat = X.shape
        cv_folds = 10
        # NOTE: old sklearn.cross_validation KFold API (iterable of splits).
        kf = KFold(n_samples, cv_folds, shuffle=False)

        lda = LinearDiscriminantAnalysis(n_components = lda_comp)
        qda = QuadraticDiscriminantAnalysis(reg_param = qda_reg)
        score_total_lda = 0 #running total of metric score over all cv runs
        score_total_qda = 0 #running total of metric score over all cv runs
        for train_index, test_index in kf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            lda.fit(X_train, y_train)
            cv_pred_lda = lda.predict(X_test)
            # HACK: eval() builds the metric call from self.metric/self.task;
            # acceptable only because both are set internally, never from
            # untrusted input.
            score_lda = eval(self.metric + '(y_test[:,None], cv_pred_lda[:,None], "' + self.task + '")')
            score_total_lda += score_lda

            qda.fit(X_train,y_train)
            cv_pred_qda = qda.predict(X_test)
            # BUG FIX: QDA was previously scored with cv_pred_lda (the LDA
            # predictions), so both running totals measured LDA and the
            # model comparison below was meaningless.
            score_qda = eval(self.metric + '(y_test[:,None], cv_pred_qda[:,None], "' + self.task + '")')
            score_total_qda += score_qda

        # Average fold scores.
        score_lda = score_total_lda/cv_folds
        score_qda = score_total_qda/cv_folds

        # Keep the better model: refit on the full data and return it.
        if(score_qda > score_lda):
            qda.fit(X,y)
            return qda, score_qda
        else:
            lda.fit(X,y)
            return lda, score_lda
class road_estimation:
    def __init__(self, model_selection):
        self._train_data, self._train_targets, self._valid_data, self._valid_targets, self._test_data, self._test_targets = (
            data_load()
        )

        self._model_selection = model_selection
        self._classifier = []

    def train(self):
        if self._model_selection == "svm":
            # selected the svc in svm
            self._classifier = svm.SVC()
        elif self._model_selection == "nb":
            self._classifier = GaussianNB()
        elif self._model_selection == "knn":
            # parameter n_jobs can be set to -1 to enable parallel calculating
            self._classifier = KNeighborsClassifier(n_neighbors=7)
        elif self._model_selection == "ada":
            # Bunch of parameters, n_estimators, learning_rate
            self._classifier = AdaBoostClassifier()
        elif self._model_selection == "rf":
            # many parameters including n_jobs
            self._classifier = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
        elif self._model_selection == "qda":
            # complicated array like parameters, perhaps leave it default
            self._classifier = QuadraticDiscriminantAnalysis()
        else:
            print "Please refer to one classifier"

        self._classifier.fit(self._train_data, self._train_targets)
        # predict on valid data
        prediction_valid = self._classifier.predict(self._valid_data)
        # print validation result for selected model.
        print (
            "Classification report for classifier %s on valid_data:\n%s\n"
            % (self._model_selection, metrics.classification_report(self._valid_targets, prediction_valid))
        )

    def test(self):
        # predict on test data
        prediction_test = self._classifier.predict(self.test_data)
        # print test result for selected model.
        print (
            "Classification report for classifier %s on test_data:\n%s\n"
            % (self._model_selection, metrics.classification_report(self._test_targets, prediction_test))
        )

    def showPredictionImage(self):
        f = Feature()
        f.loadImage("um_000000.png")
        f.extractFeatures()
        fea_matrix = f.getFeaturesVectors()

        predict = self._classifier.predict(fea_matrix)
        image = np.copy(f.image)

        num_superpixels = np.max(f.superpixel) + 1
        for i in xrange(0, num_superpixels):
            indices = np.where(f.superpixel == i)
            if predict[i] == 1:

                image[indices[0], indices[1], 0] = 1
                image[indices[0], indices[1], 1] = 1
                image[indices[0], indices[1], 2] = 0
        plt.imshow(image)
        plt.show()
        # show prediction image with superpixels
        plt.imshow(mark_boundaries(image, superpixels))
        plt.show()
Esempio n. 40
0
    # Label assignment continues: samples 9-17 get class 2, 18-26 class 3
    # (presumably samples 0-8 were labelled 1 above — header not visible here).
    for i in range(9,18):
        labels.append(2)
    for i in range(18, 27):
        labels.append(3)
    '''
    # Creation of random labels
    for i in range(0,27):
        labels.append(int(random.random() * 3) + 1)
    print (labels)
    '''
    # QDA model fit on the PCA components with the hand-built labels.
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(comps, labels)

    # MCC Calculation on the training data itself (resubstitution score).
    y_pred = qda.predict(comps)
    #print(labels)
    #print(y_pred)
    mcc = multimcc(labels,y_pred)
    print("MCC="+str(mcc))

    '''
    # Plotting QDA contour
    nx, ny = 200, 100
    x_min, x_max = np.amin(comps[:,0]), np.amax(comps[:,0])
    y_min, y_max = np.amin(comps[:,1]), np.amax(comps[:,1])
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),np.linspace(y_min, y_max, ny))
    Z = qda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    Z = Z[:, 1].reshape(xx.shape)
    plt.contour(xx, yy, Z, [0.5], linewidths=5, colors = 'k', linestyles = 'dashed')
    '''
# Plot the PCA components reshaped back to the original image dimensions.
plt.plot(pca.components_.reshape((2, data.shape[0], data.shape[1])))

#plt.plot(pca.explained_variance_, linewidth=2)
#plt.title('Principal Component Analysis (PCA) Feature Assessment')

# Class labels: first 27 samples -> class 1, next 26 -> class 2.
labels = [1] * 27 + [2] * 26

# Discriminant model (QDA, kept under the historical name `lda`).
lda = QuadraticDiscriminantAnalysis()
lda.fit(comps, labels)
y_pred = lda.predict(comps)
print(labels)
print(y_pred)
mcc = matthews_corrcoef(labels, y_pred)
print("MCC=" + str(mcc))


# Decision-boundary contour: evaluate P(class 2) on a 200x100 grid
# spanning the data range and draw the 0.5 level set.
nx, ny = 200, 100
x_min, x_max = np.amin(comps[:, 0]), np.amax(comps[:, 0])
y_min, y_max = np.amin(comps[:, 1]), np.amax(comps[:, 1])
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),
                     np.linspace(y_min, y_max, ny))
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = lda.predict_proba(grid_points)[:, 1].reshape(xx.shape)
plt.contour(xx, yy, Z, [0.5], linewidths=5, colors='k', linestyles='dashed')
Esempio n. 42
0
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


#define X y
X, y = data.loc[:,data.columns != 'state'].values, data.loc[:,data.columns == 'state'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

#smoteen
sme = SMOTEENN(random_state=42)
os_X,os_y = sme.fit_sample(X_train,y_train)

#QDA
clf_QDA = QuadraticDiscriminantAnalysis(store_covariances=True)
clf_QDA.fit(os_X, os_y)
y_true, y_pred = y_test, clf_QDA.predict(X_test)

#F1_score, precision, recall, specifity, G score
print "F1_score : %.4g" % metrics.f1_score(y_true, y_pred)  
print "Recall : %.4g" % metrics.recall_score(y_true, y_pred)
recall = metrics.recall_score(y_true, y_pred)  
print "Precision : %.4g" % metrics.precision_score(y_true, y_pred)
 
#Compute confusion matrix
cnf_matrix = confusion_matrix(y_test,y_pred)
np.set_printoptions(precision=2)
print "Specifity: " , float(cnf_matrix[0,0])/(cnf_matrix[0,0]+cnf_matrix[0,1])
specifity = float(cnf_matrix[0,0])/(cnf_matrix[0,0]+cnf_matrix[0,1]) 
print "G score: " , math.sqrt(recall/ specifity) 

#Plot non-normalized confusion matrix
What is the training misclassification rate?
"""

# Fit LDA on the war dataset and build a Yes/No confusion table against
# the true "start" labels.
lda1 = LDA(solver="svd", store_covariance=True)
lda1.fit(warX,warY)

my_lda_pred = pd.DataFrame()
my_lda_pred["pred"] = ["No" if x == 0 else "Yes" for x in lda1.predict(warX)]
my_lda_pred["actual"] = ["No" if x == 0 else "Yes" for x in war["start"]]
conf_lda = pd.crosstab(my_lda_pred["pred"], my_lda_pred["actual"])
conf_lda

# Training misclassification rate = off-diagonal count / total samples.
(1/(war.shape[0])) * (conf_lda.iloc[1,0] + conf_lda.iloc[0,1])


"""
6.69%
"""

# QDA fit (store_covariances is the pre-0.19 sklearn parameter spelling).
qda1 = QDA(store_covariances=True)
qda1.fit(warX,warY)

# NOTE(review): `test` (class probabilities) is computed but never used below.
test = qda1.predict_proba(warX)

my_qda_pred = pd.DataFrame()
# NOTE(review): thresholding qda1.predict() at .5 only "works" because the
# predicted labels are 0/1 — presumably predict_proba(...)[:, 1] was
# intended here; confirm against the original analysis.
my_qda_pred["pred"] = ["No" if x < .5 else "Yes" for x in qda1.predict(warX)]
my_qda_pred["actual"] = ["No" if x == 0 else "Yes" for x in war["start"]]
conf_qda = pd.crosstab(my_qda_pred["pred"], my_qda_pred["actual"])
conf_qda

# QDA training misclassification rate, same formula as above.
(1/(war.shape[0])) * (conf_qda.iloc[1,0] + conf_qda.iloc[0,1])
Esempio n. 44
0
    #
    #        CREATE MODEL
    #
    ###########################################################################

    # Define the estimator: quadratic discriminant analysis
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    qda = QuadraticDiscriminantAnalysis()

    # Fit on this trial's training split (data[0] = features, data[1] = labels
    # — presumably; the loop header that sets them is outside this view).
    qda.fit(training_data[0], training_data[1])

    from sklearn.metrics import accuracy_score

    # record the best result
    accuracies[i] = accuracy_score(test_data[1], qda.predict(test_data[0]))


# Summarize all trials with the mean test accuracy.
mean_accuracy = accuracies.mean()
print("\n\nmean accuracy: %f" % mean_accuracy)

###############################################################################
#
#   VISUALIZE
#
###############################################################################
import matplotlib.pyplot as plt

# Running mean of the accuracy over the first i+1 trials, for a
# convergence plot of the estimate.
mean_accuracies = np.zeros(shape=(n,))
for i in range(n):
    mean_accuracies[i] = accuracies[: i + 1].mean()
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
trans = LinearDiscriminantAnalysis(n_components=3)
trans.fit(X,y)
X = trans.transform(X)
"""
# Split Up Data
x_train,x_valid,y_train,y_valid = train_test_split(X,y,test_size=0.3,random_state=None)

# Train classifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf = QuadraticDiscriminantAnalysis(reg_param=0.00001)
clf.fit(x_train,y_train)

# Run Predictions
from sklearn.metrics import confusion_matrix, accuracy_score
y_preds = clf.predict(x_valid)
# Compute the validation metrics once instead of four separate times.
cm = confusion_matrix(y_valid, y_preds)
acc = accuracy_score(y_valid, y_preds)
print(cm)
print("Accuracy: %f" % acc)
# BUG FIX: the report file was opened but never closed; `with` guarantees
# the handle is flushed and released even if a write fails.
with open('qda_take1.txt', 'w') as f:
    f.write(str(cm))
    f.write("\nAccuracy: %f" % acc)
    f.write("\nclf = QuadraticDiscriminantAnalysis(0.00001)")

# Now on to final submission
x_final = testing.iloc[:,1:].values
# reshape(-1) flattens for any test-set size (was hard-coded to 62096 rows).
y_final = pd.DataFrame(clf.predict(x_final).reshape(-1))
numbahs = testing['id']
df = pd.concat([numbahs, y_final], axis=1)
df.columns = ['id','country']
df.to_csv("qda_take1.csv",index=False)
# Baseline comparison: fit each classifier and record its test-set
# accuracy (trailing comments show the scores previously observed).
logreg = LogisticRegression()
logreg.fit(X_train, y_train)
y_pred = logreg.predict(X_test)
y_pred_train = logreg.predict(X_train)
log_acc = accuracy_score(y_pred, y_test)  # 0.64 highest

clf = DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
clf_acc = accuracy_score(y_pred, y_test)  # 0.61

neigh = KNeighborsClassifier(n_neighbors=13)
neigh.fit(X_train, y_train)
y_pred = neigh.predict(X_test)
nn_acc = accuracy_score(y_pred, y_test)  # 0.61

quad = QuadraticDiscriminantAnalysis()
quad.fit(X_train, y_train)
y_pred = quad.predict(X_test)
quad_acc = accuracy_score(y_pred, y_test)  # 0.19 very low

# LDA with automatic shrinkage
ldaC = LDA(solver='lsqr', shrinkage='auto')
ldaC.fit(X_train, y_train)
y_pred = ldaC.predict(X_test)
lda_acc = accuracy_score(y_pred, y_test)  # 0.58

#########################################
from sklearn.cross_validation import KFold
from sklearn.cross_validation import StratifiedKFold
import matplotlib.pyplot as plt

def calc_params(X, y, clf, param_values, param_name, K, metric = 'accuracy'):
    '''This function takes the classfier, the training data and labels, the name of the
    parameter to vary, a list of values to vary by, and a number of folds needed for 
    cross validation and returns a the test and train scores (accuracy or recall) and also