Example #1
import traceback

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis


class QuadraticDiscriminantAnalysiscls(object):
    """Thin wrapper around scikit-learn's QuadraticDiscriminantAnalysis."""
    def __init__(self):
        self.qda_cls = QuadraticDiscriminantAnalysis()
        self.prediction = None
        self.train_x = None
        self.train_y = None
        self.test_x = None

    def train_model(self, train_x, train_y):
        try:
            self.train_x = train_x
            self.train_y = train_y
            self.qda_cls.fit(train_x, train_y)
        except:
            print(traceback.format_exc())

    def predict(self, test_x):
        try:
            self.test_x = test_x
            self.prediction = self.qda_cls.predict(test_x)
            return self.prediction
        except:
            print(traceback.format_exc())

    def accuracy_score(self, test_y):
        try:
            # return r2_score(test_y, self.prediction)
            return self.qda_cls.score(self.test_x, test_y)
        except:
            print(traceback.format_exc())
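A minimal usage sketch for the wrapper above, on synthetic data (the dataset and split here are illustrative, not from the original project):

from sklearn.datasets import make_classification

X, y = make_classification(n_samples=200, n_features=4, random_state=0)
wrapper = QuadraticDiscriminantAnalysiscls()
wrapper.train_model(X[:150], y[:150])    # fits the underlying QDA model
preds = wrapper.predict(X[150:])         # stores and returns the predictions
print(wrapper.accuracy_score(y[150:]))   # mean accuracy via qda_cls.score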
Example #2
    def create_symbol_forecast_model(self):
        # Create a lagged series of the S&P500 US stock market index
        snpret = create_lagged_series(
            self.symbol_list[0], self.model_start_date,
            self.model_end_date, lags=5
        )

        # Use the prior two days of returns as predictor
        # values, with direction as the response
        x = snpret[["Lag1", "Lag2"]]
        y = snpret["Direction"]

        # Create training and test sets, each of them is series
        start_test = self.model_start_test_date
        x_train = x[x.index < start_test]
        x_test = x[x.index >= start_test]
        y_train = y[y.index < start_test]
        y_test = y[y.index >= start_test]

        model = QuadraticDiscriminantAnalysis()
        model.fit(x_train, y_train)

        # predict returns an ndarray of class labels
        pred_test = model.predict(x_test)

        print("Error Rate is {0}".format((y_test != pred_test).sum() * 1. / len(y_test)))

        return model
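create_lagged_series is a helper from the surrounding project and is not shown on this page. Roughly, it returns a DataFrame of lagged percentage returns plus a Direction column; a simplified, hypothetical stand-in could look like this:

import numpy as np
import pandas as pd

def create_lagged_series_sketch(prices, lags=5):
    # prices: a pandas Series of closing prices indexed by date
    ts = pd.DataFrame(index=prices.index)
    ts["Today"] = prices.pct_change() * 100.0
    for i in range(1, lags + 1):
        ts["Lag%s" % i] = ts["Today"].shift(i)
    ts["Direction"] = np.sign(ts["Today"])   # +1 up day, -1 down day
    return ts.dropna()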
Example #3
File: forecaster.py Project: Vegeb/strats
class SNPForecastingStrategy(Strategy):
    """    
    Requires:
    symbol - A stock symbol on which to form a strategy on.
    bars - A DataFrame of bars for the above symbol."""

    def __init__(self, symbol, bars):
        self.symbol = symbol
        self.bars = bars
        self.create_periods()
        self.fit_model()

    def create_periods(self):
        """Create training/test periods."""
        self.start_train = datetime.datetime(2001,1,10)
        self.start_test = datetime.datetime(2005,1,1)
        self.end_period = datetime.datetime(2005,12,31)

    def fit_model(self):
        """Fits a Quadratic Discriminant Analyser to the
        US stock market index (^GPSC in Yahoo)."""
        # Create a lagged series of the S&P500 US stock market index
        snpret = create_lagged_series(self.symbol, self.start_train, 
                                      self.end_period, lags=5) 

        # Use the prior two days of returns as 
        # predictor values, with direction as the response
        X = snpret[["Lag1","Lag2"]]
        y = snpret["Direction"]

        # Create training and test sets
        X_train = X[X.index < self.start_test]
        y_train = y[y.index < self.start_test]

        # Create the predicting factors for use 
        # in direction forecasting
        self.predictors = X[X.index >= self.start_test]

        # Create the Quadratic Discriminant Analysis model
        # and the forecasting strategy
        self.model = QuadraticDiscriminantAnalysis()
        self.model.fit(X_train, y_train)

    def generate_signals(self):
        """Returns the DataFrame of symbols containing the signals
        to go long, short or hold (1, -1 or 0)."""
        signals = pd.DataFrame(index=self.bars.index)
        signals['signal'] = 0.0       

        # Predict the subsequent period with the QDA model
        signals['signal'] = self.model.predict(self.predictors)

        # Remove the first five signal entries to eliminate
        # NaN issues with the signals DataFrame
        signals['signal'][0:5] = 0.0
        signals['positions'] = signals['signal'].diff() 

        return signals
Example #4
def doQDA(x,digits,s):
    myLDA = LDA()
    myLDA.fit(x.PCA[:,:s],digits.train_Labels)
    newtest = digits.test_Images - x.centers
    newtest = newtest @ np.transpose(x.V[:s,:])
    labels = myLDA.predict(newtest)
    errors = class_error_rate(labels.reshape(1,labels.shape[0]),digits.test_Labels)
    return errors
Example #5
def confusion(digits):
    myLDA = LDA()
    x = center_matrix_SVD(digits.train_Images)
    myLDA.fit(x.PCA[:,:50],digits.train_Labels)
    newtest = digits.test_Images - x.centers
    newtest = newtest @ np.transpose(x.V[:50,:])
    labels = myLDA.predict(newtest)
    import sklearn.metrics as f
    print(f.confusion_matrix(digits.test_Labels,labels))
Example #6
def test_qda_priors():
    clf = QuadraticDiscriminantAnalysis()
    y_pred = clf.fit(X6, y6).predict(X6)
    n_pos = np.sum(y_pred == 2)

    neg = 1e-10
    clf = QuadraticDiscriminantAnalysis(priors=np.array([neg, 1 - neg]))
    y_pred = clf.fit(X6, y6).predict(X6)
    n_pos2 = np.sum(y_pred == 2)

    assert_greater(n_pos2, n_pos)
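X6 and y6 above are fixtures from scikit-learn's own test suite. A self-contained sketch of the same priors effect, on made-up data, might look like this (the numbers are illustrative):

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

X = np.array([[0.0], [0.1], [0.2], [0.9], [1.0], [1.1]])
y = np.array([1, 1, 1, 2, 2, 2])

n_pos_default = np.sum(QuadraticDiscriminantAnalysis().fit(X, y).predict(X) == 2)
skewed = QuadraticDiscriminantAnalysis(priors=np.array([1e-10, 1 - 1e-10]))
n_pos_skewed = np.sum(skewed.fit(X, y).predict(X) == 2)
# Putting almost all prior mass on class 2 can only pull the boundary toward class 1,
# so at least as many points end up labelled 2.
assert n_pos_skewed >= n_pos_default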
Example #7
def QD(pth):
    train_desc = np.load(pth + '/training_features.npy')
    nbr_occurences = np.sum((train_desc > 0) * 1, axis=0)
    idf = np.array(np.log((1.0 * len(image_paths) + 1) / (1.0 * nbr_occurences + 1)), 'float32')

    # Scaling the words
    stdSlr = StandardScaler().fit(train_desc)
    train_desc = stdSlr.transform(train_desc)
    modelQD = QuadraticDiscriminantAnalysis()
    modelQD.fit(train_desc, np.array(train_labels))
    joblib.dump((modelQD, img_classes, stdSlr), pth + "/qd-bof.pkl", compress=3)
    test(pth, "qd-")
Example #8
def get_QDA(Xtrain, Ytrain, Xtest = None , Ytest = None, verbose = 0):
    qda = QDA()
    qda.fit(Xtrain,Ytrain)
    
    scores = np.empty((2))
    if (verbose == 1):
        scores[0] = qda.score(Xtrain,Ytrain)
        print('QDA, train: {0:.02f}% '.format(scores[0]*100))
        if (type(Xtest) != type(None)):
            scores[1] = qda.score(Xtest,Ytest)
            print('QDA, test: {0:.02f}% '.format(scores[1]*100))
    return qda
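Assuming QDA is the usual alias for scikit-learn's QuadraticDiscriminantAnalysis, the helper above can be exercised like this (synthetic data, illustrative only):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=300, n_features=5, random_state=0)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, random_state=0)
qda = get_QDA(Xtrain, Ytrain, Xtest, Ytest, verbose=1)   # prints train/test accuracy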
Example #9
def crossValidate(attributes, outcomes, foldCount, ownFunction=True):
	presList = []; recallList = []
	accrList = []; fMeasList = []
	aucList = []
	testingEstimate = []

	otcmVal = list(set(outcomes))
	params = {}; featLen = 4; 

	attrFolds = getFolds(attributes,foldCount)
	otcmFolds = getFolds(outcomes,foldCount)

	testDataList = copy.copy(attrFolds)
	testOtcmList = copy.copy(otcmFolds)

	
	for itr in range(foldCount):
		trainDataList = []
		trainOtcmList = []
		for intitr in range (foldCount):
			if intitr != itr:
				trainDataList.append(attrFolds[intitr]) 
				trainOtcmList.append(otcmFolds[intitr])

		trainDataArr = 	np.array(trainDataList).reshape(-1,featLen)
		trainOtcmArr =  np.array(trainOtcmList).reshape(-1)
		testDataArr = np.array(testDataList[itr]).reshape(-1,featLen)
		testOtcmArr = np.array(testOtcmList[itr]).reshape(-1)

		if ownFunction:
			params = getParams(trainDataArr,trainOtcmArr,otcmVal,featLen)
			testingEstimate = gdaNDEstimate(testDataArr,params,otcmVal)
		else:
			#clf = LinearDiscriminantAnalysis()
			clf = QuadraticDiscriminantAnalysis()
			clf.fit(trainDataArr,trainOtcmArr)
			trainingEstimate = clf.predict(trainDataArr) 
			testingEstimate = clf.predict(testDataArr)

		if itr == 0 and len(otcmVal)==2:			
			addTitle = "Own" if ownFunction else "Inbuilt"
			metric = getMetrics(testOtcmArr,testingEstimate,otcmVal,showPlot=True,title="GDA2D Versicolor,Virginica - %s"%addTitle)
		else:
			metric = getMetrics(testOtcmArr,testingEstimate,otcmVal)
		accrList.append(metric[0])
		presList.append(metric[1])
		recallList.append(metric[2])
		fMeasList.append(metric[3])
		aucList.append(metric[4])
		
	return accrList, presList, recallList, fMeasList, aucList
Example #10
    def train_DA(self, X, y, lda_comp, qda_reg):
        '''
        Input: 
            qda_reg - reg_param
            lda_comp - n_components
            X - data matrix (train_num, feat_num)
            y - target labels matrix (train_num, label_num)

        Output: 
            best_clf - best classifier trained (QDA/LDA)
            best_score - CV score of best classifier

        Find best DA classifier.
        '''
        n_samples, n_feat = X.shape
        cv_folds = 10
        kf = KFold(n_samples, cv_folds, shuffle=False)

        
        
        lda = LinearDiscriminantAnalysis(n_components = lda_comp)
        qda = QuadraticDiscriminantAnalysis(reg_param = qda_reg)
        score_total_lda = 0 #running total of metric score over all cv runs
        score_total_qda = 0 #running total of metric score over all cv runs
        for train_index, test_index in kf:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
            
            lda.fit(X_train, y_train)
            cv_pred_lda = lda.predict(X_test)
            score_lda = eval(self.metric + '(y_test[:,None], cv_pred_lda[:,None], "' + self.task + '")')
            score_total_lda += score_lda
            
            qda.fit(X_train,y_train)
            cv_pred_qda = qda.predict(X_test)
            score_qda = eval(self.metric + '(y_test[:,None], cv_pred_qda[:,None], "' + self.task + '")')
            score_total_qda += score_qda

        score_lda = score_total_lda/cv_folds
        score_qda = score_total_qda/cv_folds
        
        # We keep the best one
        if(score_qda > score_lda):
            qda.fit(X,y)
            return qda, score_qda
        else:
            lda.fit(X,y)
            return lda, score_lda
Example #11
class QuadraticDiscriminantAnalysisPredictor(PredictorBase):
    '''
    Quadratic Discriminant Analysis
    '''

    def __init__(self):
        self.clf = QuadraticDiscriminantAnalysis()

    def fit(self, X_train, y_train):
        self.clf.fit(X_train, y_train)

    def predict(self, X_test):
        predictions = self.clf.predict_proba(X_test)
        predictions_df = self.bundle_predictions(predictions)

        return predictions_df

    def get_k_best_k(self):
        return 4
Example #12
def test_qda_regularization():
    # the default is reg_param=0. and will cause issues
    # when there is a constant variable
    clf = QuadraticDiscriminantAnalysis()
    with ignore_warnings():
        y_pred = clf.fit(X2, y6).predict(X2)
    assert np.any(y_pred != y6)

    # adding a little regularization fixes the problem
    clf = QuadraticDiscriminantAnalysis(reg_param=0.01)
    with ignore_warnings():
        clf.fit(X2, y6)
    y_pred = clf.predict(X2)
    assert_array_equal(y_pred, y6)

    # Case n_samples_in_a_class < n_features
    clf = QuadraticDiscriminantAnalysis(reg_param=0.1)
    with ignore_warnings():
        clf.fit(X5, y5)
    y_pred5 = clf.predict(X5)
    assert_array_equal(y_pred5, y5)
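X2, y6, X5 and y5 are again scikit-learn test fixtures. reg_param shrinks each per-class covariance toward a multiple of the identity; a rough, self-contained illustration of why this helps when a feature is constant (data made up here):

import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

rng = np.random.RandomState(0)
X = np.c_[rng.randn(40), np.ones(40)]   # second feature is constant, so the
y = np.r_[np.zeros(20), np.ones(20)]    # per-class covariance is singular
X[y == 1, 0] += 3.0                     # separate the classes on the first axis

clf = QuadraticDiscriminantAnalysis(reg_param=0.01).fit(X, y)
print(clf.score(X, y))                  # regularization makes the fit well-behaved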
Example #13
    def create_symbol_forecast_model(self):
        # Create a lagged series of the S&P500 US stock market index
        snpret = create_lagged_series(
            self.symbol_list[0], self.model_start_date,
            self.model_end_date, lags=5
        )

        # Use the prior two days of returns as predictor
        # values, with direction as the response
        X = snpret[["Lag1", "Lag2"]]
        y = snpret["Direction"]
        
        # Skip days with NaN
        skip_till_date = self.model_start_date + relativedelta(days=3)
        X = X[X.index > skip_till_date]
        y = y[y.index > skip_till_date]
        logging.debug(snpret[snpret.index > skip_till_date])

        model = QDA()
        model.fit(X, y)
        return model
Example #14
File: mustang.py Project: rioubenson/eagle
    def set_up_classifier(self):
        historic_data = self.get_data()
        # Key is to identify a trend (use close for now)
        historic_data['return_5_timeframe'] = np.log(historic_data['Close'] / historic_data['Close'].shift(5)) * 100
        historic_data.fillna(0.0001, inplace=True)
        historic_data['vol_normalised'] = normalise_data(historic_data['Volume'])

        # Bucket Return
        def bucket_return(x, col):
            if 0 < x[col] < 0.02:
                return 1
            if 0.02 < x[col] < 0.1:
                return 2
            if x[col] > 0.1:
                return 3

            if 0 > x[col] > -0.02:
                return -1
            if -0.02 > x[col] > -0.1:
                return -2
            if x[col] < -0.1:
                return -3
            else:
                return 0

        historic_data['Return'] = historic_data.apply(bucket_return, axis=1, args=['return_5_timeframe'])

        historic_data['Move'] = historic_data['Close'] - historic_data['Open']

        # X as predictor values, with Y as the response
        x = historic_data[["Move"]]
        y = historic_data["Return"]

        model = QuadraticDiscriminantAnalysis()
        model.fit(x, y)
        return model
Example #15
def test_qda():
    # QDA classification.
    # This checks that QDA implements fit and predict and returns
    # correct values for a simple toy dataset.
    clf = QuadraticDiscriminantAnalysis()
    y_pred = clf.fit(X6, y6).predict(X6)
    assert_array_equal(y_pred, y6)

    # Assure that it works with 1D data
    y_pred1 = clf.fit(X7, y6).predict(X7)
    assert_array_equal(y_pred1, y6)

    # Test probas estimates
    y_proba_pred1 = clf.predict_proba(X7)
    assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y6)
    y_log_proba_pred1 = clf.predict_log_proba(X7)
    assert_array_almost_equal(np.exp(y_log_proba_pred1), y_proba_pred1, 8)

    y_pred3 = clf.fit(X6, y7).predict(X6)
    # QDA shouldn't be able to separate those
    assert np.any(y_pred3 != y7)

    # Classes should have at least 2 elements
    assert_raises(ValueError, clf.fit, X6, y4)
Example #16
def test():
    for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
        # Linear Discriminant Analysis
        lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
        y_pred = lda.fit(X, y).predict(X)
        splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
        plot_lda_cov(lda, splot)
        plt.axis('tight')
    
        # Quadratic Discriminant Analysis
        qda = QuadraticDiscriminantAnalysis(store_covariances=True)
        y_pred = qda.fit(X, y).predict(X)
        splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
        plot_qda_cov(qda, splot)
        plt.axis('tight')
    plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
    plt.show()
Example #17
                             o_dim=20,
                             g_size=2)
print("Time elapsed", start - time.time())
print("Dimension reduced shape", Train.shape)
## Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
y_pred = lda.fit(Train, train_labels).predict(Train)
print("1 -- LDA")
from sklearn.metrics import accuracy_score
print(accuracy_score(train_labels, y_pred, normalize=True, sample_weight=None))

qda = QuadraticDiscriminantAnalysis(store_covariances=True)
y_pred = qda.fit(Train, train_labels).predict(Train)
print("2 -- QDA")
from sklearn.metrics import accuracy_score
print(accuracy_score(train_labels, y_pred, normalize=True, sample_weight=None))

from sklearn.naive_bayes import GaussianNB
print("3 -- GaussianNB")
gnb = GaussianNB()
y_pred = gnb.fit(Train, train_labels).predict(Train)
from sklearn.metrics import accuracy_score
print(accuracy_score(train_labels, y_pred, normalize=True, sample_weight=None))

from sklearn.svm import SVC
print("4 -- SVC")
clf = SVC(kernel="linear", C=0.025)
y_pred = clf.fit(Train, train_labels).predict(Train)
Example #18
def discriminatePlot(X, y, cVal, titleStr=''):
    # Frederic's Robust Wrapper for discriminant analysis function.  Performs lda, qda and RF after error checking,
    # Generates nice plots and returns cross-validated
    # performance, stderr and base line.
    # X np array n rows x p parameters
    # y group labels n rows
    # rgb color code for each data point - should be the same for each data point belonging to the same group
    # titleStr title for plots
    # returns: ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses

    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5

    # Initialize Variables and clean up data
    classes, classesCount = np.unique(
        y, return_counts=True
    )  # Classes to be discriminated should be same as ldaMod.classes_
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]

    classes, classesCount = np.unique(yGood, return_counts=True)
    nClasses = classes.size  # Number of classes or groups

    # Do we have enough data?
    if (nClasses < 2):
        print 'Error in ldaPlot: Insufficient classes with minimum data (%d) for discrimination analysis' % (
            MINCOUNT)
        return -1, -1, -1, -1, -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if (cvFolds < CVFOLDS):
        print 'Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (
            cvFolds, CVFOLDS)

    # Data size and color values
    nD = XGood.shape[1]  # number of features in X
    nX = XGood.shape[0]  # number of data points in X
    cClasses = []  # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl], 1.0))
    cClasses = np.asarray(cClasses)
    myPrior = np.ones(nClasses) * (1.0 / nClasses)

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX / 5)))
    if nDmax < nD:
        print 'Warning: Insufficient data for', nD, 'parameters. PCA projection to', nDmax, 'dimensions.'
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print 'Variance explained is %.2f%%' % (
        sum(pca.explained_variance_ratio_) * 100.0)

    # Initialise Classifiers
    ldaMod = LDA(n_components=min(nDmax, nClasses - 1),
                 priors=myPrior,
                 shrinkage=None,
                 solver='svd')
    qdaMod = QDA(priors=myPrior)
    rfMod = RF()  # by default assumes equal weights

    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    iskf = 0

    for train, test in skf:

        # Enforce the MINCOUNT in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array(
            [b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses, trainCount = np.unique(yTrain, return_counts=True)
        ntrainClasses = trainClasses.size

        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        goodInd = np.array([b in trainClasses for b in yGood[test]])
        if (goodInd.size == 0):
            continue

        # Fit the data
        trainPriors = np.ones(ntrainClasses) * (1.0 / ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)
        rfMod.fit(XrTrain, yTrain)

        ldaScores[iskf] = ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        qdaScores[iskf] = qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        rfScores[iskf] = rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]])

        iskf += 1

    if (iskf != cvFolds):
        cvFolds = iskf
        ldaScores.reshape(cvFolds)
        qdaScores.reshape(cvFolds)
        rfScores.reshape(cvFolds)

# Refit with all the data  for the plots

    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)
    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print 'Error in ldaPlot: labels do not match'

    # Print the coefficients of first 3 DFA
    print 'LDA Weights:'
    print 'DFA1:', ldaMod.coef_[0, :]
    if nClasses > 2:
        print 'DFA2:', ldaMod.coef_[1, :]
    if nClasses > 3:
        print 'DFA3:', ldaMod.coef_[2, :]

    # Obtain fits in this rotated space for display purposes
    ldaMod.fit(Xrr, yGood)
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)

    XrrMean = Xrr.mean(0)

    # Make a mesh for plotting
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:, 0] = xm1
    if Xrr.shape[1] > 1:
        Xm[:, 1] = xm2

    for ix in range(2, Xrr.shape[1]):
        Xm[:, ix] = np.squeeze(np.ones((nxm, 1))) * XrrMean[ix]

    XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
    XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
    XmcRF = np.zeros((nxm, 4))  # RGBA values for color for RF

    # Predict values on mesh for plotting based on the first two DFs
    yPredLDA = ldaMod.predict_proba(Xm)
    yPredQDA = qdaMod.predict_proba(Xm)
    yPredRF = rfMod.predict_proba(Xm)

    # Transform the predictions in color codes
    maxLDA = yPredLDA.max()
    for ix in range(nxm):
        cWeight = yPredLDA[ix, :]  # Prob for all classes
        cWinner = (
            (cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcLDA[ix, :] = np.dot(cWinner, cClasses)
        XmcLDA[ix, 3] = cWeight.max() / maxLDA

    # Plot the surface of probability
    plt.figure(facecolor='white', figsize=(10, 3))
    plt.subplot(131)
    Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
    plt.imshow(Zplot,
               zorder=0,
               extent=[-6, 6, -6, 6],
               origin='lower',
               interpolation='none',
               aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood,
                    s=40,
                    zorder=1)
    plt.title('%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean() * 100.0)))
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')

    # Transform the predictions in color codes
    maxQDA = yPredQDA.max()
    for ix in range(nxm):
        cWeight = yPredQDA[ix, :]  # Prob for all classes
        cWinner = (
            (cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcQDA[ix, :] = np.dot(cWinner, cClasses)
        XmcQDA[ix, 3] = cWeight.max() / maxQDA

    # Plot the surface of probability
    plt.subplot(132)
    Zplot = XmcQDA.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
    plt.imshow(Zplot,
               zorder=0,
               extent=[-6, 6, -6, 6],
               origin='lower',
               interpolation='none',
               aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood,
                    s=40,
                    zorder=1)
    plt.title('%s: QDA pC %.0f %%' % (titleStr, (qdaScores.mean() * 100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))

    # Transform the predictions in color codes
    maxRF = yPredRF.max()
    for ix in range(nxm):
        cWeight = yPredRF[ix, :]  # Prob for all classes
        cWinner = (
            (cWeight == cWeight.max()).astype('float'))  # Winner takes all
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses  # Weighted colors does not work
        XmcRF[ix, :] = np.dot(cWinner, cClasses)
        XmcRF[ix, 3] = cWeight.max() / maxRF

    # Plot the surface of probability
    plt.subplot(133)
    Zplot = XmcRF.reshape(np.shape(x1)[0], np.shape(x1)[1], 4)
    plt.imshow(Zplot,
               zorder=0,
               extent=[-6, 6, -6, 6],
               origin='lower',
               interpolation='none',
               aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:, 0], Xrr[:, 1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr, (np.random.rand(Xrr.size) - 0.5) * 12.0,
                    c=cValGood,
                    s=40,
                    zorder=1)
    plt.title('%s: RF pC %.0f %%' % (titleStr, (rfScores.mean() * 100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))

    plt.show()

    # Results
    ldaScore = ldaScores.mean() * 100.0
    qdaScore = qdaScores.mean() * 100.0
    rfScore = rfScores.mean() * 100.0
    ldaScoreSE = ldaScores.std() * 100.0
    qdaScoreSE = qdaScores.std() * 100.0
    rfScoreSE = rfScores.std() * 100.0

    print("Number of classes %d. Chance level %.2f %%") % (nClasses,
                                                           100.0 / nClasses)
    print("%s LDA: %.2f (+/- %0.2f) %%") % (titleStr, ldaScore, ldaScoreSE)
    print("%s QDA: %.2f (+/- %0.2f) %%") % (titleStr, qdaScore, qdaScoreSE)
    print("%s RF: %.2f (+/- %0.2f) %%") % (titleStr, rfScore, rfScoreSE)
    return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
Example #19
    def train(self):
        """
        Scale the Dataframe, depends on the model, encode the features, Wavelet
        Transformation, quadratic discriminant analysis, chunk our data into X and
        y samples, Fit the Neural Network.
        """
        df = self.df
        self.scaler = MinMaxScaler()


        if self.model == 'encoder' :

            self.scaler.fit(df)
            df[df.columns]  = self.scaler.transform(df)

            self.autoencoder = autoencoder(self.features)

            X = np.array(self.df.drop(columns = 'up'))
            X = X.reshape(len(X), 1, self.features)

            es = EarlyStopping(monitor = 'val_loss',mode = 'min' , verbose = 1, patience = 20, restore_best_weights = True)
            self.autoencoder.fit(X,X,
                    validation_split = 0.3,
                   callbacks = [es],
                   epochs = 1000,
                   batch_size = 64,
                   shuffle = True)

            X_encode = self.autoencoder.predict(X)
            X_encode.shape = (X_encode.shape[0], X_encode.shape[2])
            new_df = pd.DataFrame(X_encode)
            df.reset_index(inplace = True)
            new_df['up'] = df['up']

            X_train, y_train = get_X_y(new_df, self.n_days, self.length , self.style)

        elif self.model == 'wav' :

            for col_ in df.drop(columns = 'up').columns :
                df[col_] = wavelet_transform(df[col_])[:len(df)]

            self.scaler.fit(df)
            df[df.columns]  = self.scaler.transform(df)

            X_train, y_train = get_X_y(df, self.n_days, self.length , self.style)

        elif self.model == 'qda' :

            self.scaler.fit(df)
            df[df.columns]  = self.scaler.transform(df)

            X_train, y_train = get_X_y(df, self.n_days, self.length , self.style)

            clf = QuadraticDiscriminantAnalysis()

            train = []
            for s in range(len(X_train)) :
                train.append(X_train[s].ravel())
            X_train = train

            clf.fit(X_train, y_train)

            train = []
            for s in range(len(X_train)) :
                quad_dis = clf.predict(X_train[s].reshape(X_train[s].shape[0],1))
                train.append(quad_dis.reshape(self.length, self.features))

            X_train = train
Example #20
                             colsample_bytree=1.0, max_depth=5, gamma=1, min_child_weight=1)
L_model7 = KNeighborsClassifier(2)
R_model1 = SGDClassifier(loss='squared_hinge', penalty='none', alpha=0.001)
R_model2 = DecisionTreeClassifier(max_depth=17, min_samples_split=10)
R_model3 = AdaBoostClassifier(n_estimators=50, learning_rate=1)
R_model4 = GaussianNB()
R_model5 = QuadraticDiscriminantAnalysis()
R_model6 = xgb.XGBClassifier(random_state=1, learning_rate=0.01, subsample=0.6,
                             colsample_bytree=1.0, max_depth=5, gamma=1, min_child_weight=1)
R_model7 = KNeighborsClassifier(2)

L_model1.fit(X_train_left, y_train_left)
L_model2.fit(X_train_left, y_train_left)
L_model3.fit(X_train_left, y_train_left)
L_model4.fit(X_train_left, y_train_left)
L_model5.fit(X_train_left, y_train_left)
L_model6.fit(X_train_left, y_train_left)
L_model7.fit(X_train_left, y_train_left)
R_model1.fit(X_train_right, y_train_right)
R_model2.fit(X_train_right, y_train_right)
R_model3.fit(X_train_right, y_train_right)
R_model4.fit(X_train_right, y_train_right)
R_model5.fit(X_train_right, y_train_right)
R_model6.fit(X_train_right, y_train_right)
R_model7.fit(X_train_right, y_train_right)

joblib.dump(L_model1, 'model/L_model1.pkl')
joblib.dump(L_model2, 'model/L_model2.pkl')
joblib.dump(L_model3, 'model/L_model3.pkl')
joblib.dump(L_model4, 'model/L_model4.pkl')
joblib.dump(L_model5, 'model/L_model5.pkl')
Example #21
    rfc.score(X_test, y_test)))

# In[37]:

ABC = AdaBoostClassifier(n_estimators=100)
ABC.fit(X_train, y_train)
print('Accuracy of ABC classifier on training set: {:.2f}'.format(
    ABC.score(X_train, y_train)))
print('Accuracy of ABC classifier on test set: {:.2f}'.format(
    ABC.score(X_test, y_test)))

# In[38]:

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
Qda = QuadraticDiscriminantAnalysis()
Qda.fit(X_train, y_train)
print('Accuracy of QDA classifier on training set: {:.2f}'.format(
    Qda.score(X_train, y_train)))
print('Accuracy of QDA classifier on test set: {:.2f}'.format(
    Qda.score(X_test, y_test)))

# In[39]:

from sklearn.gaussian_process import GaussianProcessClassifier
GPC = GaussianProcessClassifier()
GPC.fit(X_train, y_train)
print('Accuracy of GPC classifier on training set: {:.2f}'.format(
    GPC.score(X_train, y_train)))
print('Accuracy of GPC classifier on test set: {:.2f}'.format(
    GPC.score(X_test, y_test)))
Example #22
    splot.set_xticks(())
    splot.set_yticks(())


def plot_lda_cov(lda, splot):
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')


def plot_qda_cov(qda, splot):
    plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue')

###############################################################################
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # Linear Discriminant Analysis
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')

    # Quadratic Discriminant Analysis
    qda = QuadraticDiscriminantAnalysis(store_covariances=True)
    y_pred = qda.fit(X, y).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
plt.show()
Example #23
Xt = []
Yt = []
acc = []
for i in train:
    X.append(i[:len(i) - 1])
    Y.append(i[len(i) - 1:][0])
for i in test:
    Xt.append(i[:len(i) - 1])
    Yt.append(i[len(i) - 1:][0])
print(len(Y))
print(len(Yt))
# Create a QDA classifier
clf = QuadraticDiscriminantAnalysis()

#Train the model using the training sets
clf.fit(X, Y)

#Predict the response for test dataset
y_pred = clf.predict(Xt)
count = 0
j = 0
for i in y_pred:
    if int(i) == int(Yt[j]):
        count = count + 1
    j += 1
print(count)
print((count / len(Yt)) * 100)
acc.append((count / len(Yt)) * 100)
print("=== Confusion Matrix ===")
print(confusion_matrix(Yt, y_pred))
print('\n')
Example #24
def QuadDA(X_train, y_train, X_test, y_test):
    clf = QDA()
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    return accuracy
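Assuming QDA aliases scikit-learn's QuadraticDiscriminantAnalysis, a quick call on a standard dataset might look like this (illustrative only):

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
print(QuadDA(X_train, y_train, X_test, y_test))   # test-set accuracy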
Example #25
lda.fit(features_train2, labels_train2)
predict2 = lda.predict(features_test2)

# Training accuracy
print("The training accuracy is: ")
print(accuracy_score(labels_train2, lda.predict(features_train2)))

# Test accuracy
print("The test accuracy is: ")
print(accuracy_score(labels_test2, predict2))

#%% QDA for comparison Part 1
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

qda = QDA()
qda.fit(features_train1, labels_train1)
predict1 = qda.predict(features_test1)

# Training accuracy
print("The training accuracy is: ")
print(accuracy_score(labels_train1, qda.predict(features_train1)))

# Test accuracy
print("The test accuracy is: ")
print(accuracy_score(labels_test1, predict1))

#%% QDA for comparison Part 2

qda = QDA()
qda.fit(features_train2, labels_train2)
predict2 = qda.predict(features_test2)
Example #26
def discriminatePlot(X, y, cVal, titleStr=''):
    # Frederic's Robust Wrapper for discriminant analysis function.  Performs lda, qda and RF after error checking,
    # Generates nice plots and returns cross-validated
    # performance, stderr and base line.
    # X np array n rows x p parameters
    # y group labels n rows
    # rgb color code for each data point - should be the same for each data point belonging to the same group
    # titleStr title for plots
    # returns: ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
    
    # Global Parameters
    CVFOLDS = 10
    MINCOUNT = 10
    MINCOUNTTRAINING = 5 
    
    # Initialize Variables and clean up data
    classes, classesCount = np.unique(y, return_counts = True)  # Classes to be discriminated should be same as ldaMod.classes_
    goodIndClasses = np.array([n >= MINCOUNT for n in classesCount])
    goodInd = np.array([b in classes[goodIndClasses] for b in y])
    yGood = y[goodInd]
    XGood = X[goodInd]
    cValGood = cVal[goodInd]


    classes, classesCount = np.unique(yGood, return_counts = True) 
    nClasses = classes.size         # Number of classes or groups  

    # Do we have enough data?  
    if (nClasses < 2):
        print 'Error in ldaPlot: Insufficient classes with minimum data (%d) for discrimination analysis' % (MINCOUNT)
        return -1, -1, -1, -1 , -1, -1, -1
    cvFolds = min(min(classesCount), CVFOLDS)
    if (cvFolds < CVFOLDS):
        print 'Warning in ldaPlot: Cross-validation performed with %d folds (instead of %d)' % (cvFolds, CVFOLDS)
   
    # Data size and color values   
    nD = XGood.shape[1]                 # number of features in X
    nX = XGood.shape[0]                 # number of data points in X
    cClasses = []   # Color code for each class
    for cl in classes:
        icl = (yGood == cl).nonzero()[0][0]
        cClasses.append(np.append(cValGood[icl],1.0))
    cClasses = np.asarray(cClasses)
    myPrior = np.ones(nClasses)*(1.0/nClasses)  

    # Perform a PCA for dimensionality reduction so that the covariance matrix can be fitted.
    nDmax = int(np.fix(np.sqrt(nX/5)))
    if nDmax < nD:
        print 'Warning: Insufficient data for', nD, 'parameters. PCA projection to', nDmax, 'dimensions.' 
    nDmax = min(nD, nDmax)
    pca = PCA(n_components=nDmax)
    Xr = pca.fit_transform(XGood)
    print 'Variance explained is %.2f%%' % (sum(pca.explained_variance_ratio_)*100.0)
    
    
    # Initialise Classifiers  
    ldaMod = LDA(n_components = min(nDmax,nClasses-1), priors = myPrior, shrinkage = None, solver = 'svd') 
    qdaMod = QDA(priors = myPrior)
    rfMod = RF()   # by default assumes equal weights

        
    # Perform CVFOLDS fold cross-validation to get performance of classifiers.
    ldaScores = np.zeros(cvFolds)
    qdaScores = np.zeros(cvFolds)
    rfScores = np.zeros(cvFolds)
    skf = cross_validation.StratifiedKFold(yGood, cvFolds)
    iskf = 0
    
    for train, test in skf:
        
        # Enforce the MINCOUNT in each class for Training
        trainClasses, trainCount = np.unique(yGood[train], return_counts=True)
        goodIndClasses = np.array([n >= MINCOUNTTRAINING for n in trainCount])
        goodIndTrain = np.array([b in trainClasses[goodIndClasses] for b in yGood[train]])

        # Specify the training data set, the number of groups and priors
        yTrain = yGood[train[goodIndTrain]]
        XrTrain = Xr[train[goodIndTrain]]

        trainClasses, trainCount = np.unique(yTrain, return_counts=True) 
        ntrainClasses = trainClasses.size
        
        # Skip this cross-validation fold because of insufficient data
        if ntrainClasses < 2:
            continue
        goodInd = np.array([b in trainClasses for b in yGood[test]])    
        if (goodInd.size == 0):
            continue
           
        # Fit the data
        trainPriors = np.ones(ntrainClasses)*(1.0/ntrainClasses)
        ldaMod.priors = trainPriors
        qdaMod.priors = trainPriors
        ldaMod.fit(XrTrain, yTrain)
        qdaMod.fit(XrTrain, yTrain)        
        rfMod.fit(XrTrain, yTrain)
        

        ldaScores[iskf] = ldaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        qdaScores[iskf] = qdaMod.score(Xr[test[goodInd]], yGood[test[goodInd]])
        rfScores[iskf] = rfMod.score(Xr[test[goodInd]], yGood[test[goodInd]])

        iskf += 1
     
    if (iskf !=  cvFolds):
        cvFolds = iskf
        ldaScores.reshape(cvFolds)
        qdaScores.reshape(cvFolds)
        rfScores.reshape(cvFolds)
      
# Refit with all the data  for the plots
        
    ldaMod.priors = myPrior
    qdaMod.priors = myPrior
    Xrr = ldaMod.fit_transform(Xr, yGood)
    # Check labels
    for a, b in zip(classes, ldaMod.classes_):
        if a != b:
            print 'Error in ldaPlot: labels do not match'
  
    # Print the coefficients of first 3 DFA 
    print 'LDA Weights:'
    print 'DFA1:', ldaMod.coef_[0,:]
    if nClasses > 2:
        print 'DFA2:', ldaMod.coef_[1,:] 
    if nClasses > 3:
        print 'DFA3:', ldaMod.coef_[2,:] 
        
    # Obtain fits in this rotated space for display purposes   
    ldaMod.fit(Xrr, yGood)    
    qdaMod.fit(Xrr, yGood)
    rfMod.fit(Xrr, yGood)
    
    XrrMean = Xrr.mean(0)
                
    # Make a mesh for plotting
    x1, x2 = np.meshgrid(np.arange(-6.0, 6.0, 0.1), np.arange(-6.0, 6.0, 0.1))
    xm1 = np.reshape(x1, -1)
    xm2 = np.reshape(x2, -1)
    nxm = np.size(xm1)
    Xm = np.zeros((nxm, Xrr.shape[1]))
    Xm[:,0] = xm1
    if Xrr.shape[1] > 1 :
        Xm[:,1] = xm2
        
    for ix in range(2,Xrr.shape[1]):
        Xm[:,ix] = np.squeeze(np.ones((nxm,1)))*XrrMean[ix]
        
    XmcLDA = np.zeros((nxm, 4))  # RGBA values for color for LDA
    XmcQDA = np.zeros((nxm, 4))  # RGBA values for color for QDA
    XmcRF = np.zeros((nxm, 4))  # RGBA values for color for RF

    
    # Predict values on mesh for plotting based on the first two DFs     
    yPredLDA = ldaMod.predict_proba(Xm) 
    yPredQDA = qdaMod.predict_proba(Xm) 
    yPredRF = rfMod.predict_proba(Xm)

    
    # Transform the predictions in color codes
    maxLDA = yPredLDA.max()
    for ix in range(nxm) :
        cWeight = yPredLDA[ix,:]                               # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcLDA[ix,:] = np.dot(cWinner, cClasses)
        XmcLDA[ix,3] = cWeight.max()/maxLDA
    
    # Plot the surface of probability    
    plt.figure(facecolor='white', figsize=(10,3))
    plt.subplot(131)
    Zplot = XmcLDA.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
    plt.title('%s: LDA pC %.0f %%' % (titleStr, (ldaScores.mean()*100.0)))
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))    
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')

    
    # Transform the predictions in color codes
    maxQDA = yPredQDA.max()
    for ix in range(nxm) :
        cWeight = yPredQDA[ix,:]                               # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses
        XmcQDA[ix,:] = np.dot(cWinner, cClasses)
        XmcQDA[ix,3] = cWeight.max()/maxQDA
    
    # Plot the surface of probability    
    plt.subplot(132)
    Zplot = XmcQDA.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:
        plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
    plt.title('%s: QDA pC %.0f %%' % (titleStr, (qdaScores.mean()*100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    
    
    # Transform the predictions in color codes
    maxRF = yPredRF.max()
    for ix in range(nxm) :
        cWeight = yPredRF[ix,:]           # Prob for all classes
        cWinner = ((cWeight == cWeight.max()).astype('float')) # Winner takes all 
        # XmcLDA[ix,:] = np.dot(cWeight, cClasses)/nClasses  # Weighted colors does not work
        XmcRF[ix,:] = np.dot(cWinner, cClasses)
        XmcRF[ix,3] = cWeight.max()/maxRF
    
    # Plot the surface of probability    
    plt.subplot(133)
    Zplot = XmcRF.reshape(np.shape(x1)[0], np.shape(x1)[1],4)
    plt.imshow(Zplot, zorder=0, extent=[-6, 6, -6, 6], origin='lower', interpolation='none', aspect='auto')
    if nClasses > 2:    
        plt.scatter(Xrr[:,0], Xrr[:,1], c=cValGood, s=40, zorder=1)
    else:
        plt.scatter(Xrr,(np.random.rand(Xrr.size)-0.5)*12.0 , c=cValGood, s=40, zorder=1) 
    plt.title('%s: RF pC %.0f %%' % (titleStr, (rfScores.mean()*100.0)))
    plt.xlabel('DFA 1')
    plt.ylabel('DFA 2')
    plt.axis('square')
    plt.xlim((-6, 6))
    plt.ylim((-6, 6))
    
    plt.show()


    # Results
    ldaScore = ldaScores.mean()*100.0
    qdaScore = qdaScores.mean()*100.0
    rfScore = rfScores.mean()*100.0
    ldaScoreSE = ldaScores.std() * 100.0
    qdaScoreSE = qdaScores.std() * 100.0 
    rfScoreSE = rfScores.std() * 100.0 
    
    print ("Number of classes %d. Chance level %.2f %%") % (nClasses, 100.0/nClasses)
    print ("%s LDA: %.2f (+/- %0.2f) %%") % (titleStr, ldaScore, ldaScoreSE)
    print ("%s QDA: %.2f (+/- %0.2f) %%") % (titleStr, qdaScore, qdaScoreSE)
    print ("%s RF: %.2f (+/- %0.2f) %%") % (titleStr, rfScore, rfScoreSE)
    return ldaScore, ldaScoreSE, qdaScore, qdaScoreSE, rfScore, rfScoreSE, nClasses
Example #27
            accuracy = printMetrics(actual, predicted)
            scores.append(accuracy)
            print(scores)
            
        
exit 

'''
kfcv = kFoldCV()
kfcv.kfold_evaluate(df)
'''

#use built in QDA from sklearn
from sklearn.datasets import load_breast_cancer 
data = load_breast_cancer()
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

df_sklearn = pd.DataFrame(data.data, columns=data.feature_names)
df_sklearn['target']=pd.Series(data.target)
x_var = df_sklearn.drop(('target'),axis=1)
y_var = df_sklearn['target']

qda = QuadraticDiscriminantAnalysis()
qda_fit = qda.fit(x_var,y_var)

prediction = qda_fit.predict(x_var)
print(prediction)

print(1-qda.score(x_var,y_var))

Example #28
dismodel.fit(X_train, Y_train)
Y_train_pred = dismodel.predict(X_train)
cmtr = confusion_matrix(Y_train, Y_train_pred)
print("Confusion Matrix Training:\n", cmtr)
acctr = accuracy_score(Y_train, Y_train_pred)
print("Accurray Training:", acctr)
Y_test_pred = dismodel.predict(X_test)
cmte = confusion_matrix(Y_test, Y_test_pred)
print("Confusion Matrix Testing:\n", cmte)
accte = accuracy_score(Y_test, Y_test_pred)
print("Accurray Test:", accte)
report.loc[len(report)] = ['Linear Discriminant Analysis', acctr, accte]

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
qdismodel = QuadraticDiscriminantAnalysis()
qdismodel.fit(X_train, Y_train)
Y_train_pred = qdismodel.predict(X_train)
cmtr = confusion_matrix(Y_train, Y_train_pred)
print("Confusion Matrix Training:\n", cmtr)
acctr = accuracy_score(Y_train, Y_train_pred)
print("Accurray Training:", acctr)
Y_test_pred = qdismodel.predict(X_test)
cmte = confusion_matrix(Y_test, Y_test_pred)
print("Confusion Matrix Testing:\n", cmte)
accte = accuracy_score(Y_test, Y_test_pred)
print("Accurray Test:", accte)
report.loc[len(report)] = ['Quadratic Discriminant Analysis', acctr, accte]


##################
# Neural Network #
Example #29
    # get training, validation and test datasets for specified roi
    training_data, validation_data, test_data = ds.split_data()

    ###########################################################################
    #
    #        CREATE MODEL
    #
    ###########################################################################

    # Define the estimator: quadratic discriminant analysis
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    qda = QuadraticDiscriminantAnalysis()

    qda.fit(training_data[0], training_data[1])

    from sklearn.metrics import accuracy_score

    # record the best result
    accuracies[i] = accuracy_score(test_data[1], qda.predict(test_data[0]))


mean_accuracy = accuracies.mean()
print("\n\nmean accuracy: %f" % mean_accuracy)

###############################################################################
#
#   VISUALIZE
#
###############################################################################
Example #30
def traditional_models (X_train, y_train, X_test, y_test, pos_label=None):
    """
    Applies logistic regression
    :param X_train: Training Set Predictors
    :param X_test: Test Set Predictors
    :param y_train: Test Set response
    :param y_test: Test Set response
    :return: Dataframe with ML technique
    """
    # Logistic regression
    cvals = [1e-20, 1e-15, 1e-10, 1e-5, 1e-3, 1e-1, 1, 10, 100, 10000, 100000]
    logregcv = LogisticRegressionCV(Cs=cvals, cv=5)
    logregcv.fit(X_train, y_train)
    yhat = logregcv.predict(X_test)
    logreg_acc = accuracy_score(y_test, yhat)
    logreg_dacc = directional_accuracy(y_test, yhat)
    fpr_log, tpr_log, thresholds = metrics.roc_curve(
          y_test, logregcv.predict_proba(X_test)[:, 1], pos_label=pos_label)
    logreg_auc = auc(fpr_log, tpr_log)

    # knn
    ks = [2**x for x in range(2, 8)]

    cv_scores = []
    for k in ks:
        knn = KNeighborsClassifier(n_neighbors=k)

        scores = cross_val_score(knn, X_train, y_train,
                                 cv=5, scoring="accuracy")
        cv_scores.append(scores.mean())

    opt_k = ks[np.argmax(cv_scores)]
    # print('The optimal value for k is %d, with a score of %.3f.'
    #     % (opt_k, cv_scores[np.argmax(cv_scores)]))

    knn = KNeighborsClassifier(n_neighbors=opt_k)
    scores = cross_val_score(knn, X_train, y_train, cv=5)

    knn.fit(X_train, y_train)
    yhat = knn.predict(X_test)
    knn_acc = accuracy_score(y_test, yhat)
    knn_dacc = directional_accuracy(y_test, yhat)
    # Calculating auc on testset
    fpr_knn, tpr_knn, thresholds = metrics.roc_curve(
          y_test, knn.predict_proba(X_test)[:, 1], pos_label=pos_label)
    knn_auc = auc(fpr_knn, tpr_knn)

    # LDA
    lda = LinearDiscriminantAnalysis()
    scores = cross_val_score(lda, X_train, y_train, cv=5)

    lda.fit(X_train, y_train)
    yhat = lda.predict(X_test)
    lda_acc = accuracy_score(y_test, yhat)
    lda_dacc = directional_accuracy(y_test, yhat)
    # Calculating auc on testset
    fpr_lda, tpr_lda, thresholds = metrics.roc_curve(
          y_test, lda.predict_proba(X_test)[:, 1], pos_label=pos_label)
    lda_auc = auc(fpr_lda, tpr_lda)

    # QDA
    qda = QuadraticDiscriminantAnalysis()
    scores = cross_val_score(qda, X_train, y_train, cv=5)

    qda.fit(X_train, y_train)
    yhat = qda.predict(X_test)
    qda_acc = accuracy_score(y_test, yhat)
    qda_dacc = directional_accuracy(y_test, yhat)
    # Calculating auc on testset
    fpr_qda, tpr_qda, thresholds = metrics.roc_curve(
          y_test, qda.predict_proba(X_test)[:, 1], pos_label=pos_label)
    qda_auc = auc(fpr_qda, tpr_qda)

    # Random Forest
    tree_cnts = [2**i for i in range(1, 9)]

    # List to hold the results.
    cv_scores = []

    for tree_cnt in tree_cnts:
        # Train the RF model, note that sqrt(p) is the default
        # number of predictors, so it isn't specified here.
        rf = RandomForestClassifier(n_estimators=tree_cnt)
        scores = cross_val_score(rf, X_train, y_train, cv=5)

        cv_scores.append([tree_cnt, scores.mean()])

    cv_scores = np.array(cv_scores)

    opt_tree_cnt = int(cv_scores[np.argmax(np.array(cv_scores)[:, 1])][0])

    rf = RandomForestClassifier(n_estimators=opt_tree_cnt)
    scores = cross_val_score(rf, X_train, y_train, cv=5)

    rf.fit(X_train, y_train)
    yhat = rf.predict(X_test)
    rf_acc = accuracy_score(y_test, yhat)
    rf_dacc = directional_accuracy(y_test, yhat)
    # Calculating auc on testset
    fpr_rf, tpr_rf, thresholds = metrics.roc_curve(
          y_test, rf.predict_proba(X_test)[:, 1], pos_label=pos_label)
    rf_auc = auc(fpr_rf, tpr_rf)

    # ADA Boost
    td = [1, 2]
    trees = [2**x for x in range(1, 8)]
    param_grid = {"n_estimators":trees,
                  "max_depth":td,
                  "learning_rate":[0.05]
                  }

    p = np.zeros((len(trees)*len(td), 3))
    k = 0
    for i in range(0, len(trees)):
        for j in range(0, len(td)):
            ada = AdaBoostClassifier(
                  base_estimator=DecisionTreeClassifier(max_depth=td[j]),
                  n_estimators=trees[i],
                  learning_rate=.05)
            p[k, 0] = trees[i]
            p[k, 1] = td[j]
            p[k, 2] = np.mean(cross_val_score(ada, X_train, y_train, cv=5))
            k = k + 1
    x = pd.DataFrame(p)
    x.columns = ['ntree', 'depth', 'cv_score']
    p = x.loc[x['cv_score'].argmax()]
    ada = AdaBoostClassifier(
          base_estimator=DecisionTreeClassifier(max_depth=p[1]),
          n_estimators=int(p[0]), learning_rate=.05)
    ada.fit(X_train, y_train)
    yhat = ada.predict(X_test)
    ada_acc = accuracy_score(y_test, yhat)
    ada_dacc = directional_accuracy(y_test, yhat)

    # Calculating auc on testset
    fpr_ada, tpr_ada, thresholds = metrics.roc_curve(
          y_test, ada.predict_proba(X_test)[:, 1], pos_label=pos_label)
    ada_auc = auc(fpr_ada, tpr_ada)

    # Support Vector Classification
    svc = svm.SVC(kernel='rbf', random_state=0, gamma=1, C=1, probability=True)
    # scores = cross_val_score(svc, X_train, y_train, cv=5)
    svc.fit(X_train, y_train)
    yhat = svc.predict_proba(X_test)[:, 1]
    svm_acc = accuracy_score(y_test, yhat>0.5)
    svm_dacc = directional_accuracy(y_test, yhat)

    # Calculating auc on testset
    fpr_svm, tpr_svm, thresholds = metrics.roc_curve(
          y_test, svc.predict_proba(X_test)[:, 1], pos_label=pos_label)
    svm_auc = auc(fpr_svm, tpr_svm)

    x = pd.DataFrame({'Accuracy':[logreg_acc, knn_acc, lda_acc, qda_acc, rf_acc,
                                  ada_acc, svm_acc],
                      'AUC':[logreg_auc, knn_auc, lda_auc, qda_auc, rf_auc,
                             ada_auc, svm_auc],
                      'D_Accuracy':[logreg_dacc, knn_dacc, lda_dacc, qda_dacc,
                                    rf_dacc,
                                    ada_dacc, svm_dacc]},
                     index=['LogReg', 'KNN', 'LDA', 'QDA', 'RandomForest',
                            'ADABoost', 'SVM'])
    return x
Example #31
	# First, split data in training and validation sets
	X_train, X_test, y_train, y_test = train_test_split(X, class_label, random_state=random_state)

	# Initialize Classificators
	clf_1 = neighbors.KNeighborsClassifier(n_neighbors, weights='uniform')
	clf_2 = AdaBoostClassifier(n_estimators=100)
	clf_3 = linear_model.LinearRegression()
	clf_4 = LinearDiscriminantAnalysis()
	clf_5 = QuadraticDiscriminantAnalysis()

	# Test classificator with training and validation data
	y_pred_1 = clf_1.fit(X_train, y_train).predict(X_test)
	y_pred_2 = clf_2.fit(X_train, y_train).predict(X_test)
	y_pred_3 = clf_3.fit(X_train, y_train).predict(X_test)
	y_pred_4 = clf_4.fit(X_train, y_train).predict(X_test)
	y_pred_5 = clf_5.fit(X_train, y_train).predict(X_test)

	'''
	Z_1 = clf_1.decision_function(np.c_[xx.ravel(), yy.ravel()])
	Z_2 = clf_2.decision_function(np.c_[xx.ravel(), yy.ravel()])
	Z_3 = clf_3.decision_function(np.c_[xx.ravel(), yy.ravel()])
	Z_3 = Z_3.reshape(xx.shape)
	Z_4 = clf_4.decision_function(np.c_[xx.ravel(), yy.ravel()])
	Z_5 = clf_5.decision_function(np.c_[xx.ravel(), yy.ravel()])
	'''

	# Obtain confusion matrices
	cm_1 += confusion_matrix(y_test, y_pred_1)
	cm_2 += confusion_matrix(y_test, y_pred_2)
	# cm_3 += confusion_matrix(y_test, y_pred_3)
	cm_4 += confusion_matrix(y_test, y_pred_4)
Example #32
# Gender
Y = [
    'male', 'male', 'female', 'female', 'male', 'male', 'female', 'female',
    'female', 'male', 'male'
]

# Stores the decision tree model from sklearn
clf = tree.DecisionTreeClassifier()
clf2 = KNeighborsClassifier(n_neighbors=2)
clf3 = QuadraticDiscriminantAnalysis()

# The result is stored in the updated clf variable
# fit method trains the decision tree on the dataset
clf = clf.fit(X, Y)
clf2 = clf2.fit(X, Y)
clf3 = clf3.fit(X, Y)

# DecisionTreeClassifier Predictions
# predict method gives a result based on the pretrained data-set.
pred = clf.predict([[175, 60, 38]])
# Prints female
print "DecisionTreeClassifier", pred
pred2 = clf.predict([[165, 70, 38]])
# Prints female
print "DecisionTreeClassifier", pred2

# KNeighborsClassifier Predictions
pred3 = clf2.predict([[175, 60, 38]])
# prints female
print "KNeighborsClassifier", pred3
pred4 = clf2.predict([[165, 70, 38]])
Example #33
class road_estimation:
    def __init__(self, model_selection):
        self._train_data, self._train_targets, self._valid_data, self._valid_targets, self._test_data, self._test_targets = (
            data_load()
        )

        self._model_selection = model_selection
        self._classifier = []

    def train(self):
        if self._model_selection == "svm":
            # selected the svc in svm
            self._classifier = svm.SVC()
        elif self._model_selection == "nb":
            self._classifier = GaussianNB()
        elif self._model_selection == "knn":
            # parameter n_jobs can be set to -1 to enable parallel calculating
            self._classifier = KNeighborsClassifier(n_neighbors=7)
        elif self._model_selection == "ada":
            # Bunch of parameters, n_estimators, learning_rate
            self._classifier = AdaBoostClassifier()
        elif self._model_selection == "rf":
            # many parameters including n_jobs
            self._classifier = RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1)
        elif self._model_selection == "qda":
            # complicated array like parameters, perhaps leave it default
            self._classifier = QuadraticDiscriminantAnalysis()
        else:
            print "Please refer to one classifier"

        self._classifier.fit(self._train_data, self._train_targets)
        # predict on valid data
        prediction_valid = self._classifier.predict(self._valid_data)
        # print validation result for selected model.
        print (
            "Classification report for classifier %s on valid_data:\n%s\n"
            % (self._model_selection, metrics.classification_report(self._valid_targets, prediction_valid))
        )

    def test(self):
        # predict on test data
        prediction_test = self._classifier.predict(self._test_data)
        # print test result for selected model.
        print (
            "Classification report for classifier %s on test_data:\n%s\n"
            % (self._model_selection, metrics.classification_report(self._test_targets, prediction_test))
        )

    def showPredictionImage(self):
        f = Feature()
        f.loadImage("um_000000.png")
        f.extractFeatures()
        fea_matrix = f.getFeaturesVectors()

        predict = self._classifier.predict(fea_matrix)
        image = np.copy(f.image)

        num_superpixels = np.max(f.superpixel) + 1
        for i in range(0, num_superpixels):
            indices = np.where(f.superpixel == i)
            if predict[i] == 1:

                image[indices[0], indices[1], 0] = 1
                image[indices[0], indices[1], 1] = 1
                image[indices[0], indices[1], 2] = 0
        plt.imshow(image)
        plt.show()
        # show the prediction image with superpixel boundaries
        plt.imshow(mark_boundaries(image, f.superpixel))
        plt.show()
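# One tidier way to pick the classifier in train(): map the selection string to
# a factory with a dict instead of the if/elif chain. A sketch only; the string
# keys mirror the ones used above and make_classifier is an illustrative helper.
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

CLASSIFIER_FACTORIES = {
    "svm": svm.SVC,
    "nb": GaussianNB,
    "knn": lambda: KNeighborsClassifier(n_neighbors=7),
    "ada": AdaBoostClassifier,
    "rf": lambda: RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    "qda": QuadraticDiscriminantAnalysis,
}

def make_classifier(model_selection):
    if model_selection not in CLASSIFIER_FACTORIES:
        raise ValueError("Unknown classifier: %s" % model_selection)
    return CLASSIFIER_FACTORIES[model_selection]()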
예제 #34
0
                 index_col=0,
                 parse_dates=True)
print(df.head())
print("=" * 25)
#--------------------
X_train = df[:'2004'][['Lag1', 'Lag2']]
y_train = df[:'2004']['Direction']

X_test = df['2005':][['Lag1', 'Lag2']]
y_test = df['2005':]['Direction']
#----------------------------------------------------
lda = LinearDiscriminantAnalysis()
model = lda.fit(X_train, y_train)

pred = model.predict(X_test)
print('Prediction for LDA : ', np.unique(pred, return_counts=True))
print('CM for LDA : \n', confusion_matrix(pred, y_test))
print('Report for LDA \n: ', classification_report(y_test, pred, digits=3))
print("=" * 25)
#----------------------------------------------------
qda = QuadraticDiscriminantAnalysis()
model2 = qda.fit(X_train, y_train)

pred2 = model2.predict(X_test)
print('Prediction for QDA : ', np.unique(pred2, return_counts=True))
print('CM for QDA : \n', confusion_matrix(pred2, y_test))
print('Report for QDA : \n', classification_report(y_test, pred2, digits=3))
print("=" * 25)

#----------------------------------------------------
comps = pca.fit_transform(data) 
plt.plot(pca.components_.reshape((2,data.shape[0],data.shape[1])))

#plt.plot(pca.explained_variance_, linewidth=2)
#plt.title('Principal Component Analysis (PCA) Feature Assessment')

# Creation of labels
labels = []
for i in range(0,27):
    labels.append(1)
for i in range(27,53):
    labels.append(2)

# Discriminant model (a QuadraticDiscriminantAnalysis, despite the `lda` variable name)
lda = QuadraticDiscriminantAnalysis()
lda.fit(comps, labels)
y_pred = lda.predict(comps)
print(labels)
print(y_pred)
mcc = matthews_corrcoef(labels,y_pred)
print("MCC="+str(mcc))


# Plotting the discriminant decision contour
nx, ny = 200, 100
x_min, x_max = np.amin(comps[:,0]), np.amax(comps[:,0])
y_min, y_max = np.amin(comps[:,1]), np.amax(comps[:,1])
xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),np.linspace(y_min, y_max, ny))
Z = lda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
Z = Z[:, 1].reshape(xx.shape)
plt.contour(xx, yy, Z, [0.5], linewidths=5, colors = 'k', linestyles = 'dashed')
예제 #36
0
class QuadraticIQDiscriminator(IQDiscriminationFitter):
    """Quadratic discriminant analysis discriminator for IQ data."""
    def __init__(self,
                 cal_results: Union[Result, List[Result]],
                 qubit_mask: List[int],
                 expected_states: List[str] = None,
                 standardize: bool = False,
                 schedules: Union[List[str], List[Schedule]] = None,
                 discriminator_parameters: dict = None):
        """
        Args:
            cal_results (Union[Result, List[Result]]): calibration results,
                Result or list of Result used to fit the discriminator.
            qubit_mask (List[int]): determines which qubit's level 1 data to
                use in the discrimination process.
            expected_states (List[str]): a list that should have the same
                length as schedules. All results in cal_results are used if
                schedules is None. expected_states must have the corresponding
                length.
            standardize (bool): if true the discriminator will standardize the
                xdata using the internal method _scale_data.
            schedules (Union[List[str], List[Schedule]]): The schedules or a
                subset of schedules in cal_results used to train the
                discriminator. The user may also pass the name of the schedules
                instead of the schedules. If schedules is None, then all the
                schedules in cal_results are used.
            discriminator_parameters (dict): parameters for scikit-learn's QDA.
        Raises:
            ImportError: If scikit-learn is not installed
        """
        if not discriminator_parameters:
            discriminator_parameters = {}

        store_cov = discriminator_parameters.get('store_covariance', False)
        tol = discriminator_parameters.get('tol', 1.0e-4)
        if not HAS_SKLEARN:
            raise ImportError("To use the QuadraticIQDiscriminator class "
                              "scikit-learn needs to be installed. This can "
                              "be done with 'pip install scikit-learn'")

        self._qda = QuadraticDiscriminantAnalysis(store_covariance=store_cov,
                                                  tol=tol)

        # Also sets the x and y data.
        IQDiscriminationFitter.__init__(self, cal_results, qubit_mask,
                                        expected_states, standardize,
                                        schedules)

        self._description = 'Quadratic IQ discriminator for measurement ' \
                            'level 1.'

        self.fit()

    def fit(self):
        """Fits the discriminator using self._xdata and self._ydata."""
        if len(self._xdata) == 0:
            return

        self._qda.fit(self._xdata, self._ydata)
        self._fitted = True

    def discriminate(self, x_data: List[List[float]]) -> List[str]:
        """Applies the discriminator to x_data.

        Args:
            x_data (List[List[float]]): list of features. Each feature is
                                        itself a list.

        Returns:
            The discriminated x_data as a list of labels.
        """
        return self._qda.predict(x_data)
예제 #37
0
lin_clf = svm.LinearSVC()
lin_clf.fit(image_stack, y_training.ravel())
##print lin_clf.predict(image_stack_test)

clf_neu = MLPClassifier(solver='lbfgs', alpha=1e-5)

clf_neu.fit(image_stack, y_training.ravel())

##print clf_neu.predict(image_stack_test)
clf_rand = RandomForestClassifier()
clf_rand.fit(image_stack, y_training.ravel())

clf_lind = LinearDiscriminantAnalysis()
clf_lind.fit(image_stack, y_training.ravel())
clf_quad = QuadraticDiscriminantAnalysis()
clf_quad.fit(image_stack, y_training.ravel())
clf_dt = DecisionTreeClassifier()
clf_adaboost = AdaBoostClassifier(n_estimators=100)
clf_nb = GaussianNB()
reg_lasso = linear_model.Lasso(alpha=0.1)
##scaler = StandardScaler()
##scaler.fit(image_stack)
##image_stack = scaler.transform(image_stack)
##image_stack_test = scaler.transform(image_stack_test)

print(cross_val_score(neigh, image_stack, y_training.ravel(), cv=5))

print(sum(cross_val_score(neigh, image_stack, y_training.ravel(), cv=5)) / 5)

print(cross_val_score(clf, image_stack, y_training.ravel(), cv=5))
test = pd.read_csv("test.csv")

#Select features and parse the result to pandas dataframe
X_test = pd.DataFrame(test.loc[:, features].values)

#Load targets for test
submission = pd.read_csv("gender_submission.csv")

#Select the target column
Y_test = submission.loc[:, "Survived"].values

#Split the train data into features and targets for training
X_train = pd.DataFrame(train.loc[:, features].values)
Y_train = train.loc[:, "Survived"].values

#Data encoding: the model works only with numbers, so string features are converted to numeric values with LabelEncoder
le = preprocessing.LabelEncoder()
X_train = X_train.apply(le.fit_transform)
X_test = X_test.apply(le.fit_transform)

#Create a Quadratic Discriminant Analysis instance
classifier = QuadraticDiscriminantAnalysis()

#Fit the classifier
classifier.fit(X_train, Y_train)

#Calculate the score (Accuracy)
score = classifier.score(X_test, Y_test)

#Printing the score
print(score)
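# Note that applying le.fit_transform column by column, separately on X_train and
# X_test, refits the encoder each time, so the same string can map to different
# integers in train and test. A hedged sketch of a more consistent encoding step
# (encode_columns is an illustrative helper, not part of the original code):
import pandas as pd
from sklearn import preprocessing

def encode_columns(train_df, test_df):
    train_enc, test_enc = train_df.copy(), test_df.copy()
    for col in train_df.columns:
        le = preprocessing.LabelEncoder()
        # Fit on the union of train and test values so every label is known.
        le.fit(pd.concat([train_df[col], test_df[col]]).astype(str))
        train_enc[col] = le.transform(train_df[col].astype(str))
        test_enc[col] = le.transform(test_df[col].astype(str))
    return train_enc, test_enc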
예제 #39
0
lda_pred_posterior = lda_clf.predict_proba(test_df[predictors])
print(lda_pred_posterior)

print(np.sum(lda_pred_posterior[:, 0] >= 0.5))
print(np.sum(lda_pred_posterior[:, 1] >= 0.5))
print(lda_pred_posterior[:20, 0])
print(test_df.lda_pred_class[:20])
print(np.sum(lda_pred_posterior[:, 0] >= 0.9))
print(np.max(lda_pred_posterior[:, 0]))

# QUADRATIC DISCRIMINANT ANALYSIS
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
# Create Classifier Instance
qda_clf = QDA()
# Fit model
qda_clf.fit(X_train, Y_train)
print('Class Priors (P(Y = k)) =', qda_clf.priors_)
print('Class Means μk\n   Down:', qda_clf.means_[0], '\n   Up:  ',
      qda_clf.means_[1])

qda_pred_class_coded = qda_clf.predict(test_df[predictors])
qda_pred_class = ['Up' if c == 1 else 'Down' for c in qda_pred_class_coded]
test_df['qda_pred_class'] = qda_pred_class
print('The model makes {0:.4f}% correct predictions'.format(
    100 * np.mean(test_df.qda_pred_class == test_df.Direction)))

# Compute Test Confusion Matrix #
#################################
table = pd.crosstab(test_df.qda_pred_class, test_df.Direction)
print(table)
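# For reference, the hit rate can be read straight off a crosstab like the one
# above: correct predictions sit on the diagonal (rows are predicted labels,
# columns actual labels). The numbers below are toy values, not the real output.
import numpy as np
import pandas as pd

toy_table = pd.DataFrame([[30, 20], [81, 121]],
                         index=['Down', 'Up'], columns=['Down', 'Up'])
hit_rate = np.diag(toy_table.values).sum() / toy_table.values.sum()
print('hit rate: {0:.4f}'.format(hit_rate))  # (30 + 121) / 252 here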
예제 #40
0
from sklearn import tree
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

#[height, weight, shoe size]
X = [[181, 80, 44], [177, 70, 43], [160, 60, 38], [154, 54, 37], [166, 65, 40],
     [190, 90, 47], [175, 64, 39], [177, 70, 40], [159, 55, 37], [171, 75, 42],
     [181, 85, 43]]

Y = [
    'male', 'female', 'female', 'female', 'male', 'male', 'male', 'female',
    'male', 'female', 'male'
]

clf = tree.DecisionTreeClassifier()
clf = QuadraticDiscriminantAnalysis()
clf = clf.fit(X, Y)

prediction = clf.predict([[190, 70, 43]])

print(prediction)
예제 #41
0
    ytrain = np.concatenate((y1[:j], y1[j + n // 3:]))
    sgd.fit(xtrain[:1000], ytrain[:1000])
    j += n // 3
    print(sgd.score(xtest, ytest))

from sklearn.metrics import plot_confusion_matrix

plot_confusion_matrix(sgd, xtest, ytest, normalize='pred')
sns.set(font_scale=0.75)
plt.show()

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

qd = QuadraticDiscriminantAnalysis()

qd.fit(xtrain, ytrain)

qd.score(xtest, ytest)

qd.score(xtrain, ytrain)

n = len(x)
j = 0
x1 = x.to_numpy()
y1 = y.to_numpy()
for i in range(3):
    xtest = x1[j:j + n // 3]
    ytest = y1[j:j + n // 3]
    xtrain = np.concatenate((x1[:j], x1[j + n // 3:]))
    ytrain = np.concatenate((y1[:j], y1[j + n // 3:]))
    qd.fit(xtrain[:1000], ytrain[:1000])
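# The loop above is a manual 3-fold split over contiguous chunks; roughly the
# same evaluation can be written with sklearn's KFold. A sketch only, assuming
# x1 and y1 are the NumPy arrays built above and keeping the 1000-row training cap:
from sklearn.model_selection import KFold
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def qda_three_fold(x1, y1):
    scores = []
    for train_idx, test_idx in KFold(n_splits=3).split(x1):
        model = QuadraticDiscriminantAnalysis()
        model.fit(x1[train_idx][:1000], y1[train_idx][:1000])
        scores.append(model.score(x1[test_idx], y1[test_idx]))
    return scores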
def main():

    if (sys.argv[1] == 'generate'):

        n_pts1 = 100 ;n_pts2 = 1000

        # Data parameters
        mean_1 = [0, 0]
        cov_1 = np.array([[1, 0], [0, 1]])
        mean_2 = [3, -1]
        cov_2 = np.array([[1, 0], [0, 5]])

        # Create and save data
        val_1 = np.random.multivariate_normal(mean=mean_1,
                                              cov=cov_1,
                                              size=(n_pts1, ))
        val_2 = np.random.multivariate_normal(mean=mean_2,
                                              cov=cov_2,
                                              size=(n_pts2, ))

        label_1 = np.array(([0] * n_pts1), dtype=int)
        label_2 = np.array(([1] * n_pts2), dtype=int)

        np.savetxt("./data1.txt", val_1)
        np.savetxt("./data2.txt", val_2)
        np.savetxt("./data1_label.txt", label_1)
        np.savetxt("./data2_label.txt", label_2)

        print(val_1)
        print(val_2)

        sys.exit("Exiting!")

    elif (sys.argv[1] == 'run'):

        # Load data from file
        val_1 = np.loadtxt("./data1.txt")
        val_2 = np.loadtxt("./data2.txt")
        label_1 = np.loadtxt("./data1_label.txt")
        label_2 = np.loadtxt("./data2_label.txt")

        # Get data in the required form
        X = np.vstack((val_1, val_2))
        y = np.hstack((label_1, label_2))
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.3)

        # Start training
        lda_first = LinearDiscriminantAnalysis(solver='svd',
                                               store_covariance=True)
        qda_first = QuadraticDiscriminantAnalysis(store_covariances=True)
        lda_first.fit(X=X_train, y=y_train)
        qda_first.fit(X=X_train, y=y_train)

        # Print the variables
        print_data(lda_first, qda_first, X_test, y_test)

        # Plot the decision boundary
        PLOT_DB(lda_first, qda_first, X, y, X_test, y_test)

        plt.show()
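# A self-contained sketch (same means and covariances as the 'generate' branch
# above) of why QDA is the natural model for this data: it fits one covariance
# matrix per class, so its boundary can bend around the wider class, while LDA
# assumes a single shared covariance and can only draw a linear boundary.
import numpy as np
from sklearn.discriminant_analysis import (LinearDiscriminantAnalysis,
                                           QuadraticDiscriminantAnalysis)
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
val_a = rng.multivariate_normal([0, 0], [[1, 0], [0, 1]], size=100)
val_b = rng.multivariate_normal([3, -1], [[1, 0], [0, 5]], size=1000)
X_demo = np.vstack((val_a, val_b))
y_demo = np.hstack((np.zeros(100, dtype=int), np.ones(1000, dtype=int)))
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=0.3, random_state=0)

lda_demo = LinearDiscriminantAnalysis().fit(X_tr, y_tr)
qda_demo = QuadraticDiscriminantAnalysis().fit(X_tr, y_tr)
print('LDA accuracy:', lda_demo.score(X_te, y_te))
print('QDA accuracy:', qda_demo.score(X_te, y_te))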
예제 #43
0
    k=7)  # iterate k from 1 to 120; the maximum accuracy comes at k=7.
fclass.fit(X_R2L, Y_R2L)
true = fclass.get_support()
fclasscolindex_R2L = [i for i, x in enumerate(true) if x]
fclasscolname_R2L = list(colNames[i] for i in fclasscolindex_R2L)
print('Features selected :', fclasscolname_R2L)

features = newdf[fclasscolname_R2L].astype(float)
features1 = newdf_test[fclasscolname_R2L].astype(float)
lab = newdf['label']
lab1 = newdf_test['label']

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf = QuadraticDiscriminantAnalysis()
t0 = time()
clf.fit(features, lab)
tt = time() - t0
print("Classifier trained in {} seconds".format(round(tt, 3)))
t0 = time()
pred = clf.predict(features1)
tt = time() - t0
print("Predicted in {} seconds".format(round(tt, 3)))
from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, lab1)
print("Accuracy is {}.".format(round(acc, 4)))
print(
    pd.crosstab(lab1,
                pred,
                rownames=['Actual attacks'],
                colnames=['Predicted attacks']))
예제 #44
0


##Logistic regression
lgRg=linear_model.LogisticRegression()
lgRg.fit(x_train,y_train)
lgRg.score(x_test,y_test) 

## Least squares support vector machines
from sklearn import svm
clf=svm.SVC()
clf.fit(x_train, y_train)

clf = neighbors.KNeighborsClassifier(15, weights=weights)
clf.fit(x_train, y_train)
clf.score(x_test,y_test)

#Quadratic Classifiers (Quadratic Discriminant Analysis)
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf=QuadraticDiscriminantAnalysis()
clf.fit(x_train,y_train)
clf.score(x_test,y_test)


## K-Nearest Neighbour
from sklearn import neighbors
clf = neighbors.KNeighborsClassifier(15, weights="")
clf.fit(x_train, y_train)
clf.score(x_test,y_test)

## Random Forest
## Neural Networks
## Learning Vector Quantization
X2 = df2018[feature].values
X2 = StandardScaler().fit_transform(X2)
y2 = df2018['Label'].values
#y2 = le.fit_transform(df201['Label'].values)

# Perform LDA classifier
lda_classifier = LDA(n_components=2)
lda_classifier.fit(
    X,
    y,
)
y_pred1 = lda_classifier.predict(X2)

# Perform QDA classifier
qda_classifier = QDA()
qda_classifier.fit(X, y)
y_pred2 = qda_classifier.predict(X2)

# Question 1: Equation for LDA
w1 = round(lda_classifier.coef_[0][0], 4)
w2 = round(lda_classifier.coef_[0][1], 4)
w0 = round(lda_classifier.intercept_[0], 4)
print('The equation is: g(x) =', w0, '+', w1, '* mean +', w2, '* std')

# Question 2: Accuracy for year 2018
accuracy_lda = metrics.accuracy_score(y2, y_pred1)
print('The accuracy for LDA classifier for year 2018 is',
      round(accuracy_lda, 2))
accuracy_qda = metrics.accuracy_score(y2, y_pred2)
print('The accuracy for QDA classifier for year 2018 is',
      round(accuracy_qda, 2))
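# As a sanity check on the equation printed above: for a binary LDA the decision
# function is exactly w0 + w1*x1 + w2*x2, with the weights taken from coef_ and
# intercept_. A self-contained sketch on synthetic two-feature data (stand-ins
# for the mean/std features used above):
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(2, 1, (50, 2))])
y_demo = np.array([0] * 50 + [1] * 50)

lda_demo = LinearDiscriminantAnalysis().fit(X_demo, y_demo)
manual_g = X_demo @ lda_demo.coef_[0] + lda_demo.intercept_[0]
print(np.allclose(manual_g, lda_demo.decision_function(X_demo)))  # True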
X = training.iloc[:,1:-1].values
y = training['country_destination'].values
"""
# Use Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
trans = LinearDiscriminantAnalysis(n_components=3)
trans.fit(X,y)
X = trans.transform(X)
"""
# Split Up Data
x_train,x_valid,y_train,y_valid = train_test_split(X,y,test_size=0.3,random_state=None)

# Train classifier
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
clf = QuadraticDiscriminantAnalysis(reg_param=0.00001)
clf.fit(x_train,y_train)

# Run Predictions
from sklearn.metrics import confusion_matrix, accuracy_score
y_preds = clf.predict(x_valid)
print( confusion_matrix(y_valid,y_preds) );
print( "Accuracy: %f" % (accuracy_score(y_valid,y_preds)) );
f = open('qda_take1.txt', 'w')
f.write( str(confusion_matrix(y_valid,y_preds)) );
f.write( "\nAccuracy: %f" % (accuracy_score(y_valid,y_preds)) );
f.write( "\nclf = QuadraticDiscriminantAnalysis(0.00001)" );

# Now on to final submission
x_final = testing.iloc[:,1:].values
y_final = clf.predict(x_final).reshape([62096,]);
y_final = pd.DataFrame(y_final);
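# A note on the reg_param used above: scikit-learn shrinks each class covariance
# towards the identity, roughly (1 - reg_param) * Sigma_k + reg_param * I, which
# keeps QDA stable when a class covariance is (nearly) singular. A tiny
# illustrative sketch with more features than samples per class:
import numpy as np
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

rng = np.random.RandomState(0)
X_small = rng.normal(size=(8, 5))          # 5 features, only 4 samples per class
y_small = np.array([0, 0, 0, 0, 1, 1, 1, 1])

qda_reg = QuadraticDiscriminantAnalysis(reg_param=1e-5).fit(X_small, y_small)
print(qda_reg.predict(X_small[:2]))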
y = y.astype('int')
#print(X)

# In[4]:

train_dataset = train.values
X = train_dataset[:, 2:]
y = train_dataset[:, 1]
y = y.astype('int')
test_dataset = test.values
X_test = test_dataset[:, 2:]
print(type(X_test))
print('X.shape, y.shape, X_test.shape', X.shape, y.shape, X_test.shape)

df = pd.DataFrame({"SK_ID_CURR": df['SK_ID_CURR']})

print('QuadraticDiscriminantAnalysis****************')
qda = QuadraticDiscriminantAnalysis()
print('fitting****************')
qda_train = qda.fit(X, y)
print('predicting on train****************')
qda_X_prediction = qda.predict_proba(X)[:, 1]
print('predicting on test****************')
qda_X_test_prediction = qda.predict_proba(X_test)[:, 1]
tr_te_concatenated = np.concatenate([qda_X_prediction, qda_X_test_prediction])
df['quadratic_discriminant_analysis'] = tr_te_concatenated

print('final tr_te shape', df.shape)
df.to_csv('quadratic_discriminant_analysis_tr_te.csv', index=False)
print(df.head())
예제 #48
0
print("Процент верных предсказаний: %.1f%%" % (hit_rate1 * 100))

# Linear Discriminant
model2 = LDA()
model2.fit(x_train, y_train)  # Training (fit the model parameters)
d['Predict_LDA'] = model2.predict(x_test)  # Test

# Compute the share of correctly predicted price-direction changes:
d["Correct_LDA"] = (1.0 + d['Predict_LDA'] * d["FACT"]) / 2.0
print(d)
hit_rate2 = np.mean(d["Correct_LDA"])
print("Percentage of correct predictions: %.1f%%" % (hit_rate2 * 100))

# Quadratic Discriminant
model3 = QDA()
model3.fit(x_train, y_train)  # Training (fit the model parameters)
d['Predict_QDA'] = model3.predict(x_test)  # Test

# Compute the share of correctly predicted price-direction changes:
d["Correct_QDA"] = (1.0 + d['Predict_QDA'] * d["FACT"]) / 2.0
print(d)
hit_rate3 = np.mean(d["Correct_QDA"])
print("Percentage of correct predictions: %.1f%%" % (hit_rate3 * 100))

# Take the average of the results
rates = [hit_rate1, hit_rate2, hit_rate3]
if (np.average(rates) > 0.5): change_sum = 1
else: change_sum = -1

#
# Percentages
예제 #49
0
import window_s_p_ft as win
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.model_selection import train_test_split


total_score = 0
stop = 1000
for x in range(stop):
    clf = QuadraticDiscriminantAnalysis()
    data = win.getStudents()
    data_train, data_test = train_test_split(data, test_size=0.2)
    data_train_labels = [s.spec for s in data_train]
    data_test_labels = [s.spec for s in data_test]
    data_train = [s.grades for s in data_train]
    data_test = [s.grades for s in data_test]
    clf.fit(data_train, data_train_labels)
    total_score += clf.score(data_test, data_test_labels)
total_score = total_score / stop
print("all")
print(total_score)

specs = ["FK", "FM", "MN", "OE"]
for sp in specs:
    total_score = 0
    for x in range(stop):
        clf = QuadraticDiscriminantAnalysis()
        data = win.getStudents()
        data_train, data_test = train_test_split(data, test_size=0.2)
        data_train_labels = [s.spec if s.spec == sp else "NOT " + sp for s in data_train]
        data_test_labels = [s.spec if s.spec == sp else "NOT " + sp for s in data_test]
        data_train = [s.grades for s in data_train]
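# The averaging loop above is essentially Monte Carlo cross-validation (repeated
# random 80/20 splits). A sketch of the same idea with ShuffleSplit and
# cross_val_score, assuming plain arrays `grades` and `labels` in place of the
# Student objects (unlike the loop, it does not re-draw the data each iteration
# the way win.getStudents() does):
from sklearn.model_selection import ShuffleSplit, cross_val_score
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

def mean_qda_score(grades, labels, n_iter=1000):
    cv = ShuffleSplit(n_splits=n_iter, test_size=0.2)
    scores = cross_val_score(QuadraticDiscriminantAnalysis(), grades, labels, cv=cv)
    return scores.mean()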
예제 #50
0
plt.grid()


# ### What would the classification boundary look like with a Gaussian discriminant (QDA)?

# In[4]:


from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
N = 100
x, y  = np.random.multivariate_normal(Mean, Cov, N).T
x2, y2  = np.random.multivariate_normal(Mean2, Cov2, N).T
X = np.r_[np.c_[x,y],np.c_[x2,y2]]
Y = np.r_[np.ones((N,1)),np.zeros((N,1))]
clf = QuadraticDiscriminantAnalysis()
clf.fit(X,Y.flatten())
plt.scatter(X[:,0],X[:,1],c=Y.flatten(), cmap='Set2',alpha=0.5)

h = .02  # step size in the mesh
# create a mesh to plot in
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))

# Plot the decision boundary. For that, we will assign a color to each
# point in the mesh [x_min, x_max]x[y_min, y_max].
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.contour(xx, yy, Z, cmap=plt.cm.Blues)
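# In scikit-learn >= 1.1 the mesh-grid boilerplate above can be replaced by
# DecisionBoundaryDisplay, which builds the grid internally. A self-contained
# sketch with a similar two-Gaussian setup (the means and covariances here are
# toy stand-ins, not the Mean/Cov variables defined earlier):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.inspection import DecisionBoundaryDisplay

rng = np.random.RandomState(0)
X_demo = np.vstack([rng.multivariate_normal([0, 0], [[1, 0], [0, 1]], 100),
                    rng.multivariate_normal([3, -1], [[1, 0], [0, 5]], 100)])
y_demo = np.array([1] * 100 + [0] * 100)

clf_demo = QuadraticDiscriminantAnalysis().fit(X_demo, y_demo)
disp = DecisionBoundaryDisplay.from_estimator(clf_demo, X_demo,
                                              plot_method='contour',
                                              response_method='predict')
disp.ax_.scatter(X_demo[:, 0], X_demo[:, 1], c=y_demo, cmap='Set2', alpha=0.5)
plt.show()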
예제 #51
0
파일: LDA.py 프로젝트: flyxu/scikit-learn
    splot.add_artist(ell)
    splot.set_xticks(())
    splot.set_yticks(())


def plot_lda_cov(lda, splot):
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')


def plot_qda_cov(qda, splot):
    plot_ellipse(splot, qda.means_[0], qda.covariances_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariances_[1], 'blue')

###############################################################################
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # Linear Discriminant Analysis
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')

    # Quadratic Discriminant Analysis
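    # Note: store_covariances / covariances_ (with the plural 's') are the
    # spellings used by older scikit-learn releases; current versions use
    # store_covariance / covariance_, as in the later example below.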
    qda = QuadraticDiscriminantAnalysis(store_covariances=True)
    y_pred = qda.fit(X, y).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis')
plt.show()
What is the training misclassification rate?
"""

lda1 = LDA(solver="svd", store_covariance=True)
lda1.fit(warX,warY)

my_lda_pred = pd.DataFrame()
my_lda_pred["pred"] = ["No" if x == 0 else "Yes" for x in lda1.predict(warX)]
my_lda_pred["actual"] = ["No" if x == 0 else "Yes" for x in war["start"]]
conf_lda = pd.crosstab(my_lda_pred["pred"], my_lda_pred["actual"])
conf_lda

(1/(war.shape[0])) * (conf_lda.iloc[1,0] + conf_lda.iloc[0,1])


"""
6.69%
"""

qda1 = QDA(store_covariances=True)
qda1.fit(warX,warY)

test = qda1.predict_proba(warX)

my_qda_pred = pd.DataFrame()
my_qda_pred["pred"] = ["No" if x < .5 else "Yes" for x in qda1.predict(warX)]
my_qda_pred["actual"] = ["No" if x == 0 else "Yes" for x in war["start"]]
conf_qda = pd.crosstab(my_qda_pred["pred"], my_qda_pred["actual"])
conf_qda

(1/(war.shape[0])) * (conf_qda.iloc[1,0] + conf_qda.iloc[0,1])
예제 #53
0
    
    for i in range(0,9):
        labels.append(1)
    for i in range(9,18):
        labels.append(2)
    for i in range(18, 27):
        labels.append(3)
    '''
    # Creation of random labels
    for i in range(0,27):
        labels.append(int(random.random() * 3) + 1)
    print (labels)
    '''
    # QDA model
    qda = QuadraticDiscriminantAnalysis()
    qda.fit(comps, labels)

    # MCC Calculation
    y_pred = qda.predict(comps)
    #print(labels)
    #print(y_pred)
    mcc = multimcc(labels,y_pred)
    print("MCC="+str(mcc))

    '''
    # Plotting QDA contour
    nx, ny = 200, 100
    x_min, x_max = np.amin(comps[:,0]), np.amax(comps[:,0])
    y_min, y_max = np.amin(comps[:,1]), np.amax(comps[:,1])
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, nx),np.linspace(y_min, y_max, ny))
    Z = qda.predict_proba(np.c_[xx.ravel(), yy.ravel()])
예제 #54
0
# In[4]:

# CHALLENGE - ...and train them on our data
# Training the models
clf_tree.fit(X, Y)
clf_svm.fit(X, Y)
clf_perceptron.fit(X, Y)
clf_KNN.fit(X, Y)
clf_svm_RBF.fit(X, Y)
clf_GPC.fit(X, Y)
clf_RFC.fit(X, Y)
clf_NN.fit(X, Y)
clf_ADA.fit(X, Y)
clf_GNB.fit(X, Y)
clf_QDA.fit(X, Y)

#prediction = clf.predict([[190, 70, 43]])

# In[5]:

# Testing using the same data
pred_tree = clf_tree.predict(X)
acc_tree = accuracy_score(Y, pred_tree) * 100
print('Accuracy for DecisionTree: {}'.format(acc_tree))

# In[6]:

# Linear SVM
pred_svm = clf_svm.predict(X)
acc_svm = accuracy_score(Y, pred_svm) * 100
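# Accuracy measured on the same data the models were trained on (as above) is
# optimistic. A minimal, self-contained sketch of the same check on a held-out
# split, using synthetic data and QDA as a stand-in classifier:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.metrics import accuracy_score

X_demo, Y_demo = make_classification(n_samples=200, n_features=4, random_state=0)
X_tr, X_te, Y_tr, Y_te = train_test_split(X_demo, Y_demo, test_size=0.3, random_state=0)

clf_demo = QuadraticDiscriminantAnalysis().fit(X_tr, Y_tr)
acc_holdout = accuracy_score(Y_te, clf_demo.predict(X_te)) * 100
print('Held-out accuracy for QDA: {0:.1f}'.format(acc_holdout))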
예제 #55
0
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


#define X y
X, y = data.loc[:,data.columns != 'state'].values, data.loc[:,data.columns == 'state'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

#smoteen
sme = SMOTEENN(random_state=42)
os_X,os_y = sme.fit_sample(X_train,y_train)

#QDA
clf_QDA = QuadraticDiscriminantAnalysis(store_covariances=True)
clf_QDA.fit(os_X, os_y)
y_true, y_pred = y_test, clf_QDA.predict(X_test)

#F1_score, precision, recall, specificity, G score
print("F1_score : %.4g" % metrics.f1_score(y_true, y_pred))
print("Recall : %.4g" % metrics.recall_score(y_true, y_pred))
recall = metrics.recall_score(y_true, y_pred)
print("Precision : %.4g" % metrics.precision_score(y_true, y_pred))

#Compute confusion matrix
cnf_matrix = confusion_matrix(y_test,y_pred)
np.set_printoptions(precision=2)
print("Specificity: ", float(cnf_matrix[0,0])/(cnf_matrix[0,0]+cnf_matrix[0,1]))
specifity = float(cnf_matrix[0,0])/(cnf_matrix[0,0]+cnf_matrix[0,1])
# G score = geometric mean of recall and specificity
print("G score: ", math.sqrt(recall * specifity))
예제 #56
0
def plot_lda_cov(lda, splot):
    plot_ellipse(splot, lda.means_[0], lda.covariance_, 'red')
    plot_ellipse(splot, lda.means_[1], lda.covariance_, 'blue')


def plot_qda_cov(qda, splot):
    plot_ellipse(splot, qda.means_[0], qda.covariance_[0], 'red')
    plot_ellipse(splot, qda.means_[1], qda.covariance_[1], 'blue')


plt.figure(figsize=(10, 8), facecolor='white')
for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]):
    # Linear Discriminant Analysis
    lda = LinearDiscriminantAnalysis(solver="svd", store_covariance=True)
    y_pred = lda.fit(X, y).predict(X)
    splot = plot_data(lda, X, y, y_pred, fig_index=2 * i + 1)
    plot_lda_cov(lda, splot)
    plt.axis('tight')

    # Quadratic Discriminant Analysis
    qda = QuadraticDiscriminantAnalysis(store_covariance=True)
    y_pred = qda.fit(X, y).predict(X)
    splot = plot_data(qda, X, y, y_pred, fig_index=2 * i + 2)
    plot_qda_cov(qda, splot)
    plt.axis('tight')
plt.suptitle('Linear Discriminant Analysis vs Quadratic Discriminant Analysis',
             y=1.02,
             fontsize=15)
plt.tight_layout()
plt.show()
예제 #57
0
      up_probs[idx_g50].size)
print('\nNumber of days with probability < 50% for market to be up:',
      up_probs[idx_l50].size)

#Indices of events with posterior probability > 90%#
idx_g90 = up_probs > 0.9

print('\nNumber of days with probability > 90% for market to be up:',
      up_probs[idx_g90].size)

### QUADRATIC DISCRIMINANT ANALYSIS ###
# Initiate QDA object
qda_clf = QuadraticDiscriminantAnalysis()

# Fit model. Let Xs_train = matrix of new_pred, Ys_train = matrix of variables.
resqda_clf = qda_clf.fit(Xs_train, Ys_train)

#Predicted values for the test set
Ys_pred_qda = resqda_clf.predict(Xs_test)

#Prior probabilities#
print("\nPrior probabilities")
print(resqda_clf.classes_)
print(resqda_clf.priors_)

#Group means#
print("\nGroup means")
#print(resqda_clf.classes_)
print(resqda_clf.means_)

#Confusion matrix#
def quadratic_discriminant_fn(x_train, y_train):
    model = QuadraticDiscriminantAnalysis()
    model.fit(x_train, y_train)

    return model
예제 #59
0
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Define arbitrary X and y values.
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])

# Build the LDA model.
clf = LinearDiscriminantAnalysis()
clf.fit(X, y)

# predict.
print(clf.predict([[-0.8, -1]]))

# Prediction using QDA.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

clf2 = QuadraticDiscriminantAnalysis()
clf2.fit(X, y)

# Compare LDA and QDA via confusion matrices.
from sklearn.metrics import confusion_matrix
y_pred = clf.predict(X)
confusion_matrix(y, y_pred)
y_pred2 = clf2.predict(X)
confusion_matrix(y, y_pred2)
예제 #60
0
##############################################
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print('Transformation applied to the data successfully')

###############################################
# Next we create an instance of the model
clf = QuadraticDiscriminantAnalysis()
print('QDA created successfully')

###############################################
print('Fitting the data...')
# Fit the training data to our model
clf.fit(X_train, y_train)
print('Training completed successfully')
#print mlp

##################################################
predictions = clf.predict(X_test)
#print predictions
##################################################
print(confusion_matrix(y_test, predictions))

print(classification_report(y_test, predictions, digits=4))

####################################################
#print len(mlp.coefs_)
#print len(mlp.coefs_[0])