def call_function():
    """Train a QDA classifier on 'med.data' (75/25 split) and print test accuracy."""
    try:
        # prepare data
        trainingSet = []
        testSet = []
        split = 0.25  # fraction of rows sent to the test set
        loadDataset("/".join([DATASET_FOLDER, 'med.data']), split, trainingSet,
                    testSet)
        # all columns except the last are features; the last column is the label
        trainData = np.array(trainingSet)[:,
                                          0:np.array(trainingSet).shape[1] - 1]
        columns = trainData.shape[1]
        # NOTE: np.float was removed in NumPy 1.24 -- use the builtin float
        X = np.array(trainData).astype(float)
        y = np.array(trainingSet)[:, columns].astype(float)
        clf = QDA()
        clf.fit(X, y)
        testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        X_test = np.array(testData).astype(float)
        y_test = np.array(testSet)[:, columns].astype(float)
        accuracy = clf.score(X_test, y_test) * 100
        print("Accuracy %:", accuracy)
    except Exception:
        # except Exception (not bare except:) so SystemExit/KeyboardInterrupt propagate
        e = sys.exc_info()[0]
        print("<p>Error: %s</p>" % e)
Beispiel #2
0
    def create_symbol_forecast_model(self):
        """Fit a QDA model on lagged S&P500 returns and return it.

        Only data strictly before ``model_start_test_date`` is used for
        training, so the test period stays unseen.
        """
        # Create a lagged series of the SP500 stock market index
        snpret = create_lagged_series(self.symbol_list[0],
                                      self.model_start_date,
                                      self.model_end_date,
                                      lags=5)

        # Use the prior two days of returns as predictors
        X = snpret[["Lag1", "Lag2"]]
        y = snpret["Direction"]

        # Train only on data before the test start date (the test slices
        # computed here previously were never used and have been removed)
        start_test = self.model_start_test_date
        X_train = X[X.index < start_test]
        y_train = y[y.index < start_test]

        # NOTE: the model is interchangeable -- a random forest, SVM, or
        # logistic regression could replace QDA() below.
        model = QDA()
        model.fit(X_train, y_train)
        return model
Beispiel #3
0
    def create_symbol_forecast_model(self):
        """Fit a QDA model on lagged S&P500 index returns and return it.

        Trains on data strictly before ``model_start_test_date``, after
        dropping the first rows whose lagged values are NaN.
        """
        # Create a lagged series of the S&P500 US stock market index
        df_ret = create_lagged_series(self.symbol_list[0],
                                      self.model_start_date,
                                      self.model_end_date,
                                      self.model_interval,
                                      lags=5)

        # Use the prior two days of returns as predictor
        # values, with direction as the response
        X = df_ret[["Lag1", "Lag2"]]
        Y = df_ret["Direction"]

        # Create training and test sets
        start_test = self.model_start_test_date
        X_train = X[X.index < start_test]
        # BUG FIX: the NaN-avoidance filter must be applied to X_train, not X.
        # Previously `X_train = X[X.index > X.index[2]]` discarded the date
        # cut-off entirely and trained on the test period as well.
        X_train = X_train[X_train.index > X_train.index[2]]
        X_test = X[X.index >= start_test]
        Y_train = Y[Y.index < start_test]
        Y_train = Y_train[Y_train.index > Y_train.index[2]]
        Y_test = Y[Y.index >= start_test]
        # Here we choose QDA, but the strategy depends on its parameters;
        # testing other models, k-fold CV and grid search remain TODO.
        model = QDA()
        model.fit(
            X_train, Y_train
        )  # TODO --> The model could be fit on the whole dataset, this is on model validation
        return model
    def create_symbol_forecast_model(self):
        '''
        Fit a QDA model to lagged S&P500 returns (uses create_lagged_series).
        :return: fitted model
        '''
        # Create a lagged series of the SP500 US stock market index
        snpret = create_lagged_series(list(self.symbol_dict.keys())[0],
                                      self.model_start_date,
                                      self.model_end_date,
                                      lags=5)

        # Drop rows whose 5-day lag is NaN, then use the prior two days of
        # returns as predictor values, with direction as the response.
        snpret = snpret[snpret['Lag5'].notnull()]
        X = snpret[['Lag1', 'Lag2']]
        y = snpret['Direction']

        # Train only on data before the test start date (the unused test
        # slices previously computed here have been removed)
        start_test = self.model_start_test_date
        X_train = X[X.index < start_test]
        y_train = y[y.index < start_test]

        model = QDA()
        model.fit(X_train, y_train)
        return model
Beispiel #5
0
def main():
    """Train a bagged QDA classifier on the combined dataset and print accuracy."""
    # prepare data
    trainingSet = []
    testSet = []
    split = 0.25  # fraction of rows sent to the test set
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    # BUG FIX: the original mixed Python-2 print statements with Python-3
    # print() calls and mixed tabs/spaces (a TabError under Python 3)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    # all columns except the last are features; the last column is the label
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]
    clf = BaggingClassifier(QDA())
    clf.fit(X, y)
    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]
    accuracy = clf.score(X_test, y_test) * 100
    print("Accuracy %:", accuracy)
def main():
    """Compare LDA and QDA on the shuttle dataset; plot classes 4 and 5."""
    dataset = pd.read_csv("shuttle.csv", header=None).values.astype(np.int32,
                                                                    copy=False)
    # 60/40 train/test split (BUG FIX: the old '+ 1' silently dropped the
    # first test row)
    data_train = dataset[0:int(len(dataset) * 0.6)]
    data_test = dataset[int(len(dataset) * 0.6):]
    # collect features 3 and 6 for rows of classes 4 and 5 only
    x, y = np.array([]), np.array([])
    for row in dataset:
        if row[-1] == 4 or row[-1] == 5:
            x = np.vstack(
                (x, [row[3], row[6]])) if len(x) != 0 else [row[3], row[6]]
            y = np.append(y, row[-1] - 4)  # remap labels 4/5 -> 0/1

    # class counts: [11478, 13, 39, 2155, 809, 4, 2] => classes 4, 5

    lda = LDA(solver="svd", store_covariance=True)
    splot = visualization(dataset[:, 3], dataset[:, 6], dataset[:, -1])
    splot = plot_data(lda, x, y, lda.fit(x, y).predict(x))
    plt.axis('tight')
    plt.show()

    lda = lda.fit(data_train[:, :-1], data_train[:, -1])
    lda = lda.score(data_test[:, :-1], data_test[:, -1])
    # BUG FIX: 'store_covariances' was removed in scikit-learn 0.23; the
    # parameter is now spelled 'store_covariance'
    qda = QDA(store_covariance=True)
    qda = qda.fit(data_train[:, :-1], data_train[:, -1])
    qda = qda.score(data_test[:, :-1], data_test[:, -1])

    print("Linear Discriminant Analysis: ", lda)
    print("Quadratic Discriminant Analysis: ", qda)
Beispiel #7
0
 def __init__(self):
     """Populate self.classifiers with [display-name, estimator] pairs.

     Every estimator is constructed unfitted, ready for a benchmark loop.
     """
     self.classifiers = [
         ["Random Forest", RandomForestClassifier()],
         ["Logistic Regression", LogisticRegression()],
         ["Stochastic Gradient Descent", SGDClassifier()],
         ["Nearest Neighbors", KNeighborsClassifier()],
         ["Linear SVM", SVC(kernel="linear")],
         ["Polynomial SVM", SVC(kernel="poly")],
         ["RBF SVM", SVC(kernel="rbf")],
         ["Sigmoid SVM", SVC(kernel="sigmoid")],
         ["Decision Tree", DecisionTreeClassifier()],
         ["Extra Tree", ExtraTreesClassifier()],
         ["Gradient Boosting", GradientBoostingClassifier()],
         ["AdaBoost", AdaBoostClassifier()],
         ["Naive Bayes", GaussianNB()],
         ["Linear Discriminant Analysis", LDA()],
         ["Quadratic Discriminant Analysis", QDA()],
         ["Gaussian Process", GaussianProcessClassifier()],
         ["Multi-Layer Perceptron", MLPClassifier()],
     ]
 def classify_using_random_sampling(self, X_train, X_test, y_train, y_test, portion_of_sampled_dataset_vector, classifiers_for_experiments):
     """Average test accuracy of classifiers trained on random subsamples.

     For every classifier name and every sampling portion, the training data
     is shuffled and reduced 20 times; the mean recognition rate (%) goes
     into the returned (n_classifiers, n_portions) array.
     """
     psa = PSA()
     # ---- settings:
     number_of_runs_for_random_sampling = 20
     # ---- map classifier names to factories.  BUG FIX: an unknown name now
     # raises instead of silently reusing the previous iteration's model:
     classifier_factories = {
         'SVM': lambda: SVC(kernel='linear'),
         'LDA': lambda: LDA(),
         'QDA': lambda: QDA(),
         'Random Forest': lambda: RF(max_depth=2, random_state=0),
         'Logistic Regression': lambda: LR(),
         'Gaussian Naive Bayes': lambda: GaussianNB(),
     }
     # ---- Experimenting:
     recognition_rate_LIST = np.zeros((len(classifiers_for_experiments), len(portion_of_sampled_dataset_vector)))
     for classifier_index, classifier in enumerate(classifiers_for_experiments):
         print('############### Classifier: ' + classifier)
         if classifier not in classifier_factories:
             raise ValueError('Unknown classifier: ' + classifier)
         for portion_index, portion_of_sampled_dataset in enumerate(portion_of_sampled_dataset_vector):
             print('###### Portion of sampled dataset: ' + str(portion_of_sampled_dataset * 100) + '%')
             # ---- data reduction with random sampling:
             recognition_rate_with_random_sampling = [None] * number_of_runs_for_random_sampling
             for run_index in range(number_of_runs_for_random_sampling):
                 shuffled_samples = self.shuffle_samples_randomly(X=X_train, y=y_train)  # shuffle samples of classes randomly
                 # ---- data reduction: keep the requested portion per class
                 number_of_classes = len(shuffled_samples)
                 n_samples = []
                 for class_index in range(number_of_classes):
                     number_of_samples_of_class = shuffled_samples[class_index].shape[0]
                     n_samples.append(int(number_of_samples_of_class * portion_of_sampled_dataset))
                 X, y = psa.reduce_data(sorted_samples=shuffled_samples, n_samples=n_samples)
                 # ---- report number of sampled data after random sampling:
                 if run_index == 0:  # only report once in the multiple runs
                     print('number of sampled data in classes, after random sampling: ' + str(n_samples))
                 # ---- train and test with random sampling:
                 clf = classifier_factories[classifier]()
                 clf.fit(X=X, y=y)
                 labels_predicted = clf.predict(X_test)
                 recognition_rate_with_random_sampling[run_index] = (sum(labels_predicted == y_test) / len(labels_predicted)) * 100
             recognition_rate_with_random_sampling_average = np.mean(recognition_rate_with_random_sampling)
             print('The recognition rate using ' + classifier + ' with data number reduction (random sampling): ' + str(recognition_rate_with_random_sampling_average))
             recognition_rate_LIST[classifier_index, portion_index] = recognition_rate_with_random_sampling_average
     return recognition_rate_LIST
def qda(train_x, train_y, test_x):
    """Classify test_x with a default Quadratic Discriminant Analysis model.

    Training and prediction are delegated to the module-level _classify
    helper; its return value is passed through unchanged.
    """
    # BUG FIX: the deprecated 'store_covariances' alias was removed in
    # scikit-learn 0.24 and raises TypeError; only 'store_covariance' exists.
    model = QDA(priors=None,
                reg_param=0.0,
                store_covariance=False,
                tol=0.0001)
    return _classify(model, train_x, train_y, test_x)
Beispiel #10
0
    def fit(self, X, Y):
        """Fit a QDA estimator (wrapped in OneVsRest for multilabel Y).

        Returns self.  Raises ValueError when the fitted QDA has
        non-positive scalings, which indicates numerical problems.
        """
        from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA
        import sklearn.multiclass

        # BUG FIX: reg_param must be passed by keyword -- positionally it
        # was being interpreted as the 'priors' argument (and recent
        # scikit-learn rejects positional estimator arguments entirely).
        estimator = QDA(reg_param=self.reg_param)

        if len(Y.shape) == 2 and Y.shape[1] > 1:
            # multilabel target: train one binary QDA per output column
            self.estimator = sklearn.multiclass.OneVsRestClassifier(estimator,
                                                                    n_jobs=1)
        else:
            self.estimator = estimator

        self.estimator.fit(X, Y)

        # Sanity-check the per-class scalings of every fitted estimator.
        if len(Y.shape) == 2 and Y.shape[1] > 1:
            problems = []
            for est in self.estimator.estimators_:
                problem = np.any(
                    np.any([np.any(s <= 0.0) for s in est.scalings_]))
                problems.append(problem)
            problem = np.any(problems)
        else:
            problem = np.any(
                np.any([np.any(s <= 0.0) for s in self.estimator.scalings_]))
        if problem:
            raise ValueError('Numerical problems in QDA. QDA.scalings_ '
                             'contains values <= 0.0')

        return self
def performClassification(X_train,
                          y_train,
                          X_test,
                          y_test,
                          method,
                          parameters=None):
    """
    Benchmark several classification algorithms on the given train/test split.

    Prints the dataset shapes and the list of scores returned by
    benchmark_classifier for each model.
    """
    # avoid the mutable-default-argument pitfall; None behaves like the old {}
    if parameters is None:
        parameters = {}

    print('Performing ' + method + ' Classification...')
    print('Size of train set: ', X_train.shape)
    print('Size of test set: ', X_test.shape)
    print('Size of train set: ', y_train.shape)
    print('Size of test set: ', y_test.shape)

    classifiers = [
        RandomForestClassifier(n_estimators=100, n_jobs=-1),
        neighbors.KNeighborsClassifier(),
        # BUG FIX: SVC has no 'epsilon' parameter (that belongs to SVR);
        # passing it raised TypeError
        SVC(degree=100, C=10000),
        AdaBoostRegressor(),
        # BUG FIX: the constructed instance was being called --
        # AdaBoostClassifier(**parameters)() raised TypeError
        AdaBoostClassifier(**parameters),
        GradientBoostingClassifier(n_estimators=100),
        QDA(),
    ]

    scores = []

    for classifier in classifiers:
        scores.append(benchmark_classifier(classifier,
            X_train, y_train, X_test, y_test))

    print(scores)
Beispiel #12
0
def fitting(algorithm, X_train_std, y_train, X_test_std, y_test, cluster_name,
            path_results, name_model):
    """Fit and return the classifier selected by `algorithm`.

    Supported values: 'gpc', 'svc' (grid-searched RBF SVM), 'gnb', 'qda'.
    Raises ValueError for an unknown algorithm (previously the code fell
    through and crashed with NameError on the undefined `mod`).
    """
    if algorithm == 'gpc':
        kernel = 1.0 * RBF([1.0, 1.0, 1.0, 1.0, 1.0])  # for GPC
        mod = GaussianProcessClassifier(kernel=kernel, random_state=0)
        mod.fit(X_train_std, y_train)
        print("Kernel {}".format(mod.kernel_))
    elif algorithm == 'svc':
        # grid-search C and gamma of an RBF SVM with 5-fold CV
        n_gamma = 30
        n_C = 30
        edges_gamma = [0.1, 20]
        edges_C = [0.1, 20]
        gamma_par = np.linspace(edges_gamma[0], edges_gamma[1], n_gamma)
        C_par = np.linspace(edges_C[0], edges_C[1], n_C)
        svm = SVC(kernel='rbf', random_state=0, probability=True)
        param_grid = [{'C': C_par, 'gamma': gamma_par}]
        mod = GridSearchCV(estimator=svm,
                           param_grid=param_grid,
                           scoring='accuracy',
                           cv=5,
                           n_jobs=-1)
        mod.fit(X_train_std, y_train)
    elif algorithm == 'gnb':
        mod = GaussianNB()
        mod.fit(X_train_std, y_train)
    elif algorithm == 'qda':
        mod = QDA()
        mod.fit(X_train_std, y_train)
    else:
        raise ValueError('Unknown algorithm: {}'.format(algorithm))

    return mod
Beispiel #13
0
def perform_training(method, X_train, y_train, X_test, y_test, lag, delta,
                     threshold):
    """Train the classifier named by `method` and evaluate its hit rate.

    Returns (hit_rate, model_dict) where model_dict records the method
    name, the fitted model, and the lag/delta settings.
    Raises ValueError for an unknown method (previously the code printed a
    message and then crashed with NameError on model.fit).
    """
    if method == 'LR':
        model = LogisticRegression()
    elif method == 'LDA':
        model = LDA()
    elif method == 'QDA':
        model = QDA()
    elif method == 'RF':
        model = RandomForestClassifier(n_estimators=1000, n_jobs=-1)
    elif method == 'KNN':
        model = KNeighborsClassifier()
    elif method == 'ADA':
        model = AdaBoostClassifier()
    elif method == 'GTB':
        model = GradientBoostingClassifier(n_estimators=100)
    else:
        raise ValueError('Invalid method: {}'.format(method))

    model.fit(X_train, y_train.values.ravel())

    y_pred = model.predict(X_test)

    # fraction of test samples whose direction was predicted correctly
    hit_rate = (sum(y_pred == y_test.iloc[:, 0])) / len(y_pred)

    model_dict = {
        "method": method,
        "model": model,
        "lag": lag,
        "delta": delta,
    }

    return hit_rate, model_dict
def call_function():
    """Train a bagged QDA classifier on 'comb.csv' and print test accuracy."""
    try:
        # prepare data
        trainingSet = []
        testSet = []
        split = 0.25  # fraction of rows sent to the test set
        loadDataset("/".join([DATASET_FOLDER, 'comb.csv']), split, trainingSet,
                    testSet)
        print('Train set: ' + repr(len(trainingSet)))
        print('Test set: ' + repr(len(testSet)))
        # all columns except the last are features; the last column is the label
        trainData = np.array(trainingSet)[:,
                                          0:np.array(trainingSet).shape[1] - 1]
        columns = trainData.shape[1]
        X = np.array(trainData)
        y = np.array(trainingSet)[:, columns]
        clf = BaggingClassifier(QDA())
        clf.fit(X, y)
        testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
        X_test = np.array(testData)
        y_test = np.array(testSet)[:, columns]
        accuracy = clf.score(X_test, y_test) * 100
        print("Accuracy %:", accuracy)
    except Exception:
        # except Exception (not bare except:) so SystemExit/KeyboardInterrupt propagate
        e = sys.exc_info()[0]
        print("<p>Error: %s</p>" % e)
 def NLMmodelexp1():
     """Run modelExperiment on the NLM data with the standard classifier set."""
     models = [LR(), DT(), KNC(), RF(), ABC(), GNB(), QDA()]
     names = [
         'LogisticRegression', 'DTree', 'KNN', 'RandomForest',
         'AdaBoosted', 'GaussianNB', 'QuadraticDiscriminantAnalysis'
     ]
     modelExperiment(nlmInsampleData, nlmOutsampleData, 'NLMdata/', fullFV,
                     models, names, 'NLMmodelExperiment1.csv',
                     'NLMclassifier_plot1.png', True)
def qda(x_train, y_train, x_test, y_test, monkey):
    """Fit QDA, print accuracy/confusion matrix, and plot it for `monkey`."""
    clf = QDA()
    clf.fit(x_train, y_train)
    # (removed a leftover no-op: a second QDA(priors=None, reg_param=0.0)
    # instance was created here and immediately discarded)
    y_pred = clf.predict(x_test)
    print(len(x_train))
    print(len(y_train))
    print("qda Accuracy:", metrics.accuracy_score(y_test, y_pred))
    print("qda confusion: ")
    print(metrics.confusion_matrix(y_test, y_pred))
    # NOTE(review): metrics.plot_confusion_matrix was removed in
    # scikit-learn 1.2; ConfusionMatrixDisplay.from_estimator is the
    # replacement if this project upgrades.
    metrics.plot_confusion_matrix(clf,
                                  x_test,
                                  y_test,
                                  normalize='true',
                                  values_format='.0%')
    plt.title(str(monkey) + ": confusion_matrix - Qda algorithm")
    plt.show()
Beispiel #17
0
def qda_run(frame):
    """Train and evaluate a default QDA model on `frame`; print its accuracy."""
    from sklearn.discriminant_analysis import (QuadraticDiscriminantAnalysis as
                                               QDA)

    classifier = QDA()
    accuracy = run(classifier, frame.learning, frame.lindeps, frame.testing,
                   frame.tindeps, RUNS)
    print("QDA accuracy:", accuracy)
def getQDA(featurevector, labels, featurelength=constants.DECOMP_LENGTH):
    """Fit a default QDA classifier on the labelled samples and return it.

    Rows whose label is negative are treated as unlabelled and excluded
    from fitting.
    NOTE(review): `featurelength` is accepted but never used in this body,
    and QDA has no n_components-style parameter -- confirm whether callers
    rely on it before removing.
    """
    # QDA is constructed with all default hyperparameters
    qda = QDA()

    qda.fit(featurevector[labels >= 0], labels[labels >= 0])

    return qda
Beispiel #19
0
def Loop_for_computataion(my_train_data, my_train_label, model_cnn, status,
                          iris_cifar):
    """Run 5-fold stratified-shuffle evaluation of several models.

    status selects the pipeline: 1 = classical supervised comparison,
    2 = CNN training (reshapes rows to 32x32x3 images, so this branch
    assumes CIFAR-style data), 3 = PCA + mesh-grid analysis.
    NOTE(review): my_train_data/my_train_label are indexed with integer
    arrays, so they are presumably NumPy arrays -- confirm at call sites.
    """
    #Applying the K Fold using 5 splits as mentioned question
    lda = LDA()
    qda = QDA()
    nb = NB()
    rf = RandomForestClassifier(n_estimators=10,
                                criterion='entropy',
                                random_state=0)
    svm = SVC(kernel='rbf', random_state=0)
    dt = DecisionTreeClassifier(criterion='entropy', random_state=0)

    #CITATIONS:https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.StratifiedShuffleSplit.html
    #Even if i chnage the train and test size (Ex:Train 80% and test 20% I find slight variation in op I,e I mean
    #i have cross verified changing the sizes and fit it performs correctly)
    # 5 stratified shuffle splits, fixed seed for reproducibility
    Kfold_stratified_shuffleop = StratifiedShuffleSplit(n_splits=5,
                                                        train_size=0.8,
                                                        test_size=0.2,
                                                        random_state=0)
    for training_values, testing_values in Kfold_stratified_shuffleop.split(
            my_train_data, my_train_label):

        #using the standard naming convention X_train X_test,y_train,y_test
        X_train, X_test = my_train_data[training_values], my_train_data[
            testing_values]
        y_train, y_test = my_train_label[training_values], my_train_label[
            testing_values]

        print("\n")
        print("TRAINING VALUES:", training_values, "TESTING VALUES:",
              testing_values)
        print("\n")

        if status == 3:
            # PCA pipeline with decision-boundary mesh plots
            print("ENABLING PCA")
            meshgrid_pca_analysis(X_train, X_test, y_train, y_test, lda, qda,
                                  nb, rf, dt, svm, 1, iris_cifar)
        elif status == 1:
            # classical supervised-learning comparison
            compute_logic_supervised_learning(X_train, X_test, y_train, y_test,
                                              lda, qda, nb, rf, dt, svm, 1)
        elif status == 2:
            # carve a validation set out of the training fold for the CNN
            cnn_split = list(
                StratifiedShuffleSplit(n_splits=2,
                                       test_size=0.1).split(X_train, y_train))
            idx_tr, idx_val = cnn_split[0]
            X_val, y_val = X_train[idx_val], y_train[idx_val]
            X_tr, y_tr = X_train[idx_tr], y_train[idx_tr]

            # reshape flat rows into 32x32 RGB images for the CNN
            X_val = X_val.reshape(len(X_val), 32, 32, 3)
            X_tr = X_tr.reshape(len(X_tr), 32, 32, 3)
            X_test = X_test.reshape(len(X_test), 32, 32, 3)

            # one-hot encode the 10 class labels
            y_val = np_utils.to_categorical(y_val, 10)
            y_tr = np_utils.to_categorical(y_tr, 10)
            model_cnn.fit(X_tr, y_tr, validation_data=(X_val, y_val))
            model_cnn.predict(X_test)
        else:
            print("No proper selection")
Beispiel #20
0
    def removeLabels(self, labeledGroupName, label=None, description=None, commandline=None): # pragma: no cover
        '''
        Delete labeled MeasuredParameterResources that have ResourceType.name=labeledGroupName (such as 'Labeled Plankton').  
        Restrict deletion to the other passed in options, if specified: label is like 'diatom', description is like 
        'Using Platform dorado, Parameter {'salinity': ('33.65', '33.70')} from 20130916T124035 to 20130919T233905'
        (commandline is too long to show in this doc string - see examples in usage note).  Note: Some metadata
        ResourceTypes will not be removed even though the Resources that use them will be removed.
        '''
        # Remove MeasuredParameter associations with Resource (Labeled data)
        mprs = MeasuredParameterResource.objects.using(self.args.database).filter(resource__resourcetype__name=labeledGroupName
                                ).select_related('resource')
        if label:
            # narrow deletion to one label value
            mprs = mprs.filter(resource__name=LABEL, resource__value=label)

        if self.args.verbose > 1:
            print("  Removing MeasuredParameterResources with type = '%s' and label = %s" % (labeledGroupName, label))

        # remember each deleted association's Resource for later cleanup
        rs = []
        for mpr in mprs:
            rs.append(mpr.resource)
            mpr.delete(using=self.args.database)

        # Remove Resource associations with Resource (label metadata), make rs list distinct with set() before iterating on the delete()
        # NOTE(review): QDA here appears to be Django's Q query object under
        # an alias (keyword-filter construction, & and | composition) --
        # confirm against this module's imports.
        if label and description and commandline:
            try:
                rrs = ResourceResource.objects.using(self.args.database).filter(
                                                    (QDA(fromresource__name=LABEL) & QDA(fromresource__value=label)) &
                                                    ((QDA(toresource__name=DESCRIPTION) & QDA(toresource__value=description)) |
                                                     (QDA(toresource__name=COMMANDLINE) & QDA(toresource__value=commandline)) ) )
                if self.args.verbose > 1:
                    print("  Removing ResourceResources with fromresource__value = '%s' and toresource__value = '%s'" % (label, description))

                for rr in rrs:
                    rr.delete(using=self.args.database)

            except TypeError:
                # Likely TypeError: __init__() got an unexpected keyword argument 'fromresource__name'
                if self.args.verbose > 1:
                    print("  Previous Resource associations not found.")
        else:
            if self.args.verbose > 1:
                print("  Removing Resources associated with labeledGroupName = %s'" % labeledGroupName)

            # set() de-duplicates so each Resource is deleted once
            for r in set(rs):
                r.delete(using=self.args.database)
 def classify_using_sortingByDistanceToMean(self, X_train, X_test, y_train, y_test, portion_of_sampled_dataset_vector, classifiers_for_experiments):
     """Test accuracy of several classifiers trained on PSA-reduced data.

     Samples are sorted by distance from their class mean, reduced to each
     requested portion, and the recognition rate (%) is stored in the
     returned (n_classifiers, n_portions) array.
     """
     psa = PSA()
     # ---- sort samples of classes according to their ranks:
     sorted_samples, ranks = self.sort_samples_by_distance_from_mean(X=X_train, y=y_train)
     # ---- map classifier names to factories.  BUG FIX: an unknown name now
     # raises instead of silently reusing the previous iteration's model:
     classifier_factories = {
         'SVM': lambda: SVC(kernel='linear'),
         'LDA': lambda: LDA(),
         'QDA': lambda: QDA(),
         'Random Forest': lambda: RF(max_depth=2, random_state=0),
         'Logistic Regression': lambda: LR(),
         'Gaussian Naive Bayes': lambda: GaussianNB(),
     }
     # ---- Experimenting:
     recognition_rate_LIST = np.zeros((len(classifiers_for_experiments), len(portion_of_sampled_dataset_vector)))
     for classifier_index, classifier in enumerate(classifiers_for_experiments):
         print('############### Classifier: ' + classifier)
         if classifier not in classifier_factories:
             raise ValueError('Unknown classifier: ' + classifier)
         for portion_index, portion_of_sampled_dataset in enumerate(portion_of_sampled_dataset_vector):
             print('###### Portion of sampled dataset: ' + str(portion_of_sampled_dataset * 100) + '%')
             # ---- data reduction with PSA: keep the requested portion per class
             number_of_classes = len(sorted_samples)
             n_samples = []
             for class_index in range(number_of_classes):
                 number_of_samples_of_class = sorted_samples[class_index].shape[0]
                 n_samples.append(int(number_of_samples_of_class * portion_of_sampled_dataset))
             X, y = psa.reduce_data(sorted_samples=sorted_samples, n_samples=n_samples)
             # ---- report number of sampled data after PSA:
             print('number of sampled data in classes, after PSA: ' + str(n_samples))
             # ---- train and test with PSA:
             clf = classifier_factories[classifier]()
             clf.fit(X=X, y=y)
             labels_predicted = clf.predict(X_test)
             recognition_rate_PSA = (sum(labels_predicted == y_test) / len(labels_predicted)) * 100
             print('The recognition rate using ' + classifier + ' with data number reduction (PSA): ' + str(recognition_rate_PSA))
             recognition_rate_LIST[classifier_index, portion_index] = recognition_rate_PSA
     return recognition_rate_LIST
Beispiel #22
0
def qda(train_size=None):
    """Fit QDA on the dataset (optionally subsampled) and report metrics."""
    _, _, X_train, X_test, y_train, y_test = dataset()
    if train_size:
        # keep only a random subset of the training data
        X_train, _, y_train, _ = train_test_split(X_train,
                                                  y_train,
                                                  train_size=train_size)

    # renamed local so it no longer shadows this function's own name
    model = QDA()
    model.fit(X_train, y_train)
    mae(y_test, model.predict(X_test))
    confusion_matrix(y_test, model.predict(X_test),
                     model.score(X_test, y_test))
Beispiel #23
0
def qda(train_x, train_y, test_x, test_y, nlabels, with_cfmat=False):
    """Run a default QDA model through _classify and append zero importances.

    QDA exposes no feature importances, so a zero vector of length
    n_features is appended to keep the return shape consistent with the
    other classifier wrappers in this module.
    """
    # BUG FIX: the deprecated 'store_covariances' alias was removed in
    # scikit-learn 0.24 and raises TypeError; only 'store_covariance' exists.
    model = QDA(priors=None,
                reg_param=0.0,
                store_covariance=False,
                tol=0.0001)
    retval = _classify(model, train_x, train_y, test_x, test_y, nlabels,
                       with_cfmat)
    fake_importances = np.zeros((train_x.shape[1], ))
    retval = list(retval) + [fake_importances]
    return retval
Beispiel #24
0
def get_QDA(Xtrain, Ytrain, Xtest = None , Ytest = None, verbose = 0):
    """Fit a QDA classifier and return it.

    When verbose == 1, prints the train accuracy, and -- if test data is
    supplied -- the test accuracy as well.
    """
    qda = QDA()
    qda.fit(Xtrain, Ytrain)

    scores = np.empty((2))
    if verbose == 1:
        scores[0] = qda.score(Xtrain, Ytrain)
        print('QDA, train: {0:.02f}% '.format(scores[0] * 100))
        if Xtest is not None:
            scores[1] = qda.score(Xtest, Ytest)
            print('QDA, test: {0:.02f}% '.format(scores[1] * 100))
    return qda
Beispiel #25
0
def test_qda(data):
    """Fit QDA on data.train_x/train_y and print train and test accuracy."""
    qda_clf = QDA()
    qda_clf.fit(data.train_x, data.train_y)

    train_pred = qda_clf.predict(data.train_x)
    print('QDA')
    train_acc = metrics.accuracy_score(data.train_y, train_pred)
    print('Classification accuracy for train data = {:.2%}'.format(train_acc))

    test_pred = qda_clf.predict(data.test_x)
    test_acc = metrics.accuracy_score(data.test_y, test_pred)
    print('Classification accuracy for test data = {:.2%}'.format(test_acc))
Beispiel #26
0
def get_model(modelname):
    """Return a fresh, unfitted model instance for the given name.

    Raises KeyError for an unknown model name.
    """
    return {
        "mock": lambda: RandomClassifierMock(),
        "lda": lambda: LDA(),
        "qda": lambda: QDA(),
        "logistic": lambda: LogisticRegression(),
        "gnb": lambda: GaussianNB(),
        "knn": lambda: KNeighborsClassifier(),
        "forest": lambda: RandomForestClassifier(),
        "svm": lambda: SVC(kernel="linear"),
        "rbf-svm": lambda: SVC(kernel="rbf"),
        # BUG FIX: 'poly3-svm' used kernel="linear", which silently ignores
        # degree=3; a degree-3 polynomial SVM needs kernel="poly"
        "poly3-svm": lambda: SVC(kernel="poly", degree=3)
    }[modelname]()
Beispiel #27
0
def test_quadratic_discriminant_analysis(data):
    """Fit QDA on the training split and print train and test accuracy."""
    clf = QDA()
    clf.fit(data.train_x, data.train_y)

    predicted_train = clf.predict(data.train_x)
    print('QDA')
    print('Classification accuracy for train data = {:.2%}'.format(
        metrics.accuracy_score(data.train_y, predicted_train)))

    predicted_test = clf.predict(data.test_x)
    print('Classification accuracy for test data = {:.2%}'.format(
        metrics.accuracy_score(data.test_y, predicted_test)))
 def SOmodelexp1():
     """Run modelExperiment on the StackOverflow data with the standard set."""
     models = [
         LR(),
         DT(),
         KNC(),
         RF(n_estimators=200),
         ABC(),
         GNB(),
         QDA(),
     ]
     names = [
         'LogisticRegression', 'DTree', 'KNN', 'RandomForest',
         'AdaBoosted', 'GaussianNB', 'QuadraticDiscriminantAnalysis'
     ]
     modelExperiment(SOInsampleData, SOOutsampleData, 'stackoverflowdata/',
                     fullFV, models, names, 'SOmodelExperiment1.csv',
                     'SOclassifier_plot1.png', True)
Beispiel #29
0
def fitLDAQDA(ts):
    """Fit LR, LDA and QDA on lagged returns; evaluate from 2019-12-01 on."""
    features = ts[["Lag1", "Lag2"]]
    target = ts["Direction"]
    start_test = datetime(2019, 12, 1)
    # split the series on the test start date
    X_train = features[features.index < start_test]
    X_test = features[features.index >= start_test]
    y_train = target[target.index < start_test]
    y_test = target[target.index >= start_test]
    pred = pd.DataFrame(index=y_test.index)
    pred["Actual"] = y_test
    models = [("LR", LogisticRegression()), ("LDA", LDA()), ("QDA", QDA())]
    for name, model in models:
        fitModel(name, model, X_train, y_train, X_test, pred)
Beispiel #30
0
 def quadratic_discriminant(self):
     """Grid-search QDA's reg_param with k-fold CV and attach an RMSE score.

     Returns the fitted GridSearchCV object with a `.validation_acc`
     attribute holding the root-mean-square error on the test responses.
     """
     param_grid = {'reg_param': [0, .1, .25, .4, .5, .6, .75, .9, 1]}
     qda = GridSearchCV(QDA(),
                        param_grid,
                        cv=self.kfolds,
                        error_score=np.nan)
     qda.fit(self.train_data, np.ravel(self.train_resp))
     predictions = np.reshape(qda.predict(self.test_data),
                              self.test_resp.shape)
     rmse = np.sqrt(np.mean((predictions - self.test_resp) ** 2))
     qda.validation_acc = rmse
     return qda