Example #1
def train_acc(data_path, algorithm_name):
    print(data_path)
    x, y, test_x, test_y = data.run(data_path)
    clf = None
    if algorithm_name == "gnb":
        clf = GNB()
        print("gnb instance.")
    elif algorithm_name == "lda":
        clf = LDA()
        print("lda instance.")
    elif algorithm_name == "qda":
        clf = QDA()
        print("qda instance.")
    else:
        print("Not implemented")
        return "Not implemented"

    num = 0
    clf.fit(x, y)
    train_result = clf.predict(x)
    for i in range(len(train_result)):
        if train_result[i] == y[i]:
            num += 1

    return num / len(y)
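A vectorized alternative to the counting loop above (a minimal sketch; the helper name is hypothetical and it assumes the labels support NumPy elementwise comparison):

import numpy as np

def vectorized_accuracy(y_pred, y_true):
    # fraction of predictions that match the ground-truth labels
    return float(np.mean(np.asarray(y_pred) == np.asarray(y_true)))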
Example #2
def models(modeltype='FDA'):
    '''
    Returns a dictionary of (trained) models. One per dataset.
    '''

    models_dict = dict()

    for filename in sorted(os.listdir(ROOT)):
        if filename.endswith('train'):
            dataset = filename.split('.')[0]
            XY_dict_train = file_to_dict(filename)
            X = XY_dict_train['X']
            Y = XY_dict_train['Y']

            if modeltype == 'FDA':
                model = FDA()
            elif modeltype == 'LogReg':
                model = LogReg()
            elif modeltype == 'LinReg':
                model = LinReg()
            elif modeltype == 'QDA':
                model = QDA()
            else:
                raise ValueError("model not implemented")

            model.fit(X, Y)
            models_dict[dataset] = model

    return models_dict
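A minimal usage sketch (assumes ROOT and file_to_dict are defined as the function above expects):

trained = models(modeltype='QDA')
for dataset, model in trained.items():
    # one trained model per *.train file found under ROOT
    print(dataset, type(model).__name__)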
Example #3
def main():

    #preprocessing
    dataPre = preprocessing()
    dataPre.process_data()
    xtrain, xtest, ytrain, ytest = dataPre.divide_data()

    #visualizing
    #dataPre.visualize_data()

    # modeling (QDA)
    print("Modeling ...")
    my_model = QDA()
    my_model.train(xtrain, ytrain)
    #my_model.test(xtest)
    acc = my_model.get_accuracy(xtest, ytest)
    print("testing accuracy : ", "{0:.4f}".format(acc))
    print("*********************************************")
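For comparison, a fit-and-score sketch using scikit-learn's QuadraticDiscriminantAnalysis in place of the project's own QDA class (an assumption; the custom class above exposes train/get_accuracy instead):

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

sk_qda = QuadraticDiscriminantAnalysis()
sk_qda.fit(xtrain, ytrain)
# score() returns mean accuracy on the held-out split
print("testing accuracy : {0:.4f}".format(sk_qda.score(xtest, ytest)))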
Example #4
def main(dataset, compute_errors=True, plot_boundaries=True, save=False):
    """ Fit the four models on the training sets, depending on the parameters
    compute the accuracy et plot the boundary
    args :: dataset : array(str) """

    filename = "data/" + dataset + ".train"
    x_train, y_train = read_file(filename)
    filename = "data/" + dataset + ".test"
    x_test, y_test = read_file(filename)

    models = [
        LDA(x_train, y_train),
        LinearRegression(x_train, y_train),
        LogisiticRegression(x_train, y_train),
        QDA(x_train, y_train)
    ]

    model_names = ["LDA", "LinearRegression", "LogisiticRegression", "QDA"]
    for i, model in enumerate(models):
        model_name = model_names[i]
        model.fit()
        if compute_errors:
            y_pred_train = [model.predict(x) for x in x_train]
            e = accuracy(y_train, y_pred_train)
            print("Accuracy with " + model_name)
            print("Training: ", e)
            y_pred_test = [model.predict(x) for x in x_test]
            e = accuracy(y_test, y_pred_test)
            print("Testing: ", e)
        if plot_boundaries:
            model.plot_boundary()
            plt.scatter(model.x[:, 0], model.x[:, 1], c=model.y, s=1)
            title = "Model: " + model_name + ", " + dataset + " (Train)"
            plt.title(title)
            if save:
                plt.savefig("figs/" + model_name + "_" + dataset[-1] +
                            "Train.png")
            plt.show()
            model.plot_boundary()
            plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, s=1)
            title = "Model: " + model_name + ", " + dataset + " (Test)"
            plt.title(title)
            if save:
                plt.savefig("figs/" + model_name + "_" + dataset[-1] +
                            "Test.png")
            plt.show()
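A minimal call sketch (assumes the data/ directory holds classificationA.train and classificationA.test in the format read_file expects):

if __name__ == "__main__":
    main("classificationA", compute_errors=True, plot_boundaries=False)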
Example #5
File: main.py  Project: ypradat/MVAClasses
    plot_decision_regions(X=X_combined,
                          y=y_combined,
                          classifier=linr_clf,
                          test_idx=range(X_train.shape[0],
                                         X_train.shape[0] + X_test.shape[0]),
                          ax=ax[2])
    ax[2].set_xlabel("x1", fontsize="large")
    ax[2].set_ylabel("x2", fontsize="large")
    ax[2].legend(loc="upper right", fontsize="large")
    ax[2].set_title("Linear regression (normal equation) on dataset %s" % l,
                    fontsize="x-large",
                    fontweight="bold")
    """
    Run QDA
    """
    QDA_clf = QDA()
    QDA_clf.fit(X_train, y_train)

    qda_train_error = np.mean(QDA_clf.predict(X_train).flatten() != y_train)
    qda_test_error = np.mean(QDA_clf.predict(X_test).flatten() != y_test)

    plot_decision_regions(X=X_combined,
                          y=y_combined,
                          classifier=QDA_clf,
                          test_idx=range(X_train.shape[0],
                                         X_train.shape[0] + X_test.shape[0]),
                          ax=ax[3])
    ax[3].set_xlabel("x1", fontsize="large")
    ax[3].set_ylabel("x2", fontsize="large")
    ax[3].legend(loc="upper right", fontsize="large")
    ax[3].set_title("Generative model (QDA) on dataset %s" % l,
                    fontsize="x-large",
                    fontweight="bold")
Example #6
    x1 = np.random.normal(loc=[1, 2], scale=(0.5, 0.2), size=(m, 2))
    y1 = np.zeros((m, 1))

    x2 = np.random.normal(loc=[5, 4], scale=(0.6, 1.0), size=(m, 2))
    y2 = np.ones((m, 1))

    x3 = np.random.normal(loc=[10, 3], scale=(0.4, 0.1), size=(m, 2))
    y3 = np.ones((m, 1)) * 2

    x = np.concatenate([x1, x2, x3])
    y = np.concatenate([y1, y2, y3])

    x_train, x_test, y_train, y_test = train_test_split(x, y)

    qda = QDA()
    qda.fit(x_train, y_train)
    train_acc = (qda.predict(x_train).argmax(1)
                 == y_train.squeeze()).mean() * 100
    test_acc = (qda.predict(x_test).argmax(1) == y_test.squeeze()).mean() * 100

    print(f'Train Accuracy : {train_acc}%')
    print(f'Test Accuracy : {test_acc}%')

    # plot
    xx1 = np.arange(x.min(), x.max(), 0.1)
    xx2 = np.arange(x.min(), x.max(), 0.1)
    xx1, xx2 = np.meshgrid(xx1, xx2)
    xx = np.c_[xx1.ravel(), xx2.ravel()]
    predict = qda.predict(xx).argmax(1)
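    # A sketch of the remaining plot; the original snippet ends before drawing
    # the grid predictions (assumes matplotlib.pyplot is imported as plt).
    plt.contourf(xx1, xx2, predict.reshape(xx1.shape), alpha=0.3)
    plt.scatter(x[:, 0], x[:, 1], c=y.squeeze(), s=5)
    plt.title("QDA decision regions")
    plt.show()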
Example #7
        log_reg = LogisticRegression()
        log_reg.fit(X_train, y_train)
        plot_results(log_reg, params['idx_dataset'], X_train, y_train, X_test,
                     y_test)
        print('The accuracy on train (test) dataset {} for LogReg: {} ({})'.
              format(params['idx_dataset'], log_reg.score(X_train, y_train),
                     log_reg.score(X_test, y_test)))

        # Linear regression
        lin_reg = LinearRegression()
        lin_reg.fit(X_train, y_train)
        plot_results(lin_reg, params['idx_dataset'], X_train, y_train, X_test,
                     y_test)
        print('The accuracy on train (test) dataset {} for LinReg: {} ({})'.
              format(params['idx_dataset'], lin_reg.score(X_train, y_train),
                     lin_reg.score(X_test, y_test)))

        # Quadratic Discriminant Analysis (QDA)
        qda = QDA()
        qda.fit(X_train, y_train)
        plot_results(qda, params['idx_dataset'], X_train, y_train, X_test,
                     y_test)
        print(
            'The accuracy on train (test) dataset {} for QDA: {} ({})'.format(
                params['idx_dataset'], qda.score(X_train, y_train),
                qda.score(X_test, y_test)))

    else:
        plot_results(None, params['idx_dataset'], X_train, y_train, X_test,
                     y_test)
def main():
    datasets = ['A', 'B', 'C']
    lda_models = {}
    qda_models = {}
    logistic_reg_models = {}
    lin_reg_models = {}
    for dataset in datasets:
        path_train = "classification_data_HWK1/classification_data_HWK1/classification%s.train" % dataset
        path_test = "classification_data_HWK1/classification_data_HWK1/classification%s.test" % dataset
        data_x_train, data_y_train = data.parse_data_with_labels(
            os.path.abspath(path_train), dimension=2, delimiter="\t")
        data_x_test, data_y_test = data.parse_data_with_labels(
            os.path.abspath(path_test), dimension=2, delimiter="\t")

        # LDA
        lda_models[dataset] = LDA(data_x_train,
                                  data_y_train,
                                  data_x_test,
                                  data_y_test,
                                  dataset_name=dataset)
        lda_models[dataset].train()

        print("\nLDA_Dataset_%s:\n" % dataset)
        print("The bernoulli parameter pi is \n%s\n" % lda_models[dataset].pi)
        print("The mean for the class {y=0} is \n%s\n" % lda_models[dataset].mu_0)
        print("The mean for the class {y=1} is \n%s\n" % lda_models[dataset].mu_1)
        print("Sigma is: \n%s\n" % lda_models[dataset].sigma)
        print("Training misclassification error is: %.2f %%\n" %
              (lda_models[dataset].compute_misclassification_err()[0] * 100))
        print("Test misclassification error is: %.2f %%\n" %
              (lda_models[dataset].compute_misclassification_err()[1] * 100))

        # Logistic Regression
        print("\nLogistic_Regression_Dataset_%s:\n" % dataset)
        w0 = np.array([[0, 0, 1]]).T
        logistic_reg_models[dataset] = LogisticRegression(data_x_train,
                                                          data_y_train,
                                                          w0,
                                                          data_x_test,
                                                          data_y_test,
                                                          dataset_name=dataset,
                                                          nb_iterations=20,
                                                          lambda_val=0.01)
        logistic_reg_models[dataset].train()

        print("\nThe learnt parameter w is: \n%s\n" % logistic_reg_models[dataset].w)
        print("\nThe learnt parameter b is: \n%s\n" % logistic_reg_models[dataset].b)
        print("Training misclassification error is: %.2f %%\n" %
              (logistic_reg_models[dataset].compute_misclassification_err()[0] * 100.))
        print("Test misclassification error is: %.2f %%\n" %
              (logistic_reg_models[dataset].compute_misclassification_err()[1] * 100.))

        # Linear Regression
        print("\nLinear_Regression_Dataset_%s\n" % dataset)
        lin_reg_models[dataset] = LinearRegression(data_x_train,
                                                   data_y_train,
                                                   data_x_test,
                                                   data_y_test,
                                                   dataset_name=dataset,
                                                   lambda_val=0)
        lin_reg_models[dataset].train()

        print("The learnt parameter w is: \n%s\n" % lin_reg_models[dataset].w)
        print("\nThe learnt parameter b is: \n%s\n" % lin_reg_models[dataset].b)
        print("The variance of Y computed for the latter w is: \n%s\n" %
              lin_reg_models[dataset].variance)

        print("Training misclassification error is: %.2f %%\n" %
              (lin_reg_models[dataset].compute_misclassification_err()[0] * 100.))
        print("Test misclassification error is: %.2f %%\n" %
              (lin_reg_models[dataset].compute_misclassification_err()[1] * 100.))

        # QDA
        qda_models[dataset] = QDA(data_x_train,
                                  data_y_train,
                                  data_x_test,
                                  data_y_test,
                                  dataset_name=dataset)
        qda_models[dataset].train()

        print("\nQDA_Dataset_%s:\n" % dataset)
        print("The bernoulli parameter pi is \n%s\n" % qda_models[dataset].pi)
        print("The mean for the class {y=0} is \n%s\n" % qda_models[dataset].mu_0)
        print("The mean for the class {y=1} is \n%s\n" % qda_models[dataset].mu_1)
        print("Sigma for the class {y=0} is: \n%s\n" % qda_models[dataset].sigma_0)
        print("Sigma for the class {y=1} is: \n%s\n" % qda_models[dataset].sigma_1)
        print("Training misclassification error is: %.2f %%\n" %
              (qda_models[dataset].compute_misclassification_err()[0] * 100))
        print("Test misclassification error is: %.2f %%\n" %
              (qda_models[dataset].compute_misclassification_err()[1] * 100))

    for model in [lda_models, logistic_reg_models, lin_reg_models, qda_models]:
        plt.subplot(221)
        model['A'].plot()
        plt.subplot(222)
        model['B'].plot()
        plt.subplot(212)
        model['C'].plot()

        plt.show()

        if model is logistic_reg_models:
            plt.subplot(221)
            model['A'].plot_convergence_func()
            plt.subplot(222)
            model['B'].plot_convergence_func()
            plt.subplot(212)
            model['C'].plot_convergence_func()

            plt.show()

        plt.subplot(221)
        model['A'].plot(test_mode=True)
        plt.subplot(222)
        model['B'].plot(test_mode=True)
        plt.subplot(212)
        model['C'].plot(test_mode=True)

        plt.show()
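The compute_misclassification_err method is defined elsewhere in the project; a minimal sketch of the quantity it reports, assuming hard label predictions (it mirrors the error computation shown in Example #5):

import numpy as np

def misclassification_error(y_true, y_pred):
    # fraction of disagreeing labels; multiplied by 100 above to print percentages
    return float(np.mean(np.asarray(y_true).ravel() != np.asarray(y_pred).ravel()))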