Example #1
def renew(directory):
    # path = directory + "判别公式参数.csv"
    # df = pd.DataFrame({"基金交易频率": [0], "最大交易金额": [0],
    #                    "混合型占比": [0], "债券指数或债券型占比": [0],
    #                    "货币型占比": [0], "股票指数或股票型占比": [0], "其他型占比": [0]})
    # df.to_csv(path, encoding='gbk')
    client = pd.read_csv(directory + '用户记录.csv', encoding='gbk', index_col=0)
    # Do not include the 其他型占比 (other-fund-type ratio) column in x
    x = client[[
        "基金交易频率", "最大交易金额", "混合型占比", "债券指数或债券型占比", "货币型占比", "股票指数或股票型占比"
    ]]
    y = client['客户类别']
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(x, y)
    # print(x)
    # print(y)
    # print(lda.coef_)
    df = pd.DataFrame(data=lda.coef_,
                      columns=[
                          "基金交易频率", "最大交易金额", "混合型占比", "债券指数或债券型占比", "货币型占比",
                          "股票指数或股票型占比"
                      ])
    df['截距'] = lda.intercept_
    df.insert(0, '客户类别', lda.classes_)
    df.to_csv(directory + "判别公式参数.csv", encoding='gbk')
    return
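The CSV written by renew() stores one coefficient row and one intercept per class, so a new client can be scored without refitting. A minimal sketch of that lookup (score_client is my own name, and it assumes the multiclass case that renew() itself implies by writing one coefficient row per class):

import numpy as np
import pandas as pd

def score_client(directory, features):
    # Load the discriminant parameters saved by renew().
    params = pd.read_csv(directory + '判别公式参数.csv', encoding='gbk', index_col=0)
    cols = ['基金交易频率', '最大交易金额', '混合型占比',
            '债券指数或债券型占比', '货币型占比', '股票指数或股票型占比']
    # One decision value per class: w . x + b; the largest score wins.
    scores = params[cols].to_numpy() @ np.asarray(features, dtype=float) + params['截距'].to_numpy()
    return params['客户类别'].iloc[int(np.argmax(scores))]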
Example #2
def test_LinearDiscriminantAnalysis_solver(*data):
    '''
    Test how the solver parameter affects the predictive performance of LinearDiscriminantAnalysis.
    :param data: variadic argument; a tuple of (training set, test set, training labels, test labels)
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    solvers = ['svd', 'lsqr', 'eigen']
    for solver in solvers:
        if solver == 'svd':
            lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        else:
            lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver,
                                                                   shrinkage=None)
        lda.fit(X_train, y_train)
        print('Score at solver=%s: %.2f' % (solver, lda.score(X_test, y_test)))
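A minimal driver for the function above, using scikit-learn's bundled iris data (the dataset choice is mine, not the original author's):

from sklearn import datasets, model_selection

iris = datasets.load_iris()
# train_test_split returns (X_train, X_test, y_train, y_test), matching the unpacking above.
data = model_selection.train_test_split(iris.data, iris.target, test_size=0.25,
                                        random_state=0, stratify=iris.target)
test_LinearDiscriminantAnalysis_solver(*data)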
Example #3
def train(train_data_1, train_data_2, numFilt):

    numTrials_1 = np.size(train_data_1, 0)
    numTrials_2 = np.size(train_data_2, 0)

    # train the CCACSP filters 
    ccacsp_filts = calc_CCACSP(train_data_1, train_data_2, numFilt)

    # extract the features
    train_filt_1 = apply_CCACSP(train_data_1, ccacsp_filts, numFilt)
    train_logP_1  = np.squeeze(np.log(np.var(train_filt_1, axis=2)))

    train_filt_2 = apply_CCACSP(train_data_2, ccacsp_filts, numFilt)
    train_logP_2  = np.squeeze(np.log(np.var(train_filt_2, axis=2)))

    # define the classifier
    clf = sklda.LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto')
    
    X = np.concatenate((train_logP_1, train_logP_2), axis=0)

    y1 = np.zeros(numTrials_1)
    y2 = np.ones(numTrials_2)
    y = np.concatenate((y1, y2))

    # train the classifier 
    clf.fit(X, y)

    return ccacsp_filts, clf
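The natural inference-side counterpart, sketched on the assumption that apply_CCACSP keeps the signature used in train() above (calc_CCACSP and apply_CCACSP are project-specific helpers not shown in this snippet):

import numpy as np

def test(test_data, ccacsp_filts, clf, numFilt):
    # Filter the unseen trials with the learned CCACSP filters, then build
    # the same log-variance features the classifier was trained on.
    test_filt = apply_CCACSP(test_data, ccacsp_filts, numFilt)
    test_logP = np.squeeze(np.log(np.var(test_filt, axis=2)))
    return clf.predict(test_logP)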
Example #4
def model_lda(train, test, label):

    reglin = discriminant_analysis.LinearDiscriminantAnalysis()
    reglin.fit(train[label], train['hotel_cluster'])

    prediction = reglin.predict_proba(test[label])

    return util.best_proba(prediction), reglin
Example #5
 def lda(self, n_components=2, solver='svd'):
     print('Calculating linear discriminant analysis....\n')
     t = time.time()
     lda = discriminant_analysis.LinearDiscriminantAnalysis(
         solver=solver, n_components=n_components)
     lda_array = lda.fit_transform(self.X, self.y)
     #plot2D(lda_array,self.y,'LDA','LDA: 1078 cells with 10 subtypes',(time.time() - t))
     return lda_array
Example #6
def test_LinearDiscriminantAnalysis_solver(*data):
    X_train, X_test, y_train, y_test = data
    solvers = ['svd', 'lsqr', 'eigen']
    for solver in solvers:
        lda = discriminant_analysis.LinearDiscriminantAnalysis(solver=solver)
        lda.fit(X_train, y_train)
        print('Score at solver = %s : %.2f' %
              (solver, lda.score(X_test, y_test)))
Example #7
def classifyWithLinearDiscriminant(xTrain, xTest, yTrain, yTest):

    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    print(lda.fit(xTrain, yTrain))
    print("Linear Discriminant Analysis Score: " +
          str(lda.score(xTest, yTest)))
    print("Linear Discriminant Analysis Report: ")
    print(classification_report(yTest, lda.predict(xTest), labels=[1, 2, 3]))
Example #8
def solve_weights(features_Vp, labels):
    clf = lda.LinearDiscriminantAnalysis()  # solver='eigen', shrinkage='auto', priors=None, n_components=None
    clf.fit(features_Vp, labels)
    # print(clf.predict(features_Vp[0]))
    # print(clf.coef_)
    features_Up = clf.transform(features_Vp)
    print(clf.coef_.shape, features_Up.shape)
    return clf.coef_, features_Up
Example #9
def main():
    data = pd.read_csv('data_3_6.csv', names=['x', 'y', 'class'])

    max_x = data['x'].max()
    min_x = data['x'].min()
    max_y = data['y'].max()
    min_y = data['y'].min()

    trans_x = data['x'].transform(lambda x: (x - min_x) / (max_x - min_x))
    trans_y = data['y'].transform(lambda x: (x - min_y) / (max_y - min_y))

    reshape_x = trans_x.values.reshape(-1, 1)
    reshape_y = trans_y.values.reshape(-1, 1)
    reshape_class = data['class'].values.ravel()

    reshape_data = np.append(reshape_y, reshape_x, axis=1)

    nb_classifier = nb.MultinomialNB()
    nb_fit = nb_classifier.fit(reshape_data, reshape_class)
    nb_scores = ms.cross_val_score(nb_fit, reshape_data, reshape_class, cv=10)
    nb_est = ms.cross_val_predict(nb_fit, reshape_data, reshape_class, cv=10)
    nb_conf = met.confusion_matrix(reshape_class, nb_est)
    print("Naive Bayes - Score %f +/-%f" %
          (np.mean(nb_scores), np.std(nb_scores)))
    print(nb_conf, "\n")

    qda_classifier = da.QuadraticDiscriminantAnalysis()
    qda_fit = qda_classifier.fit(reshape_data, reshape_class)
    qda_scores = ms.cross_val_score(qda_fit,
                                    reshape_data,
                                    reshape_class,
                                    cv=10)
    qda_est = ms.cross_val_predict(qda_fit, reshape_data, reshape_class, cv=10)
    qda_conf = met.confusion_matrix(reshape_class, qda_est)
    print("QDA - Score %f +/-%f" % (np.mean(qda_scores), np.std(qda_scores)))
    print(qda_conf, "\n")

    lda_classifier = da.LinearDiscriminantAnalysis()
    lda_fit = lda_classifier.fit(reshape_data, reshape_class)
    lda_scores = ms.cross_val_score(lda_fit,
                                    reshape_data,
                                    reshape_class,
                                    cv=10)
    lda_est = ms.cross_val_predict(lda_fit, reshape_data, reshape_class, cv=10)
    lda_conf = met.confusion_matrix(reshape_class, lda_est)
    print("LDA - Score %f +/-%f" % (np.mean(lda_scores), np.std(lda_scores)))
    print(lda_conf, "\n")

    plt.figure()
    mlxplt.plot_decision_regions(reshape_data, reshape_class, clf=nb_fit)

    plt.figure()
    mlxplt.plot_decision_regions(reshape_data, reshape_class, clf=qda_fit)

    plt.figure()
    mlxplt.plot_decision_regions(reshape_data, reshape_class, clf=lda_fit)

    plt.show()
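The script above never declares its aliases; the import block below is my reconstruction of what it appears to expect (mlxplt in particular would be mlxtend's plotting module, which provides plot_decision_regions):

import matplotlib.pyplot as plt
import mlxtend.plotting as mlxplt
import numpy as np
import pandas as pd
from sklearn import discriminant_analysis as da
from sklearn import metrics as met
from sklearn import model_selection as ms
from sklearn import naive_bayes as nb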
Example #10
def real_scores(values, target):
    clf_models = {}
    svm_clf = svm.SVC(gamma="auto").fit(values, target)
    clf_models["svm"] = svm_clf  # Actually not needed, the cv does the training again
    lg_clf = linear_model.LogisticRegression(
        random_state=constants.RANDOM_STATE,
        solver='lbfgs').fit(values, target)
    clf_models["logistic_regression"] = lg_clf
    lineardisc_clf = discriminant_analysis.LinearDiscriminantAnalysis().fit(
        values, target)
    clf_models["linear_discriminant"] = lineardisc_clf
    neigh_clf = neighbors.KNeighborsClassifier().fit(values, target)
    clf_models["kneighbors"] = neigh_clf
    dectree_clf = tree.DecisionTreeClassifier(
        random_state=constants.RANDOM_STATE).fit(values, target)
    clf_models["decision_tree"] = dectree_clf
    gaussian_clf = naive_bayes.GaussianNB().fit(values, target)
    clf_models["gaussian_nb"] = gaussian_clf
    random_forest_clf = ensemble.RandomForestClassifier(n_estimators=100).fit(
        values, target)
    clf_models["random_forest"] = random_forest_clf
    gradient_boost_clf = ensemble.GradientBoostingClassifier().fit(
        values, target)
    clf_models["gradient_boosting"] = gradient_boost_clf
    results = {}
    for clf in clf_models.keys():
        cv_results = cross_validate(clf_models[clf],
                                    values,
                                    target,
                                    cv=10,
                                    scoring=SCORE_RAW)
        results["None+{}".format(clf)] = np.mean(cv_results["test_score"])

    for pproc in pprocs.keys():
        try:
            new_values, new_target = preprocessor(pproc, values, target)
        except Exception:
            for clf in clf_models.keys():
                results["{}+{}".format(pproc, clf)] = 0
            continue

        for clf in clf_models.keys():
            try:
                cv_results = cross_validate(clf_models[clf],
                                            new_values,
                                            new_target,
                                            cv=10,
                                            scoring=SCORE_RAW)
            except ValueError:
                cv_results = cross_validate(clf_models[clf],
                                            values,
                                            target,
                                            cv=10,
                                            scoring=SCORE_RAW)
            results["{}+{}".format(pproc,
                                   clf)] = np.mean(cv_results["test_score"])
    return results
Example #11
def lda():
    print(
        "\n################## Linear Discriminant Analysis ##################")

    from sklearn import discriminant_analysis

    clf = discriminant_analysis.LinearDiscriminantAnalysis()

    return clf
Example #12
def lda(X_tra, y_tra, X_val, y_val, index_no, classifier_num):

    y_tra, X_tra, y_val, X_val, weights = dataRegulationSKL(
        y_tra, X_tra, y_val, X_val, index_no)

    clf = skdisa.LinearDiscriminantAnalysis(solver='svd', n_components=5)

    clf.fit(X_tra, y_tra)
    return processLearning(clf, X_tra, y_tra, X_val, y_val)
Example #13
def linear_discriminant_analysis(*data):
    _x_train, _x_test, _y_train, _y_test = data

    regression = discriminant_analysis.LinearDiscriminantAnalysis()
    regression.fit(_x_train, _y_train)
    print(regression.score(_x_train, _y_train))
    print(regression.score(_x_test, _y_test))

Example #14
def test_LinearDiscriminantAnalysis_solver(*data):
    '''
    test score with different solver
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train, X_test, y_train, y_test = data
    solvers = ['svd', 'lsqr', 'eigen']
    for solver in solvers:
        if solver == 'svd':
            lda = discriminant_analysis.LinearDiscriminantAnalysis(
                solver=solver)
        else:
            lda = discriminant_analysis.LinearDiscriminantAnalysis(
                solver=solver, shrinkage=None)
        lda.fit(X_train, y_train)
        print('Score at solver={0}: {1}'.format(solver,
                                                lda.score(X_test, y_test)))
Example #15
def run_plot_LDA():
    train_X, test_X, train_y, test_y = load_data()
    X = np.vstack((train_X, test_X))
    Y = np.vstack((train_y.reshape(train_y.size, 1),
                   test_y.reshape(test_y.size, 1)))
    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(X, Y)
    converted_X = np.dot(X, np.transpose(model.coef_)) + model.intercept_
    plot_LDA(converted_X, Y)
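run_plot_LDA() calls a plot_LDA helper that this snippet does not include; the sketch below is purely my guess at a minimal matplotlib version:

import matplotlib.pyplot as plt
import numpy as np

def plot_LDA(converted_X, Y):
    # Scatter the first discriminant value per sample, coloured by class;
    # the random y-jitter only spreads the points for readability.
    Y = np.asarray(Y).ravel()
    fig, ax = plt.subplots()
    for label in np.unique(Y):
        pts = converted_X[Y == label]
        ax.scatter(pts[:, 0], np.random.uniform(size=len(pts)), label=str(label), alpha=0.6)
    ax.set_title('Samples in the LDA decision space')
    ax.legend()
    plt.show()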
Example #16
 def __init__(self, experiment):
     SemiSupervisedProjection.__init__(self, experiment)
     self.projection = discriminant_analysis.LinearDiscriminantAnalysis(
         n_components=self.num_components)
     if not self.conf.families_supervision:
         message = 'Lda projection without families supervision. '
         message += 'The projection space is of dimension 1, and so the projected instances cannot be displayed '
          message += 'with hexagonal binnings.'
         warnings.warn(message)
Example #17
 def calc_fitness(self, data, target):
     if self.changed:
         nfolds = 4
         scores = np.zeros(nfolds)
         precision = np.zeros(nfolds)
         recall = np.zeros(nfolds)
         X = np.copy(data)
         for i in range(0, len(self.genome)):
             if self.genome[len(self.genome) - 1 - i] == 0:
                 X = np.delete(X, len(self.genome) - 1 - i, 1)
         i = 0
         skf = cross_validation.StratifiedKFold(n_splits=nfolds)
         for train, test in skf.split(X, target):
             if self.type == 'dt':
                 self.clf = tree.DecisionTreeClassifier(
                     criterion='entropy',
                     splitter='random').fit(X[train], target[train])
             elif self.type == 'svm':
                 self.clf = svm.SVC(kernel='linear').fit(
                     X[train], target[train])
             elif self.type == 'knn':
                 self.clf = knn.KNeighborsClassifier().fit(
                     X[train], target[train])
             elif self.type == 'lr':
                 self.clf = lm.LogisticRegression().fit(
                     X[train], target[train])
             elif self.type == 'nb':
                 self.clf = nb.GaussianNB().fit(X[train], target[train])
             elif self.type == 'rf':
                 self.clf = ens.RandomForestClassifier().fit(
                     X[train], target[train])
             elif self.type == 'et':
                 self.clf = ens.ExtraTreesClassifier().fit(
                     X[train], target[train])
             elif self.type == 'mlp':
                 self.clf = nn.MLPClassifier(
                     hidden_layer_sizes=(40,
                                         5)).fit(X[train], target[train])
             elif self.type == 'lda':
                 self.clf = da.LinearDiscriminantAnalysis().fit(
                     X[train], target[train])
             elif self.type == 'qda':
                 self.clf = da.QuadraticDiscriminantAnalysis().fit(
                     X[train], target[train])
             else:
                 self.clf = None
             p = self.clf.predict(X[test])
             scores[i] = metrics.accuracy_score(target[test], p)
             precision[i] = metrics.precision_score(target[test], p)
             recall[i] = metrics.recall_score(target[test], p)
             i += 1
         self.accuracy = scores.mean()
         self.std = scores.std()
         self.precision = precision.mean()
         self.recall = recall.mean()
         self.changed = False
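calc_fitness also leans on undeclared aliases. Because StratifiedKFold is constructed with n_splits and used through .split(), the cross_validation alias must actually point at sklearn.model_selection rather than the long-removed sklearn.cross_validation module; the block below is my reconstruction:

import numpy as np
from sklearn import discriminant_analysis as da
from sklearn import ensemble as ens
from sklearn import linear_model as lm
from sklearn import metrics
from sklearn import model_selection as cross_validation
from sklearn import naive_bayes as nb
from sklearn import neighbors as knn
from sklearn import neural_network as nn
from sklearn import svm, tree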
Example #18
 def __init__(self, conf):
     SemiSupervisedProjection.__init__(self, conf)
     self.projection = discriminant_analysis.LinearDiscriminantAnalysis(
         n_components=conf.num_components)
     if not self.conf.multiclass:
         self.conf.logger.warning(
             'Lda projection without families supervision. '
             'The projection space is of dimension 1, and so the '
             'projected instances cannot be displayed with hexagonal '
             'binnings.')
Example #19
def lda_projection():

    print("Computing Linear Discriminant Analysis projection")
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    t0 = time()
    X_lda = discriminant_analysis.LinearDiscriminantAnalysis(n_components=2).fit_transform(X2, y)
    plot_embedding(X_lda,
                   "Linear Discriminant projection of the digits (time %.2fs)" %
                   (time() - t0))
Example #20
def test_LinearDiscriminantAnalysis(*data):
    '''
    Test the basic usage of LinearDiscriminantAnalysis.
    '''
    X_train, X_test, y_train, y_test = data
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s' % (lda.coef_, lda.intercept_))
    print('Score: %.2f' % lda.score(X_test, y_test))
Example #21
def rdm_ldt(data, noise=0.):  # Our data is an mxnxk matrix. m = samples, n = states, k = activation.
    m, n, k = data.shape
    # add noise to the data
    data += np.random.normal(loc=0., scale=noise, size=data.shape)
    if m % 2 != 0:
        # discard last sample
        data = data[:-1]
        m = m-1
        warnings.warn("for ldt we need an even number of samples. Discarding one sample")

    # Divide the data into two separate sets.
    set1 = data[:m//2]
    set2 = data[m//2:]

    # Run linear discriminant analysis for each pair of states...
    rdm = np.zeros((n, n))
    for i in range(n):
        for j in range(i+1, n):
            # Get sample activations for 2 states from the training set
            train_state1 = set1[:, i, :].reshape(m//2, k)
            train_state2 = set1[:, j, :].reshape(m//2, k)
            # stack them:
            X = np.concatenate((train_state1, train_state2), axis=0)
            # give state 1 label "0" and state 2 label "1"
            y = np.hstack((np.zeros(m//2), np.ones(m//2)))
            # fit linear discriminant analysis
            lda = sklda.LinearDiscriminantAnalysis(solver='svd')
            lda.fit(X, y)

            # save the coefficients and the intercept.
            coeffs = lda.coef_
            intercept = lda.intercept_

            # Get the same states from the test set
            test_state1 = set2[:, i, :].reshape(m//2, k)
            test_state2 = set2[:, j, :].reshape(m//2, k)
            # Compute "distance" (orthogonal vector value) with the intercept.
            # These "distances" (not really) will be positive or negative depending on the category
            distances_state1 = (np.dot(coeffs, np.transpose(test_state1)).reshape(-1) + intercept) / np.sqrt(np.sum(coeffs**2))
            distances_state2 = (np.dot(coeffs, np.transpose(test_state2)).reshape(-1) + intercept) / np.sqrt(np.sum(coeffs**2))

            # Now do a t-test to see if the two categories are separated.
            tvalue, p_value = stats.ttest_ind(distances_state1, distances_state2)
            distance = np.abs(tvalue)
            # This is our distance value for the RDM!
            rdm[i, j] = distance
            # Do the other side of the RDM, it's symmetrical.
            rdm[j, i] = distance

    # Fill the diagonal with 0s.
    for i in range(n):
        rdm[i, i] = 0.

    return rdm
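A quick way to exercise rdm_ldt on synthetic data; this assumes the snippet's own imports (numpy as np, scipy.stats as stats, sklearn.discriminant_analysis as sklda, warnings), and the shapes below are my invention:

import matplotlib.pyplot as plt
import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(40, 6, 20))         # 40 samples, 6 states, 20 activation units
data += np.arange(6)[None, :, None] * 0.5   # give each state a distinct mean
rdm = rdm_ldt(data, noise=0.1)
plt.imshow(rdm)
plt.colorbar(label='|t| between projected test sets')
plt.show()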
Example #22
    def LDA(self, train_X, test_X, train_y, dims):
        lda = sk_discriminant_analysis.LinearDiscriminantAnalysis(
            n_components=dims)
        lda.fit(train_X, train_y)
        print('LDA:')
        print('LDA class centers (means_):', lda.means_)
        print('LDA accuracy on the training data:', lda.score(train_X, train_y))
        train_X = lda.transform(train_X)
        test_X = lda.transform(test_X)

        return train_X, train_y, test_X
Example #23
def test_LinearDiscriminantAnalysis(*data):
    '''
    Test the basic usage of LinearDiscriminantAnalysis.
    :param data: variadic argument; a tuple of (training set, test set, training labels, test labels)
    :return: None
    '''
    X_train, X_test, y_train, y_test = data
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    print('Coefficients:%s, intercept %s' % (lda.coef_, lda.intercept_))
    print('Score: %.2f' % lda.score(X_test, y_test))
Example #24
    def _train(self):
        x = self._train_features
        y = self._train_outputs

        pipe = pipeline.Pipeline([
            ('expand', preprocessing.PolynomialFeatures(degree=2)),
            ('estim', discriminant_analysis.LinearDiscriminantAnalysis())
        ])

        pipe.fit(x, y)
        self._model = pipe.predict
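The same pipeline fitted outside the class on toy data, as a self-contained sketch (the dataset choice is mine):

from sklearn import datasets, discriminant_analysis, pipeline, preprocessing

X, y = datasets.load_iris(return_X_y=True)
pipe = pipeline.Pipeline([
    ('expand', preprocessing.PolynomialFeatures(degree=2)),
    ('estim', discriminant_analysis.LinearDiscriminantAnalysis()),
])
pipe.fit(X, y)
print(pipe.predict(X[:5]))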
Example #25
def test_LinearDiscriminantAnalysis(*data):
    '''
    test of LDA
    :param data: train_data, test_data, train_value, test_value
    :return:  None
    '''
    X_train, X_test, y_train, y_test = data
    lda = discriminant_analysis.LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    print('Coefficients: {0}, intercept {1}'.format(lda.coef_, lda.intercept_))
    print('Score: {0}'.format(lda.score(X_test, y_test)))
Example #26
def lda(X, y, nr_components=2):
    """
    Linear discriminant analysis
    :param X: Input vectors
    :param y: Input classes
    :param nr_components: Dimension of output co-ordinates
    :return: Output co-ordinates
    """
    print("Computing Linear Discriminant Analysis projection")
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    return discriminant_analysis.LinearDiscriminantAnalysis(n_components=nr_components).fit_transform(X2, y)
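For reference, a usage sketch on scikit-learn's digits data (my choice of dataset, not the source's):

import matplotlib.pyplot as plt
from sklearn import datasets

digits = datasets.load_digits()
coords = lda(digits.data, digits.target, nr_components=2)
plt.scatter(coords[:, 0], coords[:, 1], c=digits.target, s=8)
plt.show()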
Example #27
def lda_predict(self, unlabelled):
    ''' Use Linear Discriminant Analysis for classification.
    WARNING: Will only work when we have multiple data samples
    for each dataset (i.e., two for left, two for right, etc.)'''
    self.clf = discriminant_analysis.LinearDiscriminantAnalysis()
    self.clf.fit(self.dataset, self.targets)

    unlabelled = np.array(unlabelled).flatten()

    # predict() expects a 2D array, even for a single sample
    target = self.clf.predict(unlabelled.reshape(1, -1))
    return self.positions[target[0]]
Example #28
def LDA_proposed(data, target, n):
    my_pca = My_pca(n)
    results = my_pca.MCPCA_fit_transform(data, target)
    X_train, X_test, Y_train, Y_test = train_test_split(results['new_data'],
                                                        target,
                                                        train_size=0.35,
                                                        random_state=0)

    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(X_train, Y_train)
    sc = model.score(X_test, Y_test)
    return sc
Example #29
def LDA_pca(data, target, n):
    pca = PCA(n)
    data1 = pca.fit_transform(data)
    X_train, X_test, Y_train, Y_test = train_test_split(data1,
                                                        target,
                                                        train_size=0.35,
                                                        random_state=0)

    model = discriminant_analysis.LinearDiscriminantAnalysis()
    model.fit(X_train, Y_train)
    sc = model.score(X_test, Y_test)
    return sc
Example #30
def using_lda(X, s=None):
    X = X.to_numpy()
    print("Computing Linear Discriminant Analysis projection")
    X2 = X.copy()
    X2.flat[::X.shape[1] + 1] += 0.01  # Make X invertible
    t0 = time()
    X_lda = discriminant_analysis.LinearDiscriminantAnalysis(
        n_components=2).fit_transform(X2, y)
    #plot_embedding(X_lda,"Linear Discriminant projection of the results (time %.2fs)" %(time() - t0))
    plot_our_embedding(
        X_lda, "Linear Discriminant projection of the results (time %.2fs)" %
        (time() - t0), s)