Example #1
def tryLinearDiscriminantAnalysis(goFast):
  from sklearn.datasets import load_svmlight_file
  if goFast:
    training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
  else:
    training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)

  from sklearn.lda import LDA
  from sklearn.metrics import accuracy_score
  from sklearn.grid_search import ParameterGrid
  from sklearn.decomposition import RandomizedPCA

  rpcaDataGrid = [{"n_components": [10,45,70,100],
                    "iterated_power": [2, 3, 4],
                    "whiten": [True]}]

  for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
    rpcaOperator = RandomizedPCA(**rpca_parameter_set)
    rpcaOperator.fit(training_data)
    # transform() takes only the feature matrix; PCA ignores labels
    new_training_data = rpcaOperator.transform(training_data)
    new_validation_data = rpcaOperator.transform(validation_data)
    ldaOperator = LDA()
    ldaOperator.fit(new_training_data,training_labels)
    print "Score = " + str(accuracy_score(validation_labels,ldaOperator.predict(new_validation_data)))
Example #2
class Ensemble:

	def __init__(self, data):
		self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
		self.lda = LDA()
		self.dec = DecisionTreeClassifier(criterion='entropy')
		self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)

		self.make_prediction(data)


	def make_prediction(self, data):
		'''
		Make an ensemble prediction
		'''
		self.rf.fit(data.features_train, data.labels_train)
		self.lda.fit(data.features_train, data.labels_train)
		self.dec.fit(data.features_train, data.labels_train)
		self.ada.fit(data.features_train, data.labels_train)

		pre_pred = []
		self.pred = []

		ada_pred = self.ada.predict(data.features_test)
		rf_pred = self.rf.predict(data.features_test)
		lda_pred = self.lda.predict(data.features_test)
		dec_pred = self.dec.predict(data.features_test)

		for i in range(len(rf_pred)):
			pre_pred.append([ rf_pred[i], lda_pred[i], dec_pred[i], ada_pred[i] ])

		for entry in pre_pred:
			pred_list = sorted(entry, key=entry.count, reverse=True)
			self.pred.append(pred_list[0])
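The final loop above implements a plurality vote by sorting each row of predictions by its own count; `collections.Counter` expresses the same hard vote more directly. A sketch (the rows argument corresponds to the `pre_pred` list built above):

from collections import Counter

def majority_vote(rows):
    # rows: one list of per-classifier predictions per test sample
    return [Counter(row).most_common(1)[0][0] for row in rows]

# e.g. majority_vote([[1, 1, 0, 1], [0, 0, 1, 0]]) == [1, 0]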
Example #3
 def startlda(self):
     from sklearn.lda import LDA
     clf = LDA()
     # cast features to float and flatten the labels to a 1-D list,
     # which is what LDA.fit() expects
     X = [[float(value) for value in row] for row in self.traindata]
     Y = [int(label) for row in self.trainclass for label in row]
     y_test = [[float(value) for value in row] for row in self.testdata]
     clf.fit(X, Y)
     print clf.predict(y_test)
Example #4
 def DLDA(self, trainLabel, featureData, testData):
     # print featureData == testData
     # print testData
     clf = LDA()
     clf.fit(featureData, trainLabel)
     testLabel = clf.predict(testData)
     return testLabel
Example #5
    def test_twomethods(self):
        key_y_pred = 'y' + conf.SEP + conf.PREDICTION
        X, y = datasets.make_classification(n_samples=20, n_features=5,
                                            n_informative=2)
        # = With EPAC
        wf = Methods(LDA(), SVC(kernel="linear"))
        r_epac = wf.run(X=X, y=y)

        # = With SKLEARN
        lda = LDA()
        svm = SVC(kernel="linear")
        lda.fit(X, y)
        svm.fit(X, y)
        r_sklearn = [lda.predict(X), svm.predict(X)]

        # Comparison
        for i_cls in range(2):
            comp = np.all(np.asarray(r_epac[i_cls][key_y_pred]) ==
                                    np.asarray(r_sklearn[i_cls]))
            self.assertTrue(comp, u'Diff Methods')

        # test reduce
        r_epac_reduce = [wf.reduce().values()[0][key_y_pred],
            wf.reduce().values()[1][key_y_pred]]
        comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
        self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
Example #6
def LDAmeanScore(X, Y, n_folds, dim_reduction=0):
    """
    :param X: classifier input matrix, n_samples * n_parameters, with n_parameters >= 2 and n_samples > 0; the data must be suitable for LDA classification
    :param Y: label vector, length n_samples
    :param n_folds: number of folds for the KFold, > 1
    :param dim_reduction: if equal to 0, no reduction; if less than 0, best_dimension is used; otherwise a PCA reduction to dim_reduction dimensions is applied
    :return: the mean cross-validation score (also printed); returns -1 if n_folds > n_samples
    """
    if dim_reduction > 0 and X.shape[1] > dim_reduction:
        X = dim_reduction_PCA(X, dim_reduction)
    if dim_reduction == -1:
        dim_reduction = best_dimension(X)
        print "Best dimension : " + str(dim_reduction)
        X = dim_reduction_PCA(X, dim_reduction)

    if X.shape[0] > n_folds:
        # Cross-validation to estimate the performance of an LDA classifier
        kf = KFold(n=len(Y), n_folds=n_folds, shuffle=True, random_state=None)
        scores = []
        for train_index, test_index in kf:
            X_train, X_test = X[train_index, :], X[test_index, :]
            Y_train, Y_test = Y[train_index], Y[test_index]
            cl = LDA()
            cl.fit(X_train, Y_train)
            scores.append(cl.score(X_test, Y_test))

        print "Score moyen : ", np.mean(np.array(scores))
        return 100.0 * np.mean(np.array(scores))
    else:
        return -1
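`KFold(n=..., n_folds=...)` above is the old `sklearn.cross_validation` API. A sketch of the same cross-validation loop against the current `sklearn.model_selection` module:

import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import KFold, cross_val_score

def lda_mean_score(X, Y, n_folds):
    if X.shape[0] <= n_folds:
        return -1
    kf = KFold(n_splits=n_folds, shuffle=True)
    # cross_val_score repeats the fit/score loop from the example above
    scores = cross_val_score(LinearDiscriminantAnalysis(), X, Y, cv=kf)
    print("Mean score:", np.mean(scores))
    return 100.0 * np.mean(scores)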
Example #7
class FldaLite(FLDA):
    def fit(self, X, y):
        self.scaler_ = StandardScaler()
        self.pca_ = PCA(n_components=self.pca_n_components)
        XX = self.pca_.fit_transform(self.scaler_.fit_transform(X))

        self.knn_ = KNeighborsClassifier(n_neighbors=self.knn_n_neighs)
        self.knn_.fit(XX, y)

        yy = map(lambda nn: y[nn], self.knn_.kneighbors(XX)[1])
        self.cv_ = CountVectorizer(input='content', tokenizer=lambda x: x, lowercase=False)
        XXX = self.cv_.fit_transform(array(yy))
        self.tfidf_transformer_ = TfidfTransformer()
        XXX = self.tfidf_transformer_.fit_transform(XXX)

        self.clusterer_ = SpectralClustering(n_clusters=self.n_scented_clusters)
        yyy = self.clusterer_.fit_predict(XXX)

        self.lda_ = LDA(**self.lda_params)
        self.lda_.fit(XX, yyy)

        return self

    def transform(self, X):
        # use the fitted scaler and PCA here; calling fit_transform would
        # refit them on the new data and silently change the projection
        return self.lda_.transform(self.pca_.transform(self.scaler_.transform(X)))
Example #8
	def LDA模型(self, 問題, 答案):
		# identifiers: 問題 = questions, 答案 = answers, 問 = question
		lda = LDA()
# 		clf = svm.NuSVC()
		print('training LDA')
		lda.fit(問題, 答案)
		print('training done')
		return lambda 問: lda.predict(問)
Example #9
 def lda_on(train_x,
            train_y,
            test_x,
            test_y,
            feats_name='all_features'):
     """ Linear Discriminant Analysis """
     lda = LDA()
     lda.fit(train_x, train_y, store_covariance=True)
     print feats_name, "(train):", lda.score(train_x, train_y)
     print feats_name, "(test):", lda.score(test_x, test_y)
     with open(dataset_name + '_lda_classif_' + feats_name + '.pickle',
               'wb') as w_f:  # pickle files should be opened in binary mode
         cPickle.dump(lda, w_f)
     y_pred = lda.predict(test_x)
     X_train, X_validate, y_train, y_validate = cross_validation\
             .train_test_split(train_x, train_y, test_size=0.2,
                     random_state=0)
     lda.fit(X_train, y_train)
     print feats_name, "(validation):", lda.score(
         X_validate, y_validate)
     y_pred_valid = lda.predict(X_validate)
     cm_test = confusion_matrix(test_y, y_pred)
     cm_valid = confusion_matrix(y_validate, y_pred_valid)
     np.set_printoptions(threshold='nan')
     with open("cm_test" + feats_name + ".txt", 'w') as w_f:
         print >> w_f, cm_test
     with open("cm_valid" + feats_name + ".txt", 'w') as w_f:
         print >> w_f, cm_valid
Example #10
def main():
    
    for question in range(3,18):
        
        print("Question ", question, " Percent Accuracy")

        trainingSet_features, trainingSet_labels, testSet_features, testSet_labels = loadTrainingAndTestData(question)
        #print(len(trainingSet_features))
        #print(trainingSet_labels)
        #print(len(testSet_features))
        #print(len(testSet_labels))
        
        #print(trainingSet_labels)
        nnC = KNeighborsClassifier(n_neighbors=5)
        nnC.fit(trainingSet_features, trainingSet_labels) 
        nnC_predictions = nnC.predict(testSet_features)
        print("Nearest Neighbor: %.2f" % (100*accuracy_score(testSet_labels,nnC_predictions)),"%")

        svmC = svm.SVC()
        svmC.fit(trainingSet_features, trainingSet_labels) 
        svmCpredictions = svmC.predict(testSet_features)
        print("Support Vector Machines: %.2f" % (100*accuracy_score(testSet_labels,svmCpredictions)),"%")

        rfC = RandomForestClassifier(n_estimators=100)
        rfC.fit(trainingSet_features, trainingSet_labels) 
        rfC_predictions = rfC.predict(testSet_features)
        print("Random Forrest:  %.2f" % (100*accuracy_score(testSet_labels,rfC_predictions)),"%")

        ldaC = LDA(solver='lsqr')
        ldaC.fit(trainingSet_features, trainingSet_labels) 
        ldaC_predictions = ldaC.predict(testSet_features)
        print("Linear Discriminant Analysis Classifier: %.2f" % (100*accuracy_score(testSet_labels,ldaC_predictions)),"%")
Example #11
def score(train_X, train_y):
    X_train, X_valid, y_train, y_valid = train_test_split(train_X, train_y, test_size=0.01, random_state=10)

    clf = LDA()
    clf.fit(X_train, y_train)
    y_pred = clf.predict_proba(X_valid)
    return log_loss(y_valid, y_pred)
Example #12
def test_classification():
    from read import read
    import numpy, tfidf
    from sklearn.decomposition import TruncatedSVD
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import Normalizer

    m, files = read("training.json")
    y_map = [str(f["topic"]) for f in files]
    # build the list of distinct topics without shadowing the built-in map()
    topics = []
    for label in y_map:
        if label not in topics:
            topics.append(label)
    y = numpy.array([topics.index(label) for label in y_map])

    print("Building TF-IDF...")
    X, vectorizer = tfidf.vectorizeTFIDF(files)
    print(X.shape)

    print("Performing dimensionality reduction using LDA...")

    lda = LDA(n_components=9)
    X = X.toarray()
    lda.fit(X, y)
    X = lda.transform(X)

    mlp = MLPClassifier()
    mlp.fit(X, y)
    training_score = mlp.score(X, y)
    print("training accuracy: %f" % training_score)
Example #13
def myLDA(X,y):
    t1 = clock()
    clf = LDA()
    clf.fit(X, y)
    newRep = clf.transform(X)
    t2 = clock()
    return t2-t1
Example #14
def eval_func(chromosome):
    alldata = LoadFeatures(data_N_x, data_F_x, chromosome)
    sx, sy, tx, ty = GetData(0.8, alldata)
    clf = LDA()
    clf.fit(sx, sy)
    py = clf.predict(tx)
    return accuracy_score(ty, py)
Example #15
 def lda(self, reducedArray = []):
     # n_components is the number of states/classes we distinguish between; 0/1 for target vs. non-target is enough
     lda = LDA(n_components=2)
     if len(reducedArray) > 0:
         self.ldaMat = lda.fit(np.resize(reducedArray,(len(reducedArray),len(reducedArray[0]))), self.targetVals)
     else:
         self.ldaMat = lda.fit(np.resize(self.signalArray,(len(self.signalArray),len(self.signalArray[0]))), self.targetVals)
Example #16
def LDAClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    clf = LDA(solver='lsqr')
    #clf = LDA()
    clf.fit(trainData, ravel(trainLabel))
    testLabel = clf.predict_proba(testData)[:,1]
    saveResult(enrollment_id, testLabel, 'Proba_sklearn_LDA.csv')
    return testLabel
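`predict_proba(testData)[:, 1]` hard-codes the positive class into column 1; the column order actually follows `clf.classes_`. A sketch of a safer lookup (`clf` and `testData` as in the example above), assuming the positive label is literally 1:

# find the column of the positive class instead of hard-coding index 1
pos_col = list(clf.classes_).index(1)
proba_positive = clf.predict_proba(testData)[:, pos_col]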
Example #17
def LDAmeanScore(X, Y, n_folds, dim_reduction=0):
    """
    :param X: classifier input matrix, n_samples * n_parameters, with n_parameters >= 2 and n_samples > 0; the data must be suitable for LDA classification
    :param Y: label vector, length n_samples
    :param n_folds: number of folds for the KFold, > 1
    :param dim_reduction: if less than or equal to 0, no reduction; otherwise a PCA reduction is applied when the number of parameters exceeds dim_reduction
    :return: the mean cross-validation score (also printed); returns -1 if n_folds > n_samples
    """
    if dim_reduction > 0 and X.shape[1] > dim_reduction:
        X = dim_reduction_PCA(X, dim_reduction)

    if (X.shape[0] > n_folds):
        # Cross-validation to estimate the performance of an LDA classifier
        kf = KFold(n=len(Y), n_folds=n_folds, shuffle=False, random_state=None)
        scores = []
        for train_index, test_index in kf:
            X_train, X_test = X[train_index, :], X[test_index, :]
            Y_train, Y_test = Y[train_index], Y[test_index]
            cl = LDA()
            cl.fit(X_train, Y_train)
            scores.append(cl.score(X_test, Y_test))

        print 'Mean score: ', np.mean(np.array(scores))
        return 100.0 * np.mean(np.array(scores))
    else:
        return -1
Example #18
def classify(Xtrain,Xtest,Ytrain,Ytest):
    '''
    Logistic regression, LDA, Gaussian naive Bayes, and linear/RBF SVM classifiers
    '''
    scores = np.zeros((5,))
    

    lr = LogisticRegression()
    lr.fit(Xtrain,Ytrain)
    scores[0] = lr.score(Xtest,Ytest)

    lda = LDA()
    lda.fit(Xtrain,Ytrain)
    scores[1] = lda.score(Xtest,Ytest)

    nb = GaussianNB()
    nb.fit(Xtrain,Ytrain)
    scores[2] = nb.score(Xtest,Ytest)
    
    lsvm = LinearSVC( C = 1)
    lsvm.fit(Xtrain,Ytrain)
    scores[3] = lsvm.score(Xtest,Ytest)
    
    gsvm = SVC(kernel='rbf', C = 1000)
    gsvm.fit(Xtrain,Ytrain)
    scores[4] = gsvm.score(Xtest,Ytest)
    return scores
Example #19
def train_lda():
    from sklearn.lda import LDA

    data, classes = get_data_and_classes()
    classifier = LDA()
    classifier.fit(data, classes, store_covariance=True)
    return classifier
Example #20
def pca_lda(X_train, X_test, y_train, y_test):
    pca = PCA(n_components=500)
    lda = LDA()
    pca.fit(X_train)
    # pca.transform mean-centres before projecting, which a raw dot
    # product with pca.components_ would skip
    lda.fit(pca.transform(X_train), y_train)
    return lda.score(pca.transform(X_test), y_test, sample_weight=None)
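The same chain is usually written as a scikit-learn Pipeline, which keeps the centring applied by `PCA.transform` consistent between fitting and scoring. A sketch against the current API:

from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import make_pipeline

def pca_lda(X_train, X_test, y_train, y_test):
    # PCA projection and LDA classifier fitted and applied as one unit
    model = make_pipeline(PCA(n_components=500), LinearDiscriminantAnalysis())
    model.fit(X_train, y_train)
    return model.score(X_test, y_test)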
Example #21
def read_subpop_data(one_hot=True, fake_data=False, test_size=0.2, undersample=False):

    labeled_dic = convert_txt_to_npy(LABELED_RL_PATH)
    unlabeled_dic = convert_txt_to_npy(UNLABELED_RL_PATH, labeled=False)
    X_train, X_test, y_train, y_test = split_train_test(labeled_dic, test_size=test_size)

    class DataSets(object):
        pass
    data_sets = DataSets()
    
    if undersample:
        from unbalanced_dataset import UnderSampler 
        US = UnderSampler(verbose=True)
        X_train, y_train = US.fit_transform(X_train, y_train)
        
    lda = LDA()
    lda.fit(X_train, y_train)
    score = metrics.accuracy_score(lda.predict(X_test), y_test)
    print("Baseline LDA: %f " % score)

    if one_hot:
        y_train = convert_to_one_hot(y_train)
        y_test = convert_to_one_hot(y_test)

    data_sets = DataSets()
    data_sets.test = DataSet(X_test, y_test)
    data_sets.train = SemiDataSet(unlabeled_dic['data'], X_train, y_train)

    return data_sets
Example #22
 def DLDA(self, trainLabel, featureData, testData):
     # print featureData == testData
     # print testData
     clf = LDA()
     clf.fit(featureData, trainLabel)
     testLabel = clf.predict(testData)
     return testLabel
Example #23
def main():

    logging.basicConfig(format='[%(asctime)s] %(levelname)7s: %(message)s', level=logging.DEBUG)

    all_image_numbers = generate_all_image_numbers(no_of_persons, samples_person)
    classes = all_image_numbers[:, 0]
    all_face_vectors = load_face_vectors_from_disk(all_image_numbers, image_size)

    classifier = LDA()
    logging.debug("Training..")
    classifier.fit(all_face_vectors, classes)

    while True:
        function = input(
            "0)Exit\n"
            "1)Live test\n"
            "2)Test image \"test.JPG\"\n"
            "3)General test\n"
            "\n"
            "Choose function:"
        )
        if function == "1":
            test_live(classifier, all_face_vectors)
        elif function == "2":
            test_one_image(classifier, all_face_vectors)
        elif function == "3":
            test(all_face_vectors, classes)
        elif function == "0":
            return
Example #24
def pca_lda(X_train,X_test,y_train,y_test):
    pca = PCA(n_components=500)
    lda = LDA()
    pca.fit(X_train)
    # pca.transform mean-centres before projecting, which a raw dot
    # product with pca.components_ would skip
    lda.fit(pca.transform(X_train), y_train)
    return lda.score(pca.transform(X_test), y_test, sample_weight=None)
Example #25
def LDA_train(self, param):
    H = self.get_Htotal(self.Xtrain,
                        self.GXtrain)  # Get the hidden output matrix

    ##############  BOOSTING  ##############
    if (self.D_flag == 1):  # If we have given Weights to the samples
        W_root = np.sqrt(self.D)
        H = H * W_root

    lda = LDA(solver='lsqr')  # svd , lsqr, eigen
    lda.fit(H, self.Ytrain.ravel())

    self.bo = np.zeros(
        (1, self.nO))  # In the standard ELM, these do not count

    projection = lda.coef_

    self.Wo = projection.T  # Write the output weights into the structure

    Hmeans = copy.deepcopy(lda.means_)
    self.Hmeans = Hmeans  ### keep it on the instance for use somewhere else
    self.priors = copy.deepcopy(lda.priors_)
    threshold = np.dot(projection, (Hmeans[0, :] + Hmeans[1, :]) / 2) - np.log(
        self.priors[1] / self.priors[0])
    #        print Hmeans.shape
    #        print projection.shape
    self.bo = -threshold
Example #26
def do_lda(x, y, folds):
    indexes = list(range(len(x)))
    shuffle(indexes)
    x = list(x[i] for i in indexes)
    y = list(y[i] for i in indexes)
    fold_size = len(x) / folds
    corrects = []
    for fold in range(folds):
        test_x = []
        train_x = []
        test_y = []
        train_y = []
        for i in range(len(x)):
            fold_index = i / fold_size
            if fold == fold_index:
                test_x.append(x[i])
                test_y.append(y[i])
            else:
                train_x.append(x[i])
                train_y.append(y[i])
        print 'Partitioned data into fold'
        test_x, train_x = remove_redundant_dimensions(test_x, train_x)
        print 'Removed redundant dimensions'
        lda = LDA()
        lda.fit(train_x, train_y)
        print 'Fit lda'
        predictions = lda.predict(test_x)
        correct = sum(1 for i in range(len(predictions)) if predictions[i] == test_y[i])
        print 'Did fold, correct:', correct
        corrects.append(correct)
    return corrects
Example #27
def LDA_train(self, param):
    H = self.get_H(self.Xtrain)  # Get the hidden output matrix

    lda = LDA(solver='lsqr')  # svd , lsqr, eigen
    lda.fit(H, self.Ytrain.ravel())

    self.bo = np.zeros(
        (1, self.nO))  # In the standard ELM, these do not count

    projection = lda.coef_

    self.Wo = projection.T  # Write the output weights into the structure

    Hmeans = copy.deepcopy(lda.means_)
    self.Hmeans = Hmeans  ### keep it on the instance for use somewhere else
    self.priors = copy.deepcopy(lda.priors_)
    threshold = np.dot(projection, (Hmeans[0, :] + Hmeans[1, :]) / 2) - np.log(
        self.priors[1] / self.priors[0])
    #        print Hmeans.shape
    #        print projection.shape
    self.bo = -threshold

    #    print lda.score(self.H, self.Ytrain.ravel())
    #    print self.score(self.Xtrain, self.Ytrain)
    #    print "***************"

    self.flag_LDA = 1
Example #28
class FGDA(BaseEstimator, TransformerMixin):
    def __init__(self, metric='riemann', tsupdate=False):
        self.metric = metric
        self.tsupdate = tsupdate
        self._ts = TangentSpace(metric=metric, tsupdate=tsupdate)

    def _fit_lda(self, X, y):
        self.classes = numpy.unique(y)
        self._lda = LDA(n_components=len(self.classes) - 1,
                        solver='lsqr',
                        shrinkage='auto')

        ts = self._ts.fit_transform(X)
        self._lda.fit(ts, y)

        W = self._lda.coef_.copy()
        self._W = numpy.dot(
            numpy.dot(W.T, numpy.linalg.pinv(numpy.dot(W, W.T))), W)
        return ts

    def _retro_project(self, ts):
        ts = numpy.dot(ts, self._W)
        return self._ts.inverse_transform(ts)

    def fit(self, X, y=None):
        self._fit_lda(X, y)
        return self

    def transform(self, X):
        ts = self._ts.transform(X)
        return self._retro_project(ts)

    def fit_transform(self, X, y=None):
        ts = self._fit_lda(X, y)
        return self._retro_project(ts)
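This transformer mirrors pyRiemann's FGDA (geodesic filtering of covariance matrices via a tangent-space LDA). A minimal usage sketch, assuming pyriemann is installed and that `epochs` is an array of shape (n_trials, n_channels, n_samples) with one label per trial in `y`:

from pyriemann.estimation import Covariances

# one SPD covariance matrix per trial: shape (n_trials, n_channels, n_channels)
covs = Covariances().fit_transform(epochs)

fgda = FGDA(metric='riemann')
filtered_covs = fgda.fit_transform(covs, y)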
Example #29
def get_performance(test_df, X_std, y):
    Xtest = test_df.ix[:, 'x.1':'x.10'].values
    ytest = test_df.ix[:, 'y'].values

    X_std_test = StandardScaler().fit_transform(Xtest)

    lda_model = LDA()
    lda_model.fit(X_std, y)

    qda_model = QDA()
    qda_model.fit(X_std, y)

    knn_model = KNeighborsClassifier(n_neighbors=10)
    knn_model.fit(X_std, y)

    print "KNN SCORE"
    print knn_model.score(X_std_test, ytest)
    print "LDA SCORE"
    print lda_model.score(X_std_test, ytest)
    print "QDA SCORE"
    print qda_model.score(X_std_test, ytest)

    knn_scores_training = []
    knn_scores_test = []

    for i in range(1, 12):
        knn_model = KNeighborsClassifier(n_neighbors=i)
        knn_model.fit(X_std, y)
        knn_scores_training.append(knn_model.score(X_std, y))
        knn_scores_test.append(knn_model.score(X_std_test, ytest))

    plt.plot(range(11), knn_scores_training, 'r--')
    plt.plot(range(11), knn_scores_test, 'b--')
    plt.axis([0, 10, 0.3, 1.1])
    plt.show()
Example #30
def test_classification():
    from read import read
    import numpy, tfidf
    from sklearn.decomposition import TruncatedSVD
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import Normalizer

    m, files = read("training.json")
    y_map = [str(f["topic"]) for f in files]
    # build the list of distinct topics without shadowing the built-in map()
    topics = []
    for label in y_map:
        if label not in topics:
            topics.append(label)
    y = numpy.array([topics.index(label) for label in y_map])

    print("Building TF-IDF...")
    X, vectorizer = tfidf.vectorizeTFIDF(files)
    print(X.shape)

    print("Performing dimensionality reduction using LDA...")

    lda = LDA(n_components=9)
    X = X.toarray()
    lda.fit(X, y)
    X = lda.transform(X)

    mlp = MLPClassifier()
    mlp.fit(X, y)
    training_score = mlp.score(X, y)
    print("training accuracy: %f" % training_score)
Example #31
def main_lda():
	X,y=fh_lda()

	lda = LDA()
	lda.fit(X, y)

	splot = plot_LDA(lda, X, y, lda.predict(X))
	return splot
Example #32
def curve_per_subject(subject, data_path, test_labels):
    d = load_train_data(data_path, subject)
    x, y_10m = d['x'], d['y']
    n_train_examples = x.shape[0]
    n_timesteps = x.shape[-1]
    print 'n_preictal', np.sum(y_10m)
    print 'n_interictal', np.sum(1 - y_10m)

    x, y = reshape_data(x, y_10m)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)

    lda = LDA()
    lda.fit(x, y)

    pred_1m = lda.predict_proba(x)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_train_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    fpr, tpr, threshold = roc_curve(y_10m, pred_10m)
    c = np.sqrt((1 - tpr) ** 2 + fpr ** 2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]][-1]
    print opt_threshold

    # ------- TEST ---------------

    d = load_test_data(data_path, subject)
    x_test, id = d['x'], d['id']
    n_test_examples = x_test.shape[0]
    n_timesteps = x_test.shape[3]
    x_test = reshape_data(x_test)
    x_test = data_scaler.transform(x_test)

    pred_1m = lda.predict_proba(x_test)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_test_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)

    y_pred = np.zeros_like(test_labels)
    y_pred[np.where(pred_10m >= opt_threshold)] = 1
    cm = confusion_matrix(test_labels, y_pred)
    print print_cm(cm, labels=['interictal', 'preictal'])
    sn = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
    sp = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
    print sn, sp

    sn, sp = [], []
    t_list = np.arange(0.0, 1.0, 0.01)
    for t in t_list:
        y_pred = np.zeros_like(test_labels)
        y_pred[np.where(pred_10m >= t)] = 1
        cm = confusion_matrix(test_labels, y_pred)
        sn_t = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
        sp_t = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
        sn.append(sn_t)
        sp.append(sp_t)

    return t_list, sn, sp
Example #33
def curve_per_subject(subject, data_path, test_labels):
    d = load_train_data(data_path, subject)
    x, y_10m = d['x'], d['y']
    n_train_examples = x.shape[0]
    n_timesteps = x.shape[-1]
    print('n_preictal', np.sum(y_10m))
    print('n_interictal', np.sum(1 - y_10m))

    x, y = reshape_data(x, y_10m)
    data_scaler = StandardScaler()
    x = data_scaler.fit_transform(x)

    lda = LDA()
    lda.fit(x, y)

    pred_1m = lda.predict_proba(x)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_train_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)
    fpr, tpr, threshold = roc_curve(y_10m, pred_10m)
    c = np.sqrt((1 - tpr) ** 2 + fpr ** 2)
    opt_threshold = threshold[np.where(c == np.min(c))[0]][-1]
    print(opt_threshold)

    # ------- TEST ---------------

    d = load_test_data(data_path, subject)
    x_test, id = d['x'], d['id']
    n_test_examples = x_test.shape[0]
    n_timesteps = x_test.shape[3]
    x_test = reshape_data(x_test)
    x_test = data_scaler.transform(x_test)

    pred_1m = lda.predict_proba(x_test)[:, 1]
    pred_10m = np.reshape(pred_1m, (n_test_examples, n_timesteps))
    pred_10m = np.mean(pred_10m, axis=1)

    y_pred = np.zeros_like(test_labels)
    y_pred[np.where(pred_10m >= opt_threshold)] = 1
    cm = confusion_matrix(test_labels, y_pred)
    print(print_cm(cm, labels=['interictal', 'preictal']))
    sn = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
    sp = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
    print(sn, sp)

    sn, sp = [], []
    t_list = np.arange(0.0, 1.0, 0.01)
    for t in t_list:
        y_pred = np.zeros_like(test_labels)
        y_pred[np.where(pred_10m >= t)] = 1
        cm = confusion_matrix(test_labels, y_pred)
        sn_t = 1.0 * cm[1, 1] / (cm[1, 1] + cm[1, 0])
        sp_t = 1.0 * cm[0, 0] / (cm[0, 0] + cm[0, 1])
        sn.append(sn_t)
        sp.append(sp_t)

    return t_list, sn, sp
Example #34
def get_LDA(Xtrain, Xtest, Ytrain, Ytest):
    lda = LDA()
    lda.fit(Xtrain, Ytrain)
    scores = np.empty((4))
    scores[0] = lda.score(Xtrain, Ytrain)
    scores[1] = lda.score(Xtest, Ytest)
    print('LDA, train: {0:.02f}% '.format(scores[0] * 100))
    print('LDA, test: {0:.02f}% '.format(scores[1] * 100))

    return lda
Example #35
def lda(data,labels,n,v_type):
	train_data,train_labels,test_data,test_labels = split_data(data,labels,v_type)

	clf = LDA()
	clf.fit(np.array(train_data,dtype=np.float64), np.array(train_labels,dtype=np.float64))
	y_pred = clf.predict(test_data)
	pure_accuracy_rate = len([y_pred[x] for x in range(len(y_pred)) if y_pred[x] == test_labels[x]])/float(len(test_labels))
	report = classification_report(test_labels, y_pred, target_names=rock_names)
	cm = confusion_matrix(test_labels, y_pred)
	return pure_accuracy_rate,report,y_pred,test_labels,test_data,clf,cm,"LDA"
Example #36
def get_LDA(Xtrain, Xtest, Ytrain, Ytest):
        lda = LDA()
        lda.fit(Xtrain,Ytrain)
        scores = np.empty((4))
        scores[0] = lda.score(Xtrain,Ytrain)
        scores[1] = lda.score(Xtest,Ytest)
        print('LDA, train: {0:.02f}% '.format(scores[0]*100))
        print('LDA, test: {0:.02f}% '.format(scores[1]*100))
        
        return lda
Example #37
def computeLDA(data, dim):
    samples_indexes = range(len(data))
    indexes, y = lfw.loadTrainingDataLabels(samples_indexes,
                                            min_nb_samples_per_class=10)

    samples = data[indexes]
    lda = LDA(dim)
    lda.fit(data[indexes], y)

    return lda
Example #38
def lda(ds, n):
    '''
        Projects the data onto the most discriminant directions.
        sklearn caps n_components at n_classes - 1 (a single dimension
        in our binary case); larger values of n are ignored.
    '''
    selector = LDA(n_components=n)
    selector.fit(ds.data, ds.target)
    new_data = selector.transform(ds.data)
    return Dataset(new_data, ds.target)
Example #39
def lda(ds, n):
    '''
        Projects the data onto the most discriminant directions.
        sklearn caps n_components at n_classes - 1 (a single dimension
        in our binary case); larger values of n are ignored.
    '''
    selector = LDA(n_components=n)
    selector.fit(ds.data, ds.target)
    new_data = selector.transform(ds.data)
    return Dataset(new_data, ds.target)
Example #40
def plotLDA3D(X, y, names=[]):

    plt.cla()
    lda = LDA(n_components=3)
    lda.fit(X, y)
    X = lda.transform(X)

    fig = plt.figure(1, figsize=(4, 3))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, 0.95, 1], elev=48, azim=134)

    classes = np.unique(y)
    colors_ = list(six.iteritems(colors.cnames))
    hex_ = [color[1] for color in colors_]
    rgb = [colors.hex2color(color) for color in hex_]
    colors_ = []

    class_label = []
    for i in range(0, len(classes)):
        colors_.append(rgb[i])

        if len(names) == 0:
            class_label.append((str(i), i))
        else:
            class_label.append((names[i], i))

    for name, label in class_label:
        ax.text3D(
            X[y == label, 0].mean(),
            X[y == label, 1].mean() + 1.5,
            X[y == label, 2].mean(),
            name,
            horizontalalignment="center",
            bbox=dict(alpha=0.5, edgecolor="w", facecolor="w"),
        )
    # Reorder the labels to have colors matching the cluster results
    y = y.astype(int)
    # y = np.choose(y, class_label)
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)

    x_surf = [X[:, 0].min(), X[:, 0].max(), X[:, 0].min(), X[:, 0].max()]
    y_surf = [X[:, 0].max(), X[:, 0].max(), X[:, 0].min(), X[:, 0].min()]
    x_surf = np.array(x_surf)
    y_surf = np.array(y_surf)
    v0 = lda.transform(lda.coef_[[0]])
    v0 /= v0[-1]
    v1 = lda.transform(lda.coef_[[1]])
    v1 /= v1[-1]

    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])

    plt.show()
Example #41
	def LDA佮SVM模型(self, 問題, 答案):
		# identifiers: 問題 = questions, 答案 = answers, 問 = question
		sample_weight_constant = np.ones(len(問題))
		clf = svm.SVC(C=1)
		lda = LDA()
# 		clf = svm.NuSVC()
		print('training LDA')
		lda.fit(問題, 答案)
		print('training SVM')
		clf.fit(lda.transform(問題), 答案, sample_weight=sample_weight_constant)
		print('training done')
		return lambda 問: clf.predict(lda.transform(問))
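Chaining the LDA projection into the SVM is exactly what `sklearn.pipeline` is for; a sketch of the same model (hypothetical `questions`/`answers` arrays standing in for 問題/答案):

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC

# LDA acts as a supervised transformer feeding the SVM
model = make_pipeline(LinearDiscriminantAnalysis(), SVC(C=1))
model.fit(questions, answers)  # questions/answers stand in for 問題/答案
predictions = model.predict(questions)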
Example #42
def plot_lda_projection(marker, flname):
	lda = LDA()
	lda.fit(marker["individuals"], marker["population_labels"])
	print lda.score(marker["individuals"], marker["population_labels"])
	proj = lda.transform(marker["individuals"])
	n_samples, n_components = proj.shape

	plt.scatter(proj, marker["population_labels"])
	plt.xlabel("Component 0", fontsize=18)
	plt.ylabel("Population Labels", fontsize=18)

	plt.savefig(flname, dpi=200)
Example #43
def fit_data(inputs, labels, method):
    if method == 'LDA':
        classifier = LDA()
    elif method == 'SVM':
        classifier = SVC()
    elif method == 'random_forest':
        classifier = RandomForestClassifier()  # default parameters assumed
    else:
        raise ValueError("unknown method: %s" % method)

    classifier.fit(inputs, labels)

    return classifier
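A dictionary dispatch makes the supported methods explicit and fails loudly on a typo; a sketch of the same factory, using the same three choices as above:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

CLASSIFIERS = {
    'LDA': LDA,
    'SVM': SVC,
    'random_forest': RandomForestClassifier,
}

def fit_data(inputs, labels, method):
    try:
        classifier = CLASSIFIERS[method]()  # instantiate the chosen class
    except KeyError:
        raise ValueError("unknown method: %s" % method)
    classifier.fit(inputs, labels)
    return classifier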
Example #44
def get_LDA_performance(test_df, X_std, y):
    X_test = test_df.ix[:, 'x.1':'x.10'].values
    X_std_test = StandardScaler().fit_transform(X_test)
    y_test = test_df.ix[:, 'y'].values

    lda_scores_training = []
    lda_scores_test = []

    qda_scores_training = []
    qda_scores_test = []

    knn_scores_training = []
    knn_scores_test = []

    for d in range(1, 11):
        lda = LDA(n_components=d)
        Xred_lda_training = lda.fit_transform(X_std, y)
        Xred_lda_test = lda.transform(X_std_test)

        lda_model = LDA()
        lda_model.fit(Xred_lda_training, y)

        qda_model = QDA()
        qda_model.fit(Xred_lda_training, y)

        knn_model = KNeighborsClassifier(n_neighbors=10)
        knn_model.fit(Xred_lda_training, y)

        lda_scores_training.append(1 - lda_model.score(Xred_lda_training, y))
        lda_scores_test.append(1 - lda_model.score(Xred_lda_test, y_test))

        qda_scores_training.append(1 - qda_model.score(Xred_lda_training, y))
        qda_scores_test.append(1 - qda_model.score(Xred_lda_test, y_test))

        knn_scores_training.append(1 - knn_model.score(Xred_lda_training, y))
        knn_scores_test.append(1 - knn_model.score(Xred_lda_test, y_test))

    plt.plot(range(10), lda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), lda_scores_test, 'b--', label="Test data")
    plt.title("LDA after LDA reduction")
    plt.xlabel('d (reduced dimension)')
    plt.ylabel('Error rate')
    plt.legend()
    plt.show()

    plt.plot(range(10), qda_scores_training, 'r--', label="Train data")
    plt.plot(range(10), qda_scores_test, 'b--', label="Test data")
    plt.title("QDA after LDA reduction")
    plt.legend()
    plt.show()

    plt.plot(range(10), knn_scores_training, 'r--', label="Train data")
    plt.plot(range(10), knn_scores_test, 'b--', label="Test data")
    plt.title("KNN after LDA reduction")
    plt.legend()
    plt.show()
Example #45
def lda(X_train, X_val, y_train):
    print("Performing dimensionality reduction using LDA...")
    lda = LDA()
    try:
        lda.fit(X_train, y_train)
    except TypeError:
        X_train = X_train.toarray()
        X_val = X_val.toarray()
        lda.fit(X_train, y_train)
    X_train = lda.transform(X_train)
    X_val = lda.transform(X_val)
    return X_train, X_val
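LDA cannot consume scipy sparse matrices, which is what the `try/except TypeError` above works around; testing with `scipy.sparse.issparse` makes the guard explicit. A sketch of the same conversion (`X_train` and `X_val` as in the example above):

import scipy.sparse as sp

def densify(X):
    # convert to a dense array only when the input is actually sparse
    return X.toarray() if sp.issparse(X) else X

X_train, X_val = densify(X_train), densify(X_val)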
Example #46
def lda_f(train, train_labels, test):
    # LDA
    print ''
    print '----------------'
    print 'LDA:'

    # http://scikit-learn.org/0.16/modules/generated/sklearn.lda.LDA.html
    clf = LDA()
    clf.fit(train, train_labels)
    pred = clf.predict(test)

    return pred
Example #47
def lda(X_train, X_val, y_train):
    print("Performing dimensionality reduction using LDA...")
    lda = LDA()
    try:
        lda.fit(X_train, y_train)
    except TypeError:
        X_train = X_train.toarray()
        X_val = X_val.toarray()
        lda.fit(X_train, y_train)
    X_train = lda.transform(X_train)
    X_val = lda.transform(X_val)
    return X_train, X_val
Example #48
def LinearDiscriminantAnalysis(x_train, y_train, x_cv, y_cv):
	"""
	Linear Discriminant Analysis Classifier
	"""
	print "Linear Discriminant Analysis"
	clfr = LDA()
	clfr.fit(x_train, y_train)
	#print 'Accuracy in training set: %f' % clfr.score(x_train, y_train)
	#if y_cv != None:
		#print 'Accuracy in cv set: %f' % clfr.score(x_cv, y_cv)
	
	return clfr
Example #49
def LDA_select_cv(X, Y, num_features):
    scores = []
    skf = cross_validation.StratifiedKFold(Y, n_folds=10)
    for train, test in skf:
        X_train, X_test, y_train, y_test = X[train], X[test], Y[train], Y[test]
        XRF_train, imp, ind, std = fitRF(X_train, y_train, est=2000)  # RFsel
        XRF_test = X_test[:, ind]  # reorder test set after RFsel
        clf = LDA()
        clf.fit(XRF_train[:, 0:num_features], y_train)
        scores.append(clf.score(XRF_test[:, 0:num_features], y_test))
    score = np.mean(scores)
    return score
Example #50
def train_lda(filename,delim=','):
    start = time.time()
    [X_train, X_test, y_train, y_test] = load_and_split_dataset(filename,delim)
    clf = LDA()
    clf.fit(X_train, y_train)
    end = time.time()
    print('Training Time: '+str((end - start))+'s')

    y_pred = clf.predict(X_test)

    # np.mean avoids the integer division that would truncate the accuracy to 0
    print np.mean(y_pred == y_test)
    return y_pred
Example #51
def plotLDA3D(X, y, names=[]):

    plt.cla()
    lda = LDA(n_components=3)
    lda.fit(X, y)
    X = lda.transform(X)

    fig = plt.figure(1, figsize=(4, 3))
    plt.clf()
    ax = Axes3D(fig, rect=[0, 0, .95, 1], elev=48, azim=134)

    classes = np.unique(y)
    colors_ = list(six.iteritems(colors.cnames))
    hex_ = [color[1] for color in colors_]
    rgb = [colors.hex2color(color) for color in hex_]
    colors_ = []

    class_label = []
    for i in range(0, len(classes)):
        colors_.append(rgb[i])

        if (len(names) == 0):
            class_label.append((str(i), i))
        else:
            class_label.append((names[i], i))

    for name, label in class_label:
        ax.text3D(X[y == label, 0].mean(),
                  X[y == label, 1].mean() + 1.5,
                  X[y == label, 2].mean(),
                  name,
                  horizontalalignment='center',
                  bbox=dict(alpha=.5, edgecolor='w', facecolor='w'))
    # Reorder the labels to have colors matching the cluster results
    y = y.astype(int)
    #y = np.choose(y, class_label)
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)

    x_surf = [X[:, 0].min(), X[:, 0].max(), X[:, 0].min(), X[:, 0].max()]
    y_surf = [X[:, 0].max(), X[:, 0].max(), X[:, 0].min(), X[:, 0].min()]
    x_surf = np.array(x_surf)
    y_surf = np.array(y_surf)
    v0 = lda.transform(lda.coef_[[0]])
    v0 /= v0[-1]
    v1 = lda.transform(lda.coef_[[1]])
    v1 /= v1[-1]

    ax.w_xaxis.set_ticklabels([])
    ax.w_yaxis.set_ticklabels([])
    ax.w_zaxis.set_ticklabels([])

    plt.show()
Example #52
def lda_test(img_kind):
	import pylab as pl
	

	subdir = "data/"

	classes = []
	data = []

	the_ones = glob.glob(subdir + "f_" + img_kind + "*.jpg")
	all_of_them = glob.glob(subdir + "f_*_*.jpg")
	the_others = []

	for x in all_of_them:
		if the_ones.count(x) < 1:
			the_others.append(x)
	
	for x in the_ones:
		classes.append(1)
		data.append(get_image_features(cv.LoadImageM(x)))
	
	for x in the_others:
		classes.append(-1)
		data.append(get_image_features(cv.LoadImageM(x)))
	
	lda = LDA(n_components=2)
	print 'fitting'
	lda.fit(data, classes)
	print 'transforming'
	X_r = lda.transform(data)
	print '----'

	print X_r.shape

	x0 = [x[0] for x in X_r]
	x1 = [x[1] for x in X_r]

	pl.figure()
	for i in xrange(0,len(x0)):
		if classes[i] == 1:
			pl.scatter(x0[i], x1[i], c = 'r')
		else:
			pl.scatter(x0[i], x1[i], c = 'b')
	

	
	# for c, i, target_name in zip("rg", [1, -1], target_names):
	#     pl.scatter(X_r[classes == i, 0], X_r[classes == i, 1], c=c, label=target_name)
	pl.legend()
	pl.title('LDA of dataset')

	pl.show()
Example #53
def classifier(method, X_train, y_train):
    if method == "lda":
        clf = LDA()
    elif method == "knn":
        clf = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
    elif method == "svm":
        clf = GridSearch(X_train, y_train)
    else:
        raise ValueError("Unknown classifier method " + method)

    clf.fit(X_train, y_train)

    return clf
Example #54
def feat_extraction(X,y,D):

    # unsupervised feature extraction: Principal Component Analysis
    pca = decomposition.PCA(n_components=D)
    pca.fit(X)
    X_pca = pca.transform(X)

    # supervised feature extraction: Linear Discriminant Analysis
    lda = LDA(n_components=D)
    lda.fit(X,y)
    X_lda = lda.transform(X)

    return (X_pca,X_lda)
Example #55
def feat_extraction(X, y, D):

    # unsupervised feature extraction: Principal Component Analysis
    pca = decomposition.PCA(n_components=D)
    pca.fit(X)
    X_pca = pca.transform(X)

    # supervised feature extraction: Linear Discriminant Analysis
    lda = LDA(n_components=D)
    lda.fit(X, y)
    X_lda = lda.transform(X)

    return (X_pca, X_lda)
Example #56
def fit(X, y):
    # Do your training here
    #clf = LogisticRegression(penalty="l2")
    #clf = SVC(kernel='linear', probability=True, random_state=0)
    clf1 = LDA()
    #clf = ensemble.RandomForestClassifier(n_estimators=10, max_depth=8, min_samples_leaf=4, n_jobs=4, random_state=0)
    clf1.fit(X, y)
    #pred_y = clf1.predict_proba(X)[:,[1]]
    #pred_y2 = np.vstack([pred_y[0],pred_y[:-1]])
    #pred_y3 = np.vstack([pred_y[0],pred_y[0],pred_y[:-2]])
    #pred_y = np.concatenate((pred_y, pred_y2, pred_y3),axis=1)
    #clf2 = LDA()
    #clf2.fit(pred_y, y)
    return clf1