Example #1
 def startlda(self):
     from sklearn.lda import LDA
     clf=LDA()
     X=np.array(self.traindata)
     Y=np.array(self.trainclass)
     y=self.testdata
     X=[[float(y) for y in x] for x in X]
     Y=[[int(y) for y in x] for x in Y]
     y=[[float(y) for y in x] for x in self.testdata]
     clf.fit(X,Y)
     print clf.predict(y)
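
Note: every example on this page imports LDA from the old sklearn.lda module. That module was deprecated in scikit-learn 0.17 and removed in 0.19; on current releases the same classifier is sklearn.discriminant_analysis.LinearDiscriminantAnalysis. A minimal sketch of the modern equivalent, using made-up toy data rather than the class attributes above:

# Hedged sketch: modern import path replacing the deprecated sklearn.lda.LDA.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
import numpy as np

X = np.array([[-1.0, -1.0], [-2.0, -1.0], [1.0, 1.0], [2.0, 1.0]])  # toy features
y = np.array([0, 0, 1, 1])                                          # toy labels

clf = LDA()
clf.fit(X, y)  # same fit/predict interface as the old sklearn.lda.LDA
print(clf.predict([[-1.5, -1.0]]))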
Example #2
class Ensemble:

	def __init__(self, data):
		self.rf = RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')
		self.lda = LDA()
		self.dec = DecisionTreeClassifier(criterion='entropy')
		self.ada = AdaBoostClassifier(n_estimators=500, learning_rate=0.25)

		self.make_prediction(data)


	def make_prediction(self, data):
		'''
		Make an ensemble prediction
		'''
		self.rf.fit(data.features_train, data.labels_train)
		self.lda.fit(data.features_train, data.labels_train)
		self.dec.fit(data.features_train, data.labels_train)
		self.ada.fit(data.features_train, data.labels_train)

		pre_pred = []
		self.pred = []

		ada_pred = self.ada.predict(data.features_test)
		rf_pred = self.rf.predict(data.features_test)
		lda_pred = self.lda.predict(data.features_test)
		dec_pred = self.dec.predict(data.features_test)

		for i in range(len(rf_pred)):
			pre_pred.append([ rf_pred[i], lda_pred[i], dec_pred[i], ada_pred[i] ])

		for entry in pre_pred:
			pred_list = sorted(entry, key=entry.count, reverse=True)
			self.pred.append(pred_list[0])
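
The two loops at the end of make_prediction implement a hard majority vote: for each test sample, the four per-model predictions are sorted by how often they occur and the most frequent label is kept. scikit-learn's VotingClassifier gives the same behaviour out of the box; a hedged sketch with the estimator settings copied from the constructor above (the data object and its fields are assumed to be as in the original class, and ties may be broken differently):

# Hedged sketch: hard-voting ensemble equivalent to the manual count-based vote above.
from sklearn.ensemble import VotingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

voter = VotingClassifier(
    estimators=[
        ('rf', RandomForestClassifier(n_estimators=80, n_jobs=-1, min_samples_split=45, criterion='entropy')),
        ('lda', LDA()),
        ('dec', DecisionTreeClassifier(criterion='entropy')),
        ('ada', AdaBoostClassifier(n_estimators=500, learning_rate=0.25)),
    ],
    voting='hard')
voter.fit(data.features_train, data.labels_train)   # 'data' assumed as in the class above
pred = voter.predict(data.features_test)            # majority label per test sample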
Example #3
def tryLinearDiscriminantAnalysis(goFast):
  from sklearn.datasets import dump_svmlight_file, load_svmlight_file
  if goFast:
    training_data, training_labels = load_svmlight_file("dt1_1500.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1_1500.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1_1500.tst.svm", n_features=253659, zero_based=True)
  else:
    training_data, training_labels = load_svmlight_file("dt1.trn.svm", n_features=253659, zero_based=True)
    validation_data, validation_labels = load_svmlight_file("dt1.vld.svm", n_features=253659, zero_based=True)
    testing_data, testing_labels = load_svmlight_file("dt1.tst.svm", n_features=253659, zero_based=True)

  from sklearn.lda import LDA
  from sklearn.metrics import accuracy_score
  from sklearn.grid_search import ParameterGrid
  from sklearn.decomposition import RandomizedPCA

  rpcaDataGrid = [{"n_components": [10,45,70,100],
                    "iterated_power": [2, 3, 4],
                    "whiten": [True]}]

  for rpca_parameter_set in ParameterGrid(rpcaDataGrid):
    rpcaOperator = RandomizedPCA(**rpca_parameter_set)
    rpcaOperator.fit(training_data,training_labels)
    new_training_data = rpcaOperator.transform(training_data)
    new_validation_data = rpcaOperator.transform(validation_data)
    ldaOperator = LDA()
    ldaOperator.fit(new_training_data,training_labels)
    print "Score = " + str(accuracy_score(validation_labels,ldaOperator.predict(new_validation_data)))
Example #4
 def lda_on(train_x,
            train_y,
            test_x,
            test_y,
            feats_name='all_features'):
     """ Linear Discriminant Analysis """
     lda = LDA()
     lda.fit(train_x, train_y, store_covariance=True)
     print feats_name, "(train):", lda.score(train_x, train_y)
     print feats_name, "(test):", lda.score(test_x, test_y)
     with open(dataset_name + '_lda_classif_' + feats_name + '.pickle',
               'w') as w_f:
         cPickle.dump(lda, w_f)
     y_pred = lda.predict(test_x)
     X_train, X_validate, y_train, y_validate = cross_validation\
             .train_test_split(train_x, train_y, test_size=0.2,
                     random_state=0)
     lda.fit(X_train, y_train)
     print feats_name, "(validation):", lda.score(
         X_validate, y_validate)
     y_pred_valid = lda.predict(X_validate)
     cm_test = confusion_matrix(test_y, y_pred)
     cm_valid = confusion_matrix(y_validate, y_pred_valid)
     np.set_printoptions(threshold='nan')
     with open("cm_test" + feats_name + ".txt", 'w') as w_f:
         print >> w_f, cm_test
     with open("cm_valid" + feats_name + ".txt", 'w') as w_f:
         print >> w_f, cm_valid
def ldapredict(trainData,testData,trainOuts,testOuts):
	clf = LDA()
	print(clf.fit(trainData,trainOuts))
	predictions = clf.predict(testData)
	print(predictions)
	misses,error = sup.crunchTestResults(predictions,testOuts,.5)
	print(1-error)
Example #6
def read_subpop_data(one_hot=True, fake_data=False, test_size=0.2, undersample=False):

    labeled_dic = convert_txt_to_npy(LABELED_RL_PATH)
    unlabeled_dic = convert_txt_to_npy(UNLABELED_RL_PATH, labeled=False)
    X_train, X_test, y_train, y_test = split_train_test(labeled_dic, test_size=test_size)

    class DataSets(object):
        pass
    data_sets = DataSets()
    
    if undersample:
        from unbalanced_dataset import UnderSampler 
        US = UnderSampler(verbose=True)
        X_train, y_train = US.fit_transform(X_train, y_train)
        
    lda = LDA()
    lda.fit(X_train, y_train)
    score = metrics.accuracy_score(lda.predict(X_test), y_test)
    print("Baseline LDA: %f " % score)

    if one_hot:
        y_train = convert_to_one_hot(y_train)
        y_test = convert_to_one_hot(y_test)

    data_sets = DataSets()
    data_sets.test = DataSet(X_test, y_test)
    data_sets.train = SemiDataSet(unlabeled_dic['data'], X_train, y_train)

    return data_sets
Example #7
    def test_all_methods(self):
        x_cols = ["Lag2"]
        formula = "Direction~Lag2"
        # print self.df.shape[0]
        train_data = self.df.ix[(self.df["Year"] >= 1990) & (self.df["Year"] <= 2008), :]
        # print train_data.shape[0]
        """ (d) logistic"""
        model = smf.glm(formula, data=train_data, family=sm.families.Binomial())
        result = model.fit()
        test_data = self.df.ix[self.df["Year"] > 2008, :]
        probs = Series(result.predict(sm.add_constant(test_data[["Lag2"]])))
        pred_values = probs.map(lambda x: "Down" if x > 0.5 else "Up")
        tp.output_table(pred_values.values, test_data[self.y_col].values)

        train_X = train_data[x_cols].values
        train_y = train_data[self.y_col].values
        test_X = test_data[x_cols].values
        test_y = test_data[self.y_col].values
        """ (e) LDA """
        lda_res = LDA().fit(train_X, train_y)
        pred_y = lda_res.predict(test_X)
        tp.output_table(pred_y, test_y)
        """ (f) QDA """
        qda_res = QDA().fit(train_X, train_y)
        pred_y = qda_res.predict(test_X)
        tp.output_table(pred_y, test_y)
        """ (g) KNN """
        clf = neighbors.KNeighborsClassifier(1, weights="uniform")
        clf.fit(train_X, train_y)
        pred_y = clf.predict(test_X)
        tp.output_table(pred_y, test_y)
        """ (h) logistic and LDA """
        """ (i) Is the purpose of the last question going through all methods with no direction?"""
Example #8
 def DLDA(self, trainLabel, featureData, testData):
     # print featureData == testData
     # print testData
     clf = LDA()
     clf.fit(featureData, trainLabel)
     testLabel = clf.predict(testData)
     return testLabel
    def test_twomethods(self):
        key_y_pred = 'y' + conf.SEP + conf.PREDICTION
        X, y = datasets.make_classification(n_samples=20, n_features=5,
                                            n_informative=2)
        # = With EPAC
        wf = Methods(LDA(), SVC(kernel="linear"))
        r_epac = wf.run(X=X, y=y)

        # = With SKLEARN
        lda = LDA()
        svm = SVC(kernel="linear")
        lda.fit(X, y)
        svm.fit(X, y)
        r_sklearn = [lda.predict(X), svm.predict(X)]

        # Comparison
        for i_cls in range(2):
            comp = np.all(np.asarray(r_epac[i_cls][key_y_pred]) ==
                                    np.asarray(r_sklearn[i_cls]))
            self.assertTrue(comp, u'Diff Methods')

        # test reduce
        r_epac_reduce = [wf.reduce().values()[0][key_y_pred],
            wf.reduce().values()[1][key_y_pred]]
        comp = np.all(np.asarray(r_epac_reduce) == np.asarray(r_sklearn))
        self.assertTrue(comp, u'Diff Perm / CV: EPAC reduce')
	def LDA模型(self, 問題, 答案):
		lda = LDA()
# 		clf = svm.NuSVC()
		print('訓練LDA')
		lda.fit(問題, 答案)
		print('訓練了')
		return lambda 問:lda.predict(問)
Example #11
def eval_func(chromosome):
    alldata = LoadFeatures(data_N_x, data_F_x, chromosome)
    sx, sy, tx, ty = GetData(0.8, alldata)
    clf = LDA()
    clf.fit(sx, sy)
    py = clf.predict(tx)
    return accuracy_score(ty, py)
Example #12
def do_lda(x, y, folds):
    indexes = list(range(len(x)))
    shuffle(indexes)
    x = list(x[i] for i in indexes)
    y = list(y[i] for i in indexes)
    fold_size = len(x) / folds
    corrects = []
    for fold in range(folds):
        test_x = []
        train_x = []
        test_y = []
        train_y = []
        for i in range(len(x)):
            fold_index = i / fold_size
            if fold == fold_index:
                test_x.append(x[i])
                test_y.append(y[i])
            else:
                train_x.append(x[i])
                train_y.append(y[i])
        print 'Partitioned data into fold'
        test_x, train_x = remove_redundant_dimensions(test_x, train_x)
        print 'Removed redundant dimensions'
        lda = LDA()
        lda.fit(train_x, train_y)
        print 'Fit lda'
        predictions = lda.predict(test_x)
        correct = sum(1 for i in range(len(predictions)) if predictions[i] == test_y[i])
        print 'Did fold, correct:', correct
        corrects.append(correct)
    return corrects
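
do_lda above partitions the shuffled data into folds by hand and counts the correct predictions in each one. The same evaluation can be written with scikit-learn's cross-validation utilities; a hedged sketch assuming x and y are array-likes as in the function above (the do_lda_cv name is illustrative, and the per-fold remove_redundant_dimensions step is omitted):

# Hedged sketch: shuffled k-fold evaluation of LDA via scikit-learn's own CV helpers.
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

def do_lda_cv(x, y, folds):
    cv = KFold(n_splits=folds, shuffle=True)
    # returns accuracy per fold instead of raw correct counts
    return cross_val_score(LinearDiscriminantAnalysis(), np.asarray(x), np.asarray(y), cv=cv)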
Example #13
def main():
    
    for question in range(3,18):
        
        print("Question ", question, " Percent Accuracy")

        trainingSet_features, trainingSet_labels, testSet_features, testSet_labels = loadTrainingAndTestData(question)
        #print(len(trainingSet_features))
        #print(trainingSet_labels)
        #print(len(testSet_features))
        #print(len(testSet_labels))
        
        #print(trainingSet_labels)
        nnC = KNeighborsClassifier(n_neighbors=5)
        nnC.fit(trainingSet_features, trainingSet_labels) 
        nnC_predictions = nnC.predict(testSet_features)
        print("Nearest Neighbor: %.2f" % (100*accuracy_score(testSet_labels,nnC_predictions)),"%")

        svmC = svm.SVC()
        svmC.fit(trainingSet_features, trainingSet_labels) 
        svmCpredictions = svmC.predict(testSet_features)
        print("Support Vector Machines: %.2f" % (100*accuracy_score(testSet_labels,svmCpredictions)),"%")

        rfC = RandomForestClassifier(n_estimators=100)
        rfC.fit(trainingSet_features, trainingSet_labels) 
        rfC_predictions = rfC.predict(testSet_features)
        print("Random Forrest:  %.2f" % (100*accuracy_score(testSet_labels,rfC_predictions)),"%")

        ldaC = LDA(solver='lsqr')
        ldaC.fit(trainingSet_features, trainingSet_labels) 
        ldaC_predictions = ldaC.predict(testSet_features)
        print("Linear Discriminant Analysis Classifier: %.2f" % (100*accuracy_score(testSet_labels,ldaC_predictions)),"%")
def eval_lda(X_train, y_train, X_test, y_test):
	wrongtrain=0
	wrongtest=0
	#train set
	pri = prior(y_train)
	#clf = LDA(priors=pri)
	clf = LDA()
	clf.fit(X_train, y_train)
	y_pred_train = clf.predict(X_train)
	y_pred = clf.predict(X_test)
	
	for y in xrange(len(y_pred_train)):	
		if y_pred_train[y] != y_train[y]:
			wrongtrain +=1

	for y in xrange(len(y_pred)):	
		if y_pred[y] != y_test[y]:
			wrongtest +=1
	return wrongtrain/len(y_train), wrongtest/len(y_test)
Example #16
 def lda_on(train_x, train_y, test_x, test_y, feats_name='all_features'):
     lda = LDA()
     lda.fit(train_x, train_y, store_covariance=True)
     print feats_name, "(train):", lda.score(train_x, train_y)
     print feats_name, "(test):", lda.score(test_x, test_y)
     with open(dataset_name + '_lda_classif_' + feats_name + '.pickle', 'w') as f:
         cPickle.dump(lda, f)
     y_pred = lda.predict(test_x)
     X_train, X_validate, y_train, y_validate = cross_validation.train_test_split(train_x, train_y, test_size=0.2, random_state=0)
     lda.fit(X_train, y_train)
     print feats_name, "(validation):", lda.score(X_validate, y_validate)
     y_pred_valid = lda.predict(X_validate)
     cm_test = confusion_matrix(test_y, y_pred)
     cm_valid = confusion_matrix(y_validate, y_pred_valid)
     np.set_printoptions(threshold='nan')
     with open("cm_test" + feats_name + ".txt", 'w') as wf:
         print >> wf, cm_test
     with open("cm_valid" + feats_name + ".txt", 'w') as wf:
         print >> wf, cm_valid
def lda(data,labels,n,v_type):
	train_data,train_labels,test_data,test_labels = split_data(data,labels,v_type)

	clf = LDA()
	clf.fit(np.array(train_data,dtype=np.float64), np.array(train_labels,dtype=np.float64))
	y_pred = clf.predict(test_data)
	pure_accuracy_rate = len([y_pred[x] for x in range(len(y_pred)) if y_pred[x] == test_labels[x]])/float(len(test_labels))
	report = classification_report(y_pred, test_labels, target_names=rock_names)
	cm = confusion_matrix(test_labels, y_pred)
	return pure_accuracy_rate,report,y_pred,test_labels,test_data,clf,cm,"LDA"
Example #18
def test():    
    class1 = np.mat([
        (2.9500 , 6.6300),
        (2.5300  , 7.7900),
        (3.5700 , 5.6500),
        (3.1600, 5.4700),
    ])
    class2 = np.mat([
        (2.5800 , 4.4600),
        (2.1600, 6.2200),
        (3.2700 , 3.5200),
        ])
    test = (2.81, 5.46)
    lda = myLDA(class1, class2)
    print lda.predict(test)

    lda = LDA()
    lda.fit(np.concatenate((class1, class2)), np.concatenate((np.zeros((4, 1)), np.ones((3, 1))), axis=0),
                 store_covariance=True)
    print lda.predict(test)
def train_lda(filename,delim=','):
    start = time.time()
    [X_train, X_test, y_train, y_test] = load_and_split_dataset(filename,delim)
    clf = LDA()
    clf.fit(X_train, y_train)
    end = time.time()
    print('Training Time: '+str((end - start))+'s')

    y_pred = clf.predict(X_test)

    print np.sum(y_pred == y_test)/len(y_pred)
    return y_pred
Example #20
def test():
    class1 = np.mat([
        (2.9500, 6.6300),
        (2.5300, 7.7900),
        (3.5700, 5.6500),
        (3.1600, 5.4700),
    ])
    class2 = np.mat([
        (2.5800, 4.4600),
        (2.1600, 6.2200),
        (3.2700, 3.5200),
    ])
    test = (2.81, 5.46)
    lda = myLDA(class1, class2)
    print lda.predict(test)

    lda = LDA()
    lda.fit(np.concatenate((class1, class2)),
            np.concatenate((np.zeros((4, 1)), np.ones((3, 1))), axis=0),
            store_covariance=True)
    print lda.predict(test)
Example #21
def lda_f(train, train_labels, test):
    # LDA
    print ''
    print '----------------'
    print 'LDA:'

    # http://scikit-learn.org/0.16/modules/generated/sklearn.lda.LDA.html
    clf = LDA()
    clf.fit(train, train_labels)
    pred = clf.predict(test)

    return pred
def FisherLD(images):
	a = 0
	coordinates = [[0 for x in range(28)] for x in range(28)]  	#This is a list of coordinate values, each x y pair corresponding to
																#a place in values (coordinates[0][0] -> values[0], coordinates[0][1] -> values[1]) 

	values = []		#This is the value of each spot within the image, either a 1 or 0													
	
	#Populate the list of coordinates
	for x in range(size):
		for y in range(size):
			coordinates[x][y] = x 
	
	#Populate the list of values
	for image in images:
		values.append(image.norm)

	#Perform LDA
	clf = LDA()
	clf.fit(coordinates, values)

	print(clf.predict([[-0.8, -1]]))
	return clf.predict([[-0.8, -1]])
class LinearDiscriminantAnalysis(object):
    def __init__(self, input_matrix, labels):
        self.x = input_matrix
        self.y = labels
        self.clf = LDA()

    def train(self):
        self.clf.fit(self.x, self.y)

    def predict(self, x):
        return self.clf.predict(x)

    def save_model(self, file):
        joblib.dump(self.clf, file)
class ProteinFamilyClassifier(object):

    def __init__(self, word_length):
        self.word_length = word_length
        self.clf = None

    def fit(self, data):
        sequences, families = zip(*data)

        # Create signatures from the CGR representations, as our X
        signatures = [create_cgr_signature(seq, self.word_length)
                      for seq in sequences]

        self.clf = LDA()
        self.clf.fit(np.array(signatures), families)

        # TODO: maybe return some information about the new feature space ?

    def predict(self, data):
        if not self.clf:
            raise RuntimeError("Cannot call predict before running fit.")

        sequences, true_families = zip(*data)

        # Create signatures from the CGR representations, as our X
        signatures = [create_cgr_signature(seq, self.word_length)
                      for seq in sequences]

        predicted_families = self.clf.predict(signatures)

        # precision, recall, fscore, support = precision_recall_fscore_support(
        #     true_families, predicted_families)
        #
        # confusion_matrix = \
        #     confusion_matrix(true_families, predicted_families)
        #
        # metrics = {
        #     'accuracy': accuracy_score(true_families, predicted_families),
        #     'precision': precision,
        #     'recall': recall,
        #     'fscore': fscore,
        #     'support': support,
        #     'confusion_matrix': confusion_matrix,
        #     # 'roc_auc': roc_auc_score(true_families, predicted_families),
        # }

        return classification_report(true_families, predicted_families)
Example #25
def runTestPairs( e ):
    x = e[0]; y = e[1]
    trainX = labelsmaptra[x] + labelsmaptra[y]
    labelsX = [x]*len(labelsmaptra[x]) + [y]*len(labelsmaptra[y])

    clf = LDA()
    clf.fit( trainX, labelsX )

    testX = labelsmaptes[x] + labelsmaptes[y]
    labelsX = [x]*len(labelsmaptes[x]) + [y]*len(labelsmaptes[y])
    error = 0
    for lab, test in zip( labelsX, testX ):
        pred = clf.predict(test)
        if lab != pred:
            error += 1
    print e, error, error/float(len(testX))
    return ( e, error, error/float(len(testX)) )
Example #26
def LDA(data, label, pred_data, pred_last):
    '''not good, no need to normalize the data
    '''
    data = np.array(data)
    pred_data = np.array(pred_data)
    label = np.array(label)
    pred_last = np.array(pred_last)
    from sklearn.lda import LDA
    gnb = LDA()
    gnb.fit(data, label)

    print gnb.score(data, label)
    pred_result = gnb.predict(pred_data)
    print("Number of mislabeled points out of a total %d points : %d" %
          (pred_data.shape[0], (pred_last != pred_result).sum()))
    print gnb.score(pred_data, pred_last)
    return pred_result
Example #27
def runTestPairs(e):
    x = e[0]
    y = e[1]
    trainX = labelsmaptra[x] + labelsmaptra[y]
    labelsX = [x] * len(labelsmaptra[x]) + [y] * len(labelsmaptra[y])

    clf = LDA()
    clf.fit(trainX, labelsX)

    testX = labelsmaptes[x] + labelsmaptes[y]
    labelsX = [x] * len(labelsmaptes[x]) + [y] * len(labelsmaptes[y])
    error = 0
    for lab, test in zip(labelsX, testX):
        pred = clf.predict(test)
        if lab != pred:
            error += 1
    print e, error, error / float(len(testX))
    return (e, error, error / float(len(testX)))
def lda(input_file,Output,test_size):
    lvltrace.lvltrace("LVLEntree dans lda split_test")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    n_samples, n_features = X.shape
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print X_train.shape, X_test.shape
    lda=LDA(n_components=2)
    lda.fit(X_train,y_train)
    X_LDA = lda.transform(X_train)
    print "shape of result:", X_LDA.shape
    y_pred = lda.predict(X_test)
    print "Linear Discriminant Analysis Accuracy "
    print "classification accuracy:", metrics.accuracy_score(y_test, y_pred)
    print "precision:", metrics.precision_score(y_test, y_pred)
    print "recall:", metrics.recall_score(y_test, y_pred)
    print "f1 score:", metrics.f1_score(y_test, y_pred)
    #LVLprint "\n"
    results = Output+"LDA_metrics_test.txt"
    file = open(results, "w")
    file.write("Linear Discriminant Analaysis estimator accuracy\n")
    file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y_test, y_pred))
    file.write("Precision Score: %f\n"%metrics.precision_score(y_test, y_pred))
    file.write("Recall Score: %f\n"%metrics.recall_score(y_test, y_pred))
    file.write("F1 Score: %f\n"%metrics.f1_score(y_test, y_pred))
    file.write("\n")
    file.write("True Value, Predicted Value, Iteration\n")
    for n in xrange(len(y_test)):
        file.write("%f,%f,%i\n"%(y_test[n],y_pred[n],(n+1)))
    file.close()
    title = "LDA %f"%test_size
    save = Output + "LDA_confusion_matrix"+"_%s.png"%test_size
    plot_confusion_matrix(y_test, y_pred,title,save)
    # plot the results along with the labels
    fig, ax = plt.subplots()
    im = ax.scatter(X_LDA[:, 0], X_LDA[:, 1], c=y_train)
    fig.colorbar(im);
    save_lda = Output + "LDA_plot_test"+"_%s.png"%test_size
    plt.savefig(save_lda)
    plt.close()
    lvltrace.lvltrace("LVLSortie dans lda split_test")
def lda(input_file,Output):
    lvltrace.lvltrace("LVLEntree dans lda")
    ncol=tools.file_col_coma(input_file)
    data = np.loadtxt(input_file, delimiter=',', usecols=range(ncol-1))
    X = data[:,1:]
    y = data[:,0]
    n_samples, n_features = X.shape
    #lda=LDA(n_components=2)
    lda=LDA()
    lda.fit(X,y)
    X_LDA = lda.transform(X)
    y_pred = lda.predict(X)
    print "#########################################################################################################\n"
    print "Linear Discriminant Analysis Accuracy "
    print "classification accuracy:", metrics.accuracy_score(y, y_pred)
    print "precision:", metrics.precision_score(y, y_pred)
    print "recall:", metrics.recall_score(y, y_pred)
    print "f1 score:", metrics.f1_score(y, y_pred)
    print "\n"
    print "#########################################################################################################\n"
    results = Output+"LDA_metrics.txt"
    file = open(results, "w")
    file.write("Linear Discriminant Analaysis estimator accuracy\n")
    file.write("Classification Accuracy Score: %f\n"%metrics.accuracy_score(y, y_pred))
    file.write("Precision Score: %f\n"%metrics.precision_score(y, y_pred))
    file.write("Recall Score: %f\n"%metrics.recall_score(y, y_pred))
    file.write("F1 Score: %f\n"%metrics.f1_score(y, y_pred))
    file.write("\n")
    file.write("True Value, Predicted Value, Iteration\n")
    for n in xrange(len(y)):
        file.write("%f,%f,%i\n"%(y[n],y_pred[n],(n+1)))
    file.close()
    title = "LDA"
    save = Output + "LDA_confusion_matrix.png"
    plot_confusion_matrix(y, y_pred,title,save)
    # plot the results along with the labels
    fig, ax = plt.subplots()
    im = ax.scatter(X_LDA[:, 0], X_LDA[:, 1], c=y)
    fig.colorbar(im);
    save_lda = Output + "LDA_plot.png"
    plt.savefig(save_lda)
    plt.close()
    lvltrace.lvltrace("LVLSortie dans lda")
class LDAClassifier(Classifier):
    '''Linear Discriminant analysis classifier'''
    def __init__(self):
        super(LDAClassifier, self).__init__()
        self.fig = 20
        self.is_trainable = True
        self.is_trained = False

    def train(self, classification_data, indices=None, settings_name=None, **kwargs):
        super(LDAClassifier, self).train(classification_data, indices, settings_name, **kwargs)
        indices = self.settings['indices']

        self.lda = LDA(**self.classifier_kwargs)

        self.lda.fit(classification_data.data[:, indices], classification_data.are_hurr_actual)
        return self

    def classify(self, classification_data):
        super(LDAClassifier, self).classify(classification_data)
        indices = self.settings['indices']

        self.are_hurr_pred = self.lda.predict(classification_data.data[:, indices])
        return self.are_hurr_pred
Example #31
def parseOtu():
    fn = r'..\cfs_data\otu_table_mc2_w_tax_even32233.txt'
    fid = open(fn)
    line = fid.readline()
    subjects = (fid.readline()).split('\t')
    subjects = subjects[1:-1]
    numSubjects = len(subjects)
    mat = []
    otus = []
    while True:
        line = (fid.readline()).split('\t')
        otus.append(line[-1])
        if line[0] is '':
            break
        mat.append(np.array([float(i) for i in line[1:-1]]))
    mat = np.array(mat)

    # Compare at the family level
    # Gather all families
    families = []
    for k in range(len(otus)):
        tmp = otus[k].split(';')
        if len(tmp) >= 5:
            if len(tmp[4]) > 4:
                family = str.strip(tmp[4][4:])
                family = family.replace('[', '')
                family = family.replace(']', '')
                families.append(family)

    families = set(families)
    numFamilies = len(families)
    rr = np.zeros((numFamilies, numSubjects))
    # Gather up rows for a specific family
    idx = 0
    for k in otus:
        famId = 0
        for family in families:
            if k.find(family) > -1:
                rr[famId, :] += mat[idx, :]
                break
            famId += 1
        idx += 1

    # Normalize
    rr = rr / np.sum(rr, axis=0)[np.newaxis]

    # read in control vs patients
    fid = open(r'..\cfs_data\mapping_metadata_CFS.txt')
    reader = csv.DictReader(fid, delimiter='\t')
    controls = []
    patients = []
    idx = 0
    for row in reader:
        if row['Subject'] == 'Control':
            controls.append(row['#SampleID'])
        if row['Subject'] == 'Patient':
            patients.append(row['#SampleID'])

    controlsIdx = []
    patientsIdx = []
    for k in range(len(subjects)):
        for kk in controls:
            if subjects[k] == kk:
                controlsIdx.append(k)
        for kk in patients:
            if subjects[k] == kk:
                patientsIdx.append(k)

    patientsIdx = np.array(patientsIdx)
    controlsIdx = np.array(controlsIdx)
    controlMat = rr[:, controlsIdx]
    patientMat = rr[:, patientsIdx]
    inputMat = np.hstack((patientMat, controlMat))
    outputVec = np.hstack(
        (np.ones(patientMat.shape[1]), -np.ones(controlMat.shape[1])))

    # Affinity Matrix
    numSubjects = inputMat.shape[1]
    aff_mat = np.zeros((numSubjects, numSubjects))
    for k in range(numSubjects):
        for kk in range(numSubjects):
            aff_mat[k, kk] = 1 / np.sqrt(
                np.sum((inputMat[:, k] - inputMat[:, kk])**2))
    plt.figure()
    plt.imshow(aff_mat)
    plt.show()

    from sklearn.lda import LDA
    clf = LDA()
    clf.fit(inputMat[:, 1:80].T, outputVec[1:80])
    fit = clf.predict(inputMat.T)
    err = np.sum(np.abs(fit - outputVec) > 0)
    print(err)

    # SVM
    clf = sklearn.svm.SVC(kernel='linear', C=1e-1)
    n_samples = inputMat.shape[1]
    cv = sklearn.cross_validation.KFold(n_samples, n_folds=8, shuffle=True)
    scores = sklearn.cross_validation.cross_val_score(clf,
                                                      inputMat.T,
                                                      outputVec,
                                                      cv=cv)

    # Predict my data
    # Read in my data file
    myOtu = otu.OTU(r'..\sample_data\01112016.json')

    # Get family distribution
    gen = myOtu.getTaxonomy('genus')

    myOtu.mergeTaxonomy('family', families)
    myOtu.getDistribution('family')

    # Do LDA fit
    # run predictor

    return
model_transf = LogisticRegression()
model_transf = model_transf.fit(X_transf[:200,:],Y[:200])

#Classifying the test data of the original dataset
predicted = model.predict(X[200:])

#Classifying the test data of the transformed dataset
predicted_transf = model_transf.predict(X_transf[200:])

#Checking classification accuracy ----- Answer to question 2
print "Acuracia da regressao logistica no conjunto de dados original: "+str(metrics.accuracy_score(Y[200:], predicted))
print "Acuracia da regressao logistica no conjunto de dados transformado: "+str(metrics.accuracy_score(Y[200:], predicted_transf))


#Applying LDA to the training data of the original dataset
model_LDA = LDA()
model_LDA = model_LDA.fit(X[:200],Y[:200])

#Applying LDA to the training data of the transformed dataset
model_LDA_transf = LDA()
model_LDA_transf = model_LDA_transf.fit(X_transf[:200],Y[:200])

#Classifying the test data of the original dataset
predicted_LDA = model_LDA.predict(X[200:])

#Classifying the test data of the transformed dataset
predicted_LDA_transf = model_LDA_transf.predict(X_transf[200:])

#Checking classification accuracy ----- Answer to question 3
print "Acuracia do LDA no conjunto de dados original: "+str(metrics.accuracy_score(Y[200:], predicted_LDA))
print "Acuracia do LDA no conjunto de dados transformado: "+str(metrics.accuracy_score(Y[200:], predicted_LDA_transf))
Example #33
    Xpart = Xproj[np.where(y_species == species_id)[0], :]
    plt.scatter(Xpart[:, 0], Xpart[:, 1], color=colors[i])
    i = i + 1
plt.title("Citrus Species (first 2 Principal Components)")
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()

# Perform multiclass LDA
Xtrain, Xtest, ytrain, ytest = train_test_split(Xscaled,
                                                y_species,
                                                test_size=0.25,
                                                random_state=42)
clf = LDA(len(species_ids))
clf.fit(Xtrain, ytrain)
ypred = clf.predict(Xtest)
print "LDA Accuracy Score: %.3f" % (accuracy_score(ypred, ytest))

# What varieties are most spectrally similar?
corr = np.corrcoef(clf.means_)
plt.imshow(corr, interpolation="nearest", cmap=plt.cm.cool)
plt.xticks(np.arange(len(species_ids)), species_ids, rotation=45)
plt.yticks(np.arange(len(species_ids)), species_ids)
plt.colorbar()
plt.show()

# Find LDA classifier accuracy using cross validation
kfold = KFold(Xscaled.shape[0], 10)
scores = []
for train, test in kfold:
    Xtrain, Xtest, ytrain, ytest = Xscaled[train], Xscaled[test], \
def analyze_by_t2t(R, trl_ix, t2t):
    perc = [0]+[np.percentile(t2t, i) for i in [25, 50, 75, 100]]

    n_assemblies = R.shape[1]
    f, ax = plt.subplots(nrows=n_assemblies)

    labels = {}

    for i, p in enumerate(perc[:-1]):
        p2 = perc[i+1]
        ix = np.nonzero(np.logical_and(t2t>p, t2t<=p2))[0]
        labels[i] = ix
        for a in range(n_assemblies):
            tmp = []
            for ii, x in enumerate(ix):
                xx = np.nonzero(trl_ix == x)[0]
                tmp.append(R[xx, a])
            
            labels[i, a, 'trl'] = np.vstack((tmp))
            m = np.mean(np.vstack((tmp)), axis=0)
            sem = np.std(np.vstack((tmp)), axis=0)/len(ix)
            ax[a].plot(m, color=cmap_list[i])
            ax[a].fill_between(np.arange(len(m)), m-sem, m+sem, color=cmap_list[i], alpha=.5)
            ax[a].plot(int(np.floor(p2*10)), m[int(np.floor(p2*10))-1], '.', markersize=20, color=cmap_list[i])

    #Classify new trials: 
    test={}
    train={}

    from sklearn.lda import LDA
    chance = {}
    for a in range(n_assemblies):
        lda = LDA()
        lda.n_components = len(perc) - 1

        X = []
        Y = []

        X_test = []
        Y_test = []

        ix_train = {}
        ix_test = {}

        for k in range(len(perc)-1):
            n = len(labels[k])
            ix = np.random.permutation(n)
            ix_train[k] = ix[:n/2]
            ix_test[k] = ix[n/2:]

            X.append(labels[k, a, 'trl'][ix_train[k],:])
            Y.append([k]*len(ix_train[k]))

            X_test.append(labels[k, a, 'trl'][ix_test[k], :])
            Y_test.append([k]*len(ix_test[k]))

        Y_train = np.hstack((Y))
        lda.fit(np.vstack((X)), Y_train)
        y_true = lda.predict(np.vstack((X)))
        train[a] = np.sum(y_true==np.hstack((Y)))/float(len(y_true))

        y_pred = lda.predict(np.vstack(X_test))
        test[a] = np.sum(y_pred == np.hstack((Y_test)))/float(len(y_pred))

        chance[a] = []
        for i in range(100):
            ix = np.random.permutation(len(Y_train))
            lda.fit(np.vstack((X)), Y_train[ix])
            y_pred = lda.predict(np.vstack(X))
            chance[a].append(np.sum(y_pred == Y_train[ix])/float(len(y_pred)))
    plt.show()
    return train, test, chance
Example #35
def classify(sx, sy, tx, ty):
    clf = LDA()
    clf.fit(sx, sy)
    py = clf.predict(tx)
    return accuracy_score(ty, py)
Example #36
N_st = np.sum(y == 0)
N_rr = N_tot - N_st
N_train = len(y_train)
N_test = len(y_test)
N_plot = 5000 + N_rr

#----------------------------------------------------------------------
# perform LDA
classifiers = []
predictions = []
Ncolors = np.arange(1, X.shape[1] + 1)

for nc in Ncolors:
    clf = LDA()
    clf.fit(X_train[:, :nc], y_train)
    y_pred = clf.predict(X_test[:, :nc])

    classifiers.append(clf)
    predictions.append(y_pred)

completeness, contamination = completeness_contamination(predictions, y_test)

print("completeness", completeness)
print("contamination", contamination)

#------------------------------------------------------------
# Compute the decision boundary
clf = classifiers[1]
xlim = (0.7, 1.35)
ylim = (-0.15, 0.4)
Example #37
	for point in DataSet:
		for f in range(P):
			if point[f] == "NaN":
				point[f] = SampleMean[f] 

	return DataSet

TestSetNum = 7	#ratio of DataSet : TestSet
impute_mean(DataSet)
for i in range(N):
	if i%TestSetNum == 0:
		TestSet.append(DataSet[i])
		Y_test.append(Y_data[i])
	else :
		TrainSet.append(DataSet[i])
		Y_train.append(Y_data[i])

print len(TrainSet),len(TestSet)
# data has been split into TrainSet and TestSet

import numpy as np
from sklearn.lda import LDA

clf = LDA()
clf.fit(np.array(TrainSet),np.array(Y_train))

output = clf.predict(TestSet)
collect_stat(Y_test, output)	


Example #38
def LDA_onData():
    #Parsing Full training dataset
    XFull = common.parseFile('../UCI HAR Dataset/train/X_train.txt')
    YFull = common.parseFile('../UCI HAR Dataset/train/y_train.txt')

    #Parsing Full testing dataset
    XFullTest = common.parseFile('../UCI HAR Dataset/test/X_test.txt')
    YFullTest = common.parseFile('../UCI HAR Dataset/test/y_test.txt')
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [1, 2, 3, 4, 5, 6])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [1, 2, 3, 4, 5, 6])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3, 4, 5, 6])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [1, 2, 3, 4, 5, 6]))

    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [1, 2, 3, 4, 5, 6])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [1, 2, 3, 4, 5, 6])

    X_Dynamic = common.getPowerK(X_Dynamic, [1, 2])
    X_DynamicTest = common.getPowerK(X_DynamicTest, [1, 2])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3, 4, 5, 6])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [1, 2, 3, 4, 5, 6]))

    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [1, 2, 3, 4, 5, 6])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [1, 2, 3, 4, 5, 6])

    X_Dynamic = common.getPowerK(X_Dynamic, [1, 2, 3])
    X_DynamicTest = common.getPowerK(X_DynamicTest, [1, 2, 3])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3, 4, 5, 6])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [1, 2, 3, 4, 5, 6]))

    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [1, 2, 3])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [1, 2, 3])
    print(len(X_DynamicTest), len(Y_DynamicTest))

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [1, 2, 3]))
    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [1, 2, 3])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [1, 2, 3])

    X_Dynamic = common.getPowerK(X_Dynamic, [1, 2])
    X_DynamicTest = common.getPowerK(X_DynamicTest, [1, 2])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [1, 2, 3]))
    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [1, 2, 3])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [1, 2, 3])

    X_Dynamic = common.getPowerK(X_Dynamic, [1, 2, 3])
    X_DynamicTest = common.getPowerK(X_DynamicTest, [1, 2, 3])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [1, 2, 3])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [1, 2, 3]))
    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Non-Dynamic Activities on training
    X_NonDynamic, Y_NonDynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                      [4, 5, 6])
    #Getting the dataset associated with Non-Dynamic Activities on testing
    X_NonDynamicTest, Y_NonDynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [4, 5, 6])

    #Fitting data using LDA classifier

    clf = LDA()
    clf.fit(X_NonDynamic, Y_NonDynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_NonDynamicTest), Y_NonDynamicTest, [4, 5, 6])
    print(
        common.createConfusionMatrix(
            clf.predict(X_NonDynamicTest).flatten(),
            Y_NonDynamicTest.flatten(), [4, 5, 6]))
    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [4, 5, 6])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [4, 5, 6])

    X_Dynamic = common.getPowerK(X_Dynamic, [1, 2])
    X_DynamicTest = common.getPowerK(X_DynamicTest, [1, 2])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [4, 5, 6])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [4, 5, 6]))
    print(fscore)
    #################################################################################################################################
    #Getting the dataset associated with Dynamic Activities on training
    X_Dynamic, Y_Dynamic = common.getDataSubset(XFull, YFull.flatten(),
                                                [4, 5, 6])
    #Getting the dataset associated with Dynamic Activities on testing
    X_DynamicTest, Y_DynamicTest = common.getDataSubset(
        XFullTest, YFullTest.flatten(), [4, 5, 6])

    X_Dynamic = common.getPowerK(X_Dynamic, [1, 2, 3])
    X_DynamicTest = common.getPowerK(X_DynamicTest, [1, 2, 3])

    #Fitting data using LDA classifier
    clf = LDA()
    clf.fit(X_Dynamic, Y_Dynamic.flatten())

    precision, recall, fscore = common.checkAccuracy(
        clf.predict(X_DynamicTest), Y_DynamicTest, [4, 5, 6])
    print(
        common.createConfusionMatrix(
            clf.predict(X_DynamicTest).flatten(), Y_DynamicTest.flatten(),
            [4, 5, 6]))
    print(fscore)
Example #39
import pandas as pd
import numpy as np
from sklearn.lda import LDA

## read files
train = pd.read_csv('data/spam_train.csv')
test = pd.read_csv('data/spam_test.csv')

x = np.array(train.iloc[:, 0:57])
y = np.ravel(train.iloc[:, -1])

## separate the predictors and response in the test data set
x2 = np.array(test.iloc[:, 0:57])
y2 = np.ravel(test.iloc[:, -1])

## fit the model using lda
lda_cls = LDA()
lda_cls.fit(x, y)
print("(1): lda accuracy")
print(lda_cls.score(x, y))

## predict output on test data set with lda
predict = lda_cls.predict(x2)
print("(2): lda test accuracy")
print(lda_cls.score(x2, y2))
Example #40
                                        'n_neighbors', 5)

pd.DataFrame(np.array([test_scores, nns]).T,
             columns=['Test Accuracy', 'Number of Nearest Neighbours'])
##Very low accuracy in the results; best is 1 neighbour
###Training Accuracy
accuracy_score(Y_test, knnFit.predict(X_test_01))  #worse than guessing, 18.16%

#LDA
#Since we dont have many parameters to vary for LDA, we run it as is to see the results:
folds = KFold(n=X_train_01.shape[0], n_folds=10)
ldaAccuracyScores = []
for train_fold, test_fold in folds:
    ldaFit = LDA().fit(X_train_01[train_fold], Y_train[train_fold])
    accuracy = accuracy_score(Y_train[test_fold],
                              ldaFit.predict(X_train_01[test_fold]))
    ldaAccuracyScores.append(accuracy)
ldaAccuracyScores = np.array(ldaAccuracyScores)
print('the mean accuracy through LDA on training data is %0.2f' %
      ldaAccuracyScores.mean())

ldaFit = LDA().fit(X_train_01, Y_train)
accuracy_score(
    Y_test,
    ldaFit.predict(X_test_01))  #highest accuracy of 63.67%; best accuracy

#The HofF variable
###Since the HofF variable is very unbalanced, we stick to ensemble-based approaches: AdaBoost, Random Forest
###Our main metric for performance is the sensitivity, NOT accuracy
#Stratified Test-train split
from sklearn.cross_validation import StratifiedShuffleSplit
Example #41
for i in range(len(labels)):
	clf = LDA()
	#trainMat = repubAndDemMatrix
	#trainLabels = labels
	trainMat = np.concatenate((repubAndDemMatrix[0:i],repubAndDemMatrix[i+1:sz]), axis = 0)
	trainLabels = np.concatenate((labels[0:i], labels[i+1:sz]), axis = 0)
	#trainMat = repubAndDemMatrix[0:163]
	#trainLabels = labels[0:163]
	#print type(trainMat)
	#print type(trainLabels)
	#trainLabels = labels[0:i] + labels[i+1:sz]
	clf.fit(trainMat, trainLabels)
	#clf.fit(repubAndDemMatrix, labels)
	#clf = getLDAMat(trainMat, trainLabels, 5);
	if clf.predict([repubAndDemMatrix[i].tolist()]) == labels[i]:
		totalCorrect = totalCorrect + 1
	# print clf.coef_
	print(i)
	predicted = clf.predict([repubAndDemMatrix[i].tolist()])
	print 'predicted =', predicted, '; actual =', labels[i]
	
	if labels[i] == 0:
		trueDem += 1
	else:
		trueRep += 1
	if predicted == 0:
		predDem += 1
	else:
		predRep +=1
	
Example #42
# -*- coding: utf-8 -*-
"""
Created on Wed May 18 16:57:23 2016

@author: siham.belgadi
"""

import numpy as np
from sklearn.lda import LDA

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
y = np.array([1, 1, 1, 2, 2, 2])
clf = LDA()
clf.fit(X, y)
LDA(n_components=None,
    priors=None,
    shrinkage=None,
    solver='svd',
    store_covariance=False,
    tol=0.0001)
print(clf.predict([[-0.8, -1]]))
Example #43
        Cov[:, :,
            b] += np.cov(np.transpose(new)) + np.eye(inputs.shape[1]) * 1e-9
    return Mean, Covar, Prob


def QDA_predict(inputs, Prob, covariance, mean):
    B = np.zeros([len(Prob), len(inputs)])
    for b in range(len(Prob)):
        Mat = np.linalg.inv(covariance[:, :, b])
        A = -1 / 2 * np.log(np.linalg.det(covariance[:, :, b]) +
                            1e-10) + np.log(Prob[b])
        B[b, :] = np.array([
            A - (1 / 2 * np.dot(np.dot(a - mean[b], Mat), a - mean[b]))
            for a in inputs
        ])
    return np.argmax(B, axis=0)


semg = scipy.io.loadmat('./data/subject-0/motion-fist/trial-0.csv')

a, b, c, d = split_set(training_data, 10000, training_label)
Mean, Covar, P = LDA_fit(a, c)
prediction = LDA_predict(a, Covar, Mean, P)
print("accuracy:", 1 - (np.sum(c != prediction) / len(c)))

#sklearn implementation
classify = LDA()
classify.fit(a, c)
prediction = classify.predict(a)
print("accuracy:", 1 - (np.sum(c != prediction) / len(c)))
Example #44
        perc.append(float("{0:.2f}".format(cm[i][i] / count[i] * 100)))
    return perc


def overall_accuracy(cm, y_test):
    sum = 0
    for i in range(6):
        sum += cm[i][i]
    return float("{0:.2f}".format(sum * 100.0 / y_test.size))


#######LDA#####################################################################

lda = LDA()
lda.fit(X_train, y_train)
y_predict_lda = lda.predict(X_test)

y_pred_count_lda = total_count(y_predict_lda)
cmatrix_lda = confusion_matrix(y_test, y_predict_lda)

print "\nLDA:"
print cmatrix_lda
print ""

recall_lda = pre_rec(cmatrix_lda, y_test_count)
precision_lda = pre_rec(cmatrix_lda, y_pred_count_lda)
accuracy_lda = overall_accuracy(cmatrix_lda, y_test)

print "Precision for LDA: "
print precision_lda
print "Recall for LDA: "
Example #45
    #ws.var_.xvschema = scot.xvschema.singletrial
    #ws.optimize_var()
    ws.var_.delta = 1

    # Single-Trial Fitting and feature extraction
    features = np.zeros((len(triggers), 32))
    for t in range(len(triggers)):
        print('Fold %d/%d, Trial: %d   ' %(fold, nfolds, t), end='\r')
        ws.set_data(data[:, :, t])
        ws.fit_var()

        con = ws.get_connectivity('ffPDC')

        alpha = np.mean(con[:, :, np.logical_and(7 < freq, freq < 13)], axis=2)
        beta = np.mean(con[:, :, np.logical_and(15 < freq, freq < 25)], axis=2)

        features[t, :] = np.array([alpha, beta]).flatten()

    lda.fit(features[train, :], classids[train])

    acc_train = lda.score(features[train, :], classids[train])
    acc_test = lda.score(features[test, :], classids[test])

    print('Fold %d/%d, Acc Train: %.4f, Acc Test: %.4f' %(fold, nfolds, acc_train, acc_test))

    pred = lda.predict(features[test, :])
    cm += confusion_matrix(classids[test], pred)
print('Confusion Matrix:\n', cm)

print('Total Accuracy: %.4f'%(np.sum(np.diag(cm))/np.sum(cm)))
Example #46
class minDistance():

  def __init__(self, dataName, p, k):
    data = parse(dataName)
    npData = numpy.array(data, dtype=numpy.dtype(decimal.Decimal))

    self.X = npData[:,:-1].astype(numpy.float)
    self.Y = npData[:,-1].astype(numpy.integer)
  
    self._ret = []
    self._p = p
    self._k = k

  @property
  def ret(self):
      return self._ret

  def process(self, fold=2):
    self.crossValidation(self.trainFunc, self.testFunc)

  def crossValidation(self, cbTrain, cbTest, fold=2):
    X = self.X
    Y = self.Y

    kFold = cross_validation.KFold(n=Y.size, n_folds=fold, shuffle=True,
                             random_state=numpy.random.randint(1,16384))

    for train_index, test_index in kFold:
      #print("TRAIN:", train_index, "TEST:", test_index)
      X_train, X_test = X[train_index,:], X[test_index,:]
      Y_train, Y_test = Y[train_index], Y[test_index]

      cbTrain(X_train, Y_train)
      cbTest(X_test, Y_test)

  def trainFunc(self,X,Y):
    self.lda = LDA(n_components=2)
    self.lda.fit(X, Y)


  def testFunc(self,X,Y):
    d = distance(self._p)

    classNum = max(Y)+1
    ok = 0
    for line in range(0,Y.size):
      shouldBe = Y[line]
      given = X[line,:]
      chosen = {}
      calced = self.lda.predict( given )
      
      ok += calced == shouldBe

    #print 'Test: %s %%' % (100.0*ok/Y.size)
    self._ret.append(1.0*ok/Y.size)

  def plot(self):
    import pylab as pl
    self.lda = LDA(n_components=2)
    
    X = self.lda.fit(self.X, self.Y).transform(self.X)
    Y = self.Y

    for k in range(0,max(Y)+1):
      color = 'r' if k ==0 else 'g' if k == 1 else 'b'
      pl.plot( X[Y==k,0], X[Y==k,1], 'o'+color )

    pl.show()

    pass
    Fteste = np.nan_to_num((Fteste-Mteste) / Dteste)
    
    # LDA
    
    Xtreino = Ftreino
    Xteste = Fteste
    y = np.array([i for i in py.flatten([[i]*10 for i in range(12)])])
    target_names = np.array(conf.artistas)
    
    # apply LDA to the training and test sets (after fitting, i.e. training on the
    # training set)
    lda = LDA(n_components=2)
    # lda.fit(Xtreino, y, store_covariance=True)
    Xtreino_r2 = lda.fit(Xtreino, y, store_covariance=True).transform(Xtreino)
    
    y_pred = lda.predict(Xteste)
    print y_pred
    cm = confusion_matrix(y, y_pred)
    cms.append(cm)
    print 'cm', cm

cm_media = sum([np.array(cm, dtype=float) for cm in cms]) / N
print cm_media
fig = plt.figure()
ax = plt.subplot(111)
cax = ax.matshow(cm_media, interpolation='nearest', cmap=py.cm.jet)
#py.title('Confusion matrix')
plt.colorbar(cax)
plt.ylabel('True paintings', fontsize=11)
plt.xlabel('Predicted paintings', fontsize=11)
dialabels = [r'Caravaggio',
Example #48
def classify(images, classes_list, train_set, test_set, pos_fold, descriptor,
        parameters):
    """
    Performs the classification of the test_set according to the train_set.
    """
    
    print "Classification: LDA"
    
    #Paths
    #dirname = os.path.abspath(os.path.join(os.path.dirname(__file__)))
    temp_path = os.path.abspath(os.path.join(dirname, "..", "..", "temp"))
    model_path = os.path.join(temp_path, "iteration:" + str(iteration) + \
            "-LDA_" + str(pos_fold) + ".model")
    
    #Preprocess each class to a unique value to the classification
    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(classes_list)
    print "List of classes of this experiment:", label_encoder.classes_
    
    #Read the train list and save the list of class and the list
    #of feature vectors
    list_class = []
    list_fv = []
    
    for img in train_set:
        list_class.append(images[img][POS_CLASSES][INDEX_ZERO])
        list_fv.append(numpy.array(images[img][POS_FV][INDEX_ZERO]))
    
    list_train = numpy.array(list_fv)
    list_train_class = numpy.array(list_class)
    
    #Given a list of classes, transform each value in this list to a integer
    list_train_class = label_encoder.transform(list_train_class)
    
    #Read the test list and save the list of class and the list
    #of feature vectors
    list_img = test_set
    list_class = []
    list_fv = []
    
    for img in test_set:
        list_class.append(images[img][POS_CLASSES][INDEX_ZERO])
        list_fv.append(numpy.array(images[img][POS_FV][INDEX_ZERO]))
    
    list_test = numpy.array(list_fv)
    list_test_class = numpy.array(list_class)
    
    #Classification
    #--------------------------------------------------------------------------
    n_comp = parameters["Components"]
    if n_comp > len(label_encoder.classes_) - 1:
        n_comp = len(label_encoder.classes_) - 1
    clf = LDA(n_components=n_comp)
              
    
    #Fit
    print "\tFit: Beginning"
    clf.fit(list_train, list_train_class)
    print "\tFit: Done!"
    
    #Save configuration of the LDA
    model_paths = joblib.dump(clf, model_path)
    
    #Predict
    print "\tPredict: Beginning"
    list_predict = clf.predict(list_test)
    print "\tPredict: Done"
    
    #Mapping the results into integers
    list_predict = map(int, list_predict)
    #Returning the result to strings
    list_predict = label_encoder.inverse_transform(list_predict)
    
    list_result = []
    for predict in list_predict:
        img_result = [0] * len(label_encoder.classes_)
        #Find all predict in the list label_encoder.classes_ and grab the
        #first index
        pos = label_encoder.classes_.tolist().index(predict)
        img_result[pos] = 1
        list_result.append(img_result)
    
    #--------------------------------------------------------------------------
    
    return list_img, list_test_class, list_result, label_encoder.classes_, \
           model_paths
Example #49
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d import proj3d
import sklearn.linear_model as LM
import numpy as np
from sklearn.metrics import precision_recall_fscore_support

fname = "./3_que_data/train.csv"
train_X = np.genfromtxt(fname, delimiter=",")
train_Y = np.genfromtxt("./3_que_data/train_labels.csv", delimiter=",")

test_X = np.genfromtxt("./3_que_data/test.csv", delimiter=",")
test_Y = np.genfromtxt("./3_que_data/test_labels.csv", delimiter=",")

clf = LDA()
clf.fit(train_X, train_Y)

train_X_transformed = clf.transform(train_X)
train_X_transformed = train_X_transformed.flatten()
print train_X_transformed.shape
print clf.coef_

plt.plot(train_X_transformed[:1000], [10] * 1000, "ro", label="Class 1")
plt.plot(train_X_transformed[1000:], [10] * 1000, "bo", label="Class 2")
plt.plot([0] * 21, range(21), "g", label="Decision Boundary")
plt.axis([-6, 6, 0, 20])
plt.xlabel("X-axis")
plt.ylabel("Y-axis")
plt.legend()
plt.show()
print precision_recall_fscore_support(test_Y, clf.predict(test_X), labels=[1, 2])
Example #50
from sklearn.metrics import confusion_matrix

#Only the file name is given
if __name__=='__main__':

    for file in glob.glob(sys.argv[1]+'*.mat'):
        data = scipy.io.loadmat(file)
        X_train = data['Xtrain']
        y_train = data['Ytrain'].T

        print("Treinando LDA...")
        lda = LDA()

        ytrain = data['Ytrain'].T.reshape(data['Ytrain'].shape[1])
        lda.fit(data['Xtrain'].toarray(), ytrain)
        predict = lda.predict(data['Xval'].toarray())

        yVal = data['Yval'].T.reshape(data['Yval'].shape[1])
        print "Acuracia: ", sklearn.metrics.accuracy_score(yVal, predict)
        X_train = data["Xtrain"]
        X_val = data["Xval"]
        X_test, y_test = data["Xtest"], data["Ytest"]

        cm = confusion_matrix(yVal, predict)
        total = numpy.sum(cm, axis=1)

        if(cm.shape[0] < 2):
            acc = 1.0
        else:
            acc = []
            for i in range(total.shape[0]):
Example #51
import numpy as np
from sklearn.lda import LDA
from sklearn.metrics import accuracy_score

from util import DataReader, partition_data

fp = '../data/E-GEOD-48350/E-GEOD-48350-combined.csv'

x, y = DataReader(fp).get_data()
argmax = lambda x: x[0] if x[0] == 1 else x[1]
y = list(map(argmax, y))
partition = partition_data(x, y, [0.8, 0.2])

mli = lambda x: np.array(x).astype(float)
train_x = mli(partition[0][0])
train_y = mli(partition[0][1])
test_x = mli(partition[1][0])
test_y = mli(partition[1][1])

lda = LDA(n_components=2, shrinkage='auto', solver='lsqr')
lda.fit(train_x, train_y)
test_y_pred = lda.predict(test_x)
print(accuracy_score(test_y_pred, test_y))
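
This example passes shrinkage='auto' together with solver='lsqr'. Shrinkage regularizes the within-class covariance estimate (the 'auto' setting uses Ledoit-Wolf estimation) and, in scikit-learn, is only accepted by the 'lsqr' and 'eigen' solvers, not by the default 'svd'. Note also that n_components cannot exceed n_classes - 1, so the n_components=2 above has no effect on a two-class problem. A small self-contained sketch on synthetic data, just to illustrate the parameter combination:

# Hedged sketch: plain vs. Ledoit-Wolf-shrunk LDA on a small synthetic problem.
from sklearn.datasets import make_classification
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=200, n_features=50, n_informative=10, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

plain = LinearDiscriminantAnalysis(solver='lsqr').fit(X_tr, y_tr)
shrunk = LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto').fit(X_tr, y_tr)
print("plain lsqr:    ", accuracy_score(y_te, plain.predict(X_te)))
print("with shrinkage:", accuracy_score(y_te, shrunk.predict(X_te)))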
Example #52
def mlda():
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    y = np.array([1, 1, 1, 2, 2, 2])
    clf = LDA()    #LDA(n_components=None, priors=None, shrinkage=None, solver='svd', store_covariance=False, tol=0.0001)
    clf.fit(X, y)  # train
    print(clf.predict([[-0.8, -1]]))  # predict
Example #53
print 'LDA result 1:', lda_result1.shape
lda = LDA(n_components=1)
lda_result2 = lda.fit_transform(iris.data, iris.target)
print 'LDA result 2:', lda_result2.shape

# Visualization
import matplotlib.pyplot as plt
plt.subplot(1,2,1)
plt.scatter(lda_result1[iris.target==0, 0], lda_result1[iris.target==0, 1], color='r')
plt.scatter(lda_result1[iris.target==1, 0], lda_result1[iris.target==1, 1], color='g') 
plt.scatter(lda_result1[iris.target==2, 0], lda_result1[iris.target==2, 1], color='b') 
plt.title('LDA on iris (1)')

plt.subplot(1,2,2)
plt.stem(lda_result2)
plt.title('LDA on iris (2)')

plt.show()


# Classification
x_train_set = iris.data[:-5]
y_train_set = iris.target[:-5]
x_test_set = iris.data[-5:]
y_test_set = iris.target[-5:]
clf = LDA()
clf.fit(x_train_set, y_train_set)
y_pre = clf.predict(x_test_set)
print 'y_pre = \n', y_pre
print 'y_corret = \n', y_test_set
#PLS Dimension Reduction
pls2 = PLSRegression(n_components=n_components)
pls2.fit(features, MA_label)
XScore = pls2.transform(features)
# XScore = features

#LDA Classification
kf = KFold(n_splits=5)
kf.get_n_splits(XScore)
mean_acc = 0
for train_index, test_index in kf.split(XScore):
    X_train, X_test = XScore[train_index], XScore[test_index]
    y_train, y_test = MA_label[train_index], MA_label[test_index]
    clf = LDA()
    clf.fit(X_train, y_train)
    Y_predict = clf.predict(X_test)
    for i in range(len(Y_predict)):
        print("Y_Predict {} - Y_Test {}".format(Y_predict[i], y_test[i]))
    acc = accuracy_score(Y_predict, y_test)
    print("Accuracy = {}".format(acc))
    mean_acc = mean_acc + acc

mean_acc = (mean_acc / 5) * 100
print("Accuracy is {}".format(mean_acc))

with open("Results/MLL.csv", 'a') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow([numFeatures, mean_acc])
Example #55
# *****************************************************************************
# Linear Discriminant Analysis
from sklearn import datasets
from sklearn import metrics
from sklearn.lda import LDA

# load the iris datasets
dataset = datasets.load_iris()

# fit a LDA model to the data
model = LDA()
model.fit(dataset.data, dataset.target)
print(model)

# make predictions
expected = dataset.target
predicted = model.predict(dataset.data)

# summarize the fit of the model
print(metrics.classification_report(expected, predicted))
print(metrics.confusion_matrix(expected, predicted))