def learnModel(self, X, y, folds=3):
    """
    Train using the given examples and labels, but first conduct a grid
    search in conjunction with cross validation to find the best parameters.
    We also filter the wavelet features using a range of candidate sizes.
    """
    # Hard coding this is bad
    Cs = 2**numpy.arange(-2, 7, dtype=numpy.float64)
    #Cs = numpy.array([0.1, 2.0])

    if self.waveletInds is None:
        self.waveletInds = numpy.arange(X.shape[1])

    nonWaveletInds = numpy.setdiff1d(numpy.arange(X.shape[1]), self.waveletInds)

    Xw = X[:, self.waveletInds]
    Xo = X[:, nonWaveletInds]

    # Rank the wavelet features by total energy, largest first
    featureInds = numpy.flipud(numpy.argsort(numpy.sum(Xw**2, 0)))

    meanAUCs = numpy.zeros((Cs.shape[0], self.candidatesN.shape[0]))
    stdAUCs = numpy.zeros((Cs.shape[0], self.candidatesN.shape[0]))

    # Standardise the data
    Xw = Standardiser().standardiseArray(Xw)
    Xo = Standardiser().standardiseArray(Xo)

    # Grid search over the SVM penalty C and the number of retained wavelet features
    for i in range(Cs.shape[0]):
        for j in range(self.candidatesN.shape[0]):
            self.linearSVM.setC(Cs[i])
            newX = numpy.c_[Xw[:, featureInds[0:self.candidatesN[j]]], Xo]
            meanAUCs[i, j], stdAUCs[i, j] = self.linearSVM.evaluateStratifiedCv(newX, y, folds, metricMethod=Evaluator.auc)

    (bestI, bestJ) = numpy.unravel_index(numpy.argmax(meanAUCs), meanAUCs.shape)
    self.linearSVM.setC(Cs[bestI])
    self.featureInds = numpy.r_[self.waveletInds[featureInds[0:self.candidatesN[bestJ]]], nonWaveletInds]
    logging.debug("Best learner found: " + str(self.linearSVM) + " N:" + str(self.candidatesN[bestJ]))

    # Refit on the full data using the best parameters and selected features
    self.standardiser = Standardiser()
    newX = self.standardiser.standardiseArray(X[:, self.featureInds])
    self.linearSVM.learnModel(newX, y)
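# A minimal standalone sketch of the grid search above using scikit-learn
# primitives, for readers unfamiliar with the apgl wrappers. LinearSVC,
# StratifiedKFold and roc_auc_score stand in for self.linearSVM and
# Evaluator.auc; gridSearchAuc and its arguments are hypothetical names,
# not part of this codebase.
import numpy
from sklearn.svm import LinearSVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

def gridSearchAuc(Xw, Xo, y, Cs, candidatesN, featureInds, folds=3):
    # Mean CV AUC for every (C, number of wavelet features) pair
    meanAUCs = numpy.zeros((Cs.shape[0], candidatesN.shape[0]))
    for i, C in enumerate(Cs):
        for j, n in enumerate(candidatesN):
            newX = numpy.c_[Xw[:, featureInds[0:n]], Xo]
            aucs = []
            for trainInds, testInds in StratifiedKFold(folds).split(newX, y):
                svm = LinearSVC(C=C).fit(newX[trainInds], y[trainInds])
                aucs.append(roc_auc_score(y[testInds], svm.decision_function(newX[testInds])))
            meanAUCs[i, j] = numpy.mean(aucs)
    return meanAUCs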
def saveResults(self, leafRankGenerators, standardise=True):
    """
    Compute the results and save them for a particular hormone. Does so for
    all leaf ranks.
    """
    j = 0
    nonNaInds = self.YList[j][1]
    hormoneInd = self.hormoneInds[j]
    k = 2

    if isinstance(self.X, numpy.ndarray):
        X = self.X[nonNaInds, :]
    else:
        X = self.X[j][nonNaInds, :]
    X = numpy.c_[X, self.ages[nonNaInds]]

    if standardise:
        X = Standardiser().standardiseArray(X)
    Y = hormoneInd[k]
    waveletInds = numpy.arange(X.shape[1]-1)

    logging.debug("Shape of examples: " + str(X.shape))
    logging.debug("Distribution of labels: " + str(numpy.bincount(Y)))

    #pca = decomp.PCA(n_components=40)
    #X = pca.fit_transform(X)
    #print(X.shape)

    # Go through all the leaf ranks
    for i in range(len(leafRankGenerators)):
        # Compute TreeRankForest here
        fileName = self.resultsDir + "TreeRankForest-" + self.hormoneNames[j] + "_" + str(k) + "-" + leafRankGenerators[i][1] + "-" + self.featuresName + ".dat"
        try:
            logging.debug("Computing file " + fileName)
            # NB: currently always uses the first generator, not leafRankGenerators[i]
            #treeRankForest = TreeRankForest(self.funcLeafRankGenerators[0][0](waveletInds))
            treeRankForest = TreeRankForest(self.leafRankGenerators[0][0])
            treeRankForest.setMaxDepth(10)
            treeRankForest.setNumTrees(5)
            # Setting this low definitely helps
            #treeRankForest.setFeatureSize(1.0)
            treeRankForest.setFeatureSize(0.05)
            # The following 2 lines definitely improve stability and the AUC
            treeRankForest.setSampleSize(1.0)
            # Setting this to true results in slightly worse results
            treeRankForest.setSampleReplace(True)

            mean, var = treeRankForest.evaluateStratifiedCv(X, Y, self.folds, metricMethod=Evaluator.auc)
            print(mean)

            #treeRank = TreeRank(self.leafRankGenerators[0][0])
            #treeRank.setMaxDepth(self.maxDepth)
            #(bestParams, allMetrics, bestMetaDicts) = treeRank.evaluateCvOuter(X, Y, self.folds)
            #print(str(allMetrics))

            #Util.savePickle(cvResults, fileName)
        except:
            logging.debug("Caught an error in the code ... skipping")
            raise
        else:
            logging.debug("File exists: " + fileName)

    return
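# evaluateStratifiedCv above is assumed to return the mean and variance of
# the AUC over stratified cross-validation folds. A standalone sketch of
# that computation with scikit-learn; `model` is any classifier with fit
# and predict_proba, standing in for the TreeRankForest.
import numpy
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

def stratifiedCvAuc(model, X, Y, folds):
    aucs = []
    for trainInds, testInds in StratifiedKFold(folds).split(X, Y):
        model.fit(X[trainInds], Y[trainInds])
        probs = model.predict_proba(X[testInds])[:, 1]
        aucs.append(roc_auc_score(Y[testInds], probs))
    return numpy.mean(aucs), numpy.var(aucs)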
def __init__(self, learningAlg, windowSize, preprocessor=Standardiser()):
    self.windowSize = windowSize
    self.learningAlg = learningAlg
    self.preprocessor = preprocessor
    self.printStep = 50
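# Note that the default preprocessor=Standardiser() is evaluated once at
# definition time, so every instance constructed without an explicit
# preprocessor shares the same Standardiser object. A self-contained
# illustration of this Python behaviour (Preprocessor and Learner are
# made-up names for the demonstration):
class Preprocessor:
    pass

class Learner:
    def __init__(self, preprocessor=Preprocessor()):
        self.preprocessor = preprocessor

a = Learner()
b = Learner()
print(a.preprocessor is b.preprocessor)  # True: both share the default object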
""" Compare the clustering methods in scikits.learn to see which ones are fastest and most accurate """ import time import numpy import sklearn.cluster as cluster from apgl.data.Standardiser import Standardiser import scipy.cluster.vq as vq numExamples = 10000 numFeatures = 500 X = numpy.random.rand(numExamples, numFeatures) X = Standardiser().standardiseArray(X) k = 10 numRuns = 10 maxIter = 100 tol = 10**-4 intialCentroids = X[0:k, :] #Quite fast print("Running scikits learn k means") clusterer = cluster.KMeans(k=k, n_init=numRuns, tol=tol, init=intialCentroids, max_iter=maxIter) start = time.clock()
import numpy
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the 3d projection on older matplotlib
import sklearn.cluster
from apgl.data.Standardiser import Standardiser

numExamples = 100
numFeatures = 3
std = 0.1

# Three Gaussian clusters of sizes 20, 50 and 30 around fixed centres
V = numpy.random.rand(numExamples, numFeatures)
V[0:20, :] = numpy.random.randn(20, numFeatures) * std
V[0:20, 0:3] += numpy.array([1, 0.2, -1])
V[20:70, :] = numpy.random.randn(50, numFeatures) * std
V[20:70, 0:3] += numpy.array([-0.5, 1, -1])
V[70:, :] = numpy.random.randn(30, numFeatures) * std
V[70:, 0:3] += numpy.array([-0.3, 0.4, -0.1])

# Centre the examples, then normalise via the Standardiser (applied to the transpose)
U = V - numpy.mean(V, 0)
U = Standardiser().normaliseArray(U.T).T

fig = plt.figure(0)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(U[0:20, 0], U[0:20, 1], U[0:20, 2], c="red")
ax.scatter(U[20:70, 0], U[20:70, 1], U[20:70, 2], c="blue")
ax.scatter(U[70:, 0], U[70:, 1], U[70:, 2], c="green")

UU = U.dot(U.T)
#s, X = numpy.linalg.eig(UU)
# SVD of the centred data: X holds left singular vectors, a the singular values
X, a, Y = numpy.linalg.svd(U)

# Now compute true cluster error
k = 3
kmeans = sklearn.cluster.KMeans(n_clusters=k)
kmeans.fit(U)
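# The "compute true cluster error" comment suggests comparing the k-means
# labels against the known generating clusters (examples 0:20, 20:70 and
# 70:100). One way to do that is the adjusted Rand index from scikit-learn;
# the metric choice is an assumption, not the original code.
from sklearn.metrics import adjusted_rand_score

trueLabels = numpy.zeros(numExamples, numpy.int64)
trueLabels[20:70] = 1
trueLabels[70:] = 2
print("Adjusted Rand index: " + str(adjusted_rand_score(trueLabels, kmeans.labels_)))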