Example #1
    def learnModel(self, X, y, folds=3):
        """
        Train using the given examples and labels, however first conduct grid
        search in conjunction with cross validation to find the best parameters.
        We also conduct filtering with a variety of values. 
        """
        #Hard-coding this grid of C values is not ideal
        Cs = 2.0**numpy.arange(-2, 7, dtype=numpy.float64)
        #Cs = numpy.array([0.1, 2.0])

        if self.waveletInds is None:
            self.waveletInds = numpy.arange(X.shape[1])

        nonWaveletInds = numpy.setdiff1d(numpy.arange(X.shape[1]),  self.waveletInds)

        Xw = X[:, self.waveletInds]
        Xo = X[:, nonWaveletInds]

        featureInds = numpy.flipud(numpy.argsort(numpy.sum(Xw**2, 0)))
        meanAUCs = numpy.zeros((Cs.shape[0], self.candidatesN.shape[0]))
        stdAUCs = numpy.zeros((Cs.shape[0], self.candidatesN.shape[0]))

        #Standardise the data
        Xw = Standardiser().standardiseArray(Xw)
        Xo = Standardiser().standardiseArray(Xo)

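        #Grid search: score every (C, number of wavelet features) pair by
        #stratified cross-validated AUC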
        for i in range(Cs.shape[0]):
            for j in range(self.candidatesN.shape[0]):
                self.linearSVM.setC(Cs[i])
                newX = numpy.c_[Xw[:, featureInds[0:self.candidatesN[j]]], Xo]
                meanAUCs[i, j], stdAUCs[i, j] = self.linearSVM.evaluateStratifiedCv(newX, y, folds, metricMethod=Evaluator.auc)

        (bestI, bestJ) = numpy.unravel_index(numpy.argmax(meanAUCs), meanAUCs.shape)
        self.linearSVM.setC(Cs[bestI])
        self.featureInds = numpy.r_[self.waveletInds[featureInds[0:self.candidatesN[bestJ]]], nonWaveletInds]
        logging.debug("Best learner found: " + str(self.linearSVM) + " N:" + str(self.candidatesN[bestJ]))

        self.standardiser = Standardiser()
        newX = self.standardiser.standardiseArray(X[:, self.featureInds])
        self.linearSVM.learnModel(newX, y)
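
A minimal usage sketch, assuming a hypothetical WaveletPredictor host class that owns linearSVM, candidatesN and waveletInds; every name besides learnModel is an assumption, not from the original source:

import numpy

X = numpy.random.rand(100, 60)                       #100 examples, 60 features
y = (numpy.random.rand(100) < 0.5).astype(numpy.int64)

predictor = WaveletPredictor()                       #Hypothetical host class
predictor.candidatesN = numpy.array([10, 20, 40])    #Candidate feature counts to try
predictor.waveletInds = None                         #Treat every column as a wavelet feature
predictor.learnModel(X, y, folds=3)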
Example #2
    def saveResults(self, leafRankGenerators, standardise=True):
        """
        Compute the results and save them for a particular hormone. Does so for all
        leafranks
        """
        j = 0
        nonNaInds = self.YList[j][1]
        hormoneInd = self.hormoneInds[j]

        k = 2
        if isinstance(self.X, numpy.ndarray):
            X = self.X[nonNaInds, :]
        else:
            X = self.X[j][nonNaInds, :]
        X = numpy.c_[X, self.ages[nonNaInds]]
        if standardise:
            X = Standardiser().standardiseArray(X)
        Y = hormoneInd[k]

        waveletInds = numpy.arange(X.shape[1]-1)

        logging.debug("Shape of examples: " + str(X.shape))
        logging.debug("Distribution of labels: " + str(numpy.bincount(Y)))

        #pca = decomp.PCA(n_components=40)
        #X = pca.fit_transform(X)
        #print(X.shape)

        #Go through all the leafRanks
        for i in range(len(leafRankGenerators)):
            #Compute TreeRankForest here
            fileName = self.resultsDir + "TreeRankForest-" + self.hormoneNames[j] + "_" + str(k) + "-" +  leafRankGenerators[i][1]  + "-" + self.featuresName +  ".dat"
            try:
                logging.debug("Computing file " + fileName)
                #treeRankForest = TreeRankForest(self.funcLeafRankGenerators[0][0](waveletInds))
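                #Note: the loop index i only affects the file name here; the first generator is always used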
                treeRankForest = TreeRankForest(self.leafRankGenerators[0][0])
                treeRankForest.setMaxDepth(10)
                treeRankForest.setNumTrees(5)
                #Setting this low definitely helps 
                #treeRankForest.setFeatureSize(1.0)
                treeRankForest.setFeatureSize(0.05)
                #The following 2 lines definitely improve stability and the AUC 
                treeRankForest.setSampleSize(1.0)
                #Setting this to true results in slightly worse results 
                treeRankForest.setSampleReplace(True)
                mean, var = treeRankForest.evaluateStratifiedCv(X, Y, self.folds, metricMethod=Evaluator.auc)
                print(mean)

                #treeRank = TreeRank(self.leafRankGenerators[0][0])
                #treeRank.setMaxDepth(self.maxDepth)
                #(bestParams, allMetrics, bestMetaDicts) = treeRank.evaluateCvOuter(X, Y, self.folds)
                #print(str(allMetrics))


                #Util.savePickle(cvResults, fileName)
            except Exception:
                logging.debug("Caught an error in the code ... re-raising")
                raise
            else:
                logging.debug("Finished computing file " + fileName)
        return
Example #3
    def __init__(self, learningAlg, windowSize, preprocessor=None):
        self.windowSize = windowSize
        self.learningAlg = learningAlg
        #Avoid sharing one default Standardiser across every instance
        self.preprocessor = preprocessor if preprocessor is not None else Standardiser()
        self.printStep = 50
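
A short construction sketch; the WindowedLearner class name and the use of scikit-learn's LinearSVC as the learning algorithm are assumptions, not from the original source:

from sklearn.svm import LinearSVC
from apgl.data.Standardiser import Standardiser

#Hypothetical class name; omitting preprocessor yields a fresh Standardiser
learner = WindowedLearner(LinearSVC(), windowSize=50)

#Or supply an explicit preprocessor instead of the default
learner2 = WindowedLearner(LinearSVC(), windowSize=50, preprocessor=Standardiser())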
Example #4
"""
Compare the clustering methods in scikits.learn to see which ones are fastest
and most accurate 
"""
import time
import numpy
import sklearn.cluster as cluster
from apgl.data.Standardiser import Standardiser
import scipy.cluster.vq as vq

numExamples = 10000
numFeatures = 500

X = numpy.random.rand(numExamples, numFeatures)
X = Standardiser().standardiseArray(X)

k = 10
numRuns = 10
maxIter = 100
tol = 10**-4

initialCentroids = X[0:k, :]

#Quite fast
print("Running scikits learn k means")
clusterer = cluster.KMeans(k=k,
                           n_init=numRuns,
                           tol=tol,
                           init=intialCentroids,
                           max_iter=maxIter)
start = time.clock()
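
The snippet is cut off before the comparison the docstring promises; a minimal sketch of timing scipy's kmeans2 on the same data and starting centroids, as a hypothetical continuation:

#Compare against scipy's implementation with the same starting centroids
start = time.perf_counter()
centroids, labels = vq.kmeans2(X, initialCentroids, iter=maxIter, minit="matrix")
print("scipy kmeans2 took " + str(time.perf_counter() - start) + " seconds")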
Example #5
import numpy
import sklearn.cluster
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D    #Registers the 3d projection on older matplotlib versions
from apgl.data.Standardiser import Standardiser

numExamples = 100
numFeatures = 3
std = 0.1

V = numpy.random.rand(numExamples, numFeatures)
V[0:20, :] = numpy.random.randn(20, numFeatures) * std
V[0:20, 0:3] += numpy.array([1, 0.2, -1])

V[20:70, :] = numpy.random.randn(50, numFeatures) * std
V[20:70, 0:3] += numpy.array([-0.5, 1, -1])

V[70:, :] = numpy.random.randn(30, numFeatures) * std
V[70:, 0:3] += numpy.array([-0.3, 0.4, -0.1])

U = V - numpy.mean(V, 0)
U = Standardiser().normaliseArray(U.T).T

fig = plt.figure(0)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(U[0:20, 0], U[0:20, 1], U[0:20, 2], c="red")
ax.scatter(U[20:70, 0], U[20:70, 1], U[20:70, 2], c="blue")
ax.scatter(U[70:, 0], U[70:, 1], U[70:, 2], c="green")

UU = U.dot(U.T)
#s, X = numpy.linalg.eig(UU)
X, a, Y = numpy.linalg.svd(U)

#Now compute true cluster error
k = 3
kmeans = sklearn.cluster.KMeans(n_clusters=k)
kmeans.fit(U)
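
The example stops right after fitting, so the "true cluster error" the final comment promises is never computed; a minimal sketch, assuming the three blocks used to generate V are the ground-truth clusters:

import itertools

#Ground-truth labels from the block structure used to build V
trueLabels = numpy.zeros(numExamples, numpy.int64)
trueLabels[20:70] = 1
trueLabels[70:] = 2

#Cluster labels are arbitrary, so score the best permutation of them
bestAccuracy = 0.0
for perm in itertools.permutations(range(k)):
    mapped = numpy.array([perm[label] for label in kmeans.labels_])
    bestAccuracy = max(bestAccuracy, numpy.mean(mapped == trueLabels))
print("True cluster error: " + str(1.0 - bestAccuracy))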