コード例 #1
0
    def testReconstructSignal(self):
        """
        Check that MetabolomicsUtils.reconstructSignal undoes
        MetabolomicsUtils.getWaveletFeatures: a random matrix is turned into
        wavelet features and reconstructed, and the result must match the
        original data up to a small tolerance.
        """
        numExamples = 100
        numFeatures = 16
        X = numpy.random.rand(numExamples, numFeatures)

        level = 10
        mode = "cpd"
        waveletStr = "db4"
        # Decompose a single row with the same wavelet, mode and level as the
        # feature extraction below; C is handed to reconstructSignal together
        # with the features.
        C = pywt.wavedec(X[0, :], waveletStr, mode, level=level)

        Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
        X2 = MetabolomicsUtils.reconstructSignal(X, Xw, waveletStr, mode, C)

        tol = 10**-6
        # assertLess reports both values when the reconstruction error is too large
        self.assertLess(numpy.linalg.norm(X - X2), tol)
コード例 #2
0
    def testGetWaveletFeaturesTest(self):
        """
        See if we can reproduce the data from its wavelet features.

        First a single row is decomposed and reconstructed directly with pywt.
        Then, for the 'db4', 'db8' and 'haar' wavelets, the rows returned by
        MetabolomicsUtils.getWaveletFeatures are split back into coefficient
        arrays and reconstructed, and compared with the original matrix.
        """
        X, X2, Xs, Xopls, YList, df = MetabolomicsUtils.loadData()

        mode = "zpd"
        level = 10
        tol = 10**-6

        # Round trip of the first row using pywt only
        waveletStr = 'db4'
        C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
        X0 = pywt.waverec(C, waveletStr, mode)
        self.assertTrue(numpy.linalg.norm(X0 - X[0, :]) < tol)

        def rebuildRows(features, template, waveletName):
            """
            Reconstruct every row of features, cutting it into consecutive
            pieces whose lengths are those of the arrays in template.
            """
            widths = [len(band) for band in template]
            rebuilt = numpy.zeros(X.shape)

            for rowIndex in range(features.shape[0]):
                bands = []
                start = 0

                for width in widths:
                    bands.append(features[rowIndex, start:start + width])
                    start += width

                rebuilt[rowIndex, :] = pywt.waverec(tuple(bands), waveletName, mode)

            return rebuilt

        # Now do the same for the whole X, once per wavelet family
        for waveletStr in ('db4', 'db8', 'haar'):
            C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
            Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
            Xrecstr = rebuildRows(Xw, C, waveletStr)
            self.assertTrue(numpy.linalg.norm(X - Xrecstr) < tol)
コード例 #3
0
    def testFilterWavelet(self):
        """
        Check MetabolomicsUtils.filterWavelet on wavelet features of a random
        matrix: it must return exactly N column indices, leave those columns
        unchanged and set every other column to (numerically) zero.
        """
        numExamples = 100
        numFeatures = 16
        X = numpy.random.rand(numExamples, numFeatures)

        level = 10
        mode = "cpd"
        waveletStr = "db4"

        Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)

        N = 10
        Xw2, inds = MetabolomicsUtils.filterWavelet(Xw, N)

        tol = 10**-6
        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists in Python 3.12
        self.assertEqual(inds.shape[0], N)
        # The selected columns are passed through untouched
        self.assertLess(numpy.linalg.norm(Xw[:, inds] - Xw2[:, inds]), tol)

        # Every column that was not selected must have been zeroed
        zeroInds = numpy.setdiff1d(numpy.arange(Xw.shape[1]), inds)
        self.assertLess(numpy.linalg.norm(Xw2[:, zeroInds]), tol)
コード例 #4
0
    def testCreateIndicatorLabels(self):
        """
        Check MetabolomicsUtils.createIndicatorLabel for the three hormones.

        First, for the non-missing concentrations, every example must be
        assigned to exactly one indicator column (each row sums to one).
        Second, the computed indicators are compared with the indicator
        columns stored in data.RMN.total.6.txt; only the testosterone
        comparison is asserted, the cortisol and IGF1 comparisons are
        disabled (they were commented out, the reason is not recorded).
        """
        metaUtils = MetabolomicsUtils()
        X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()

        # (key into boundsDict, concentrations, indicator columns in the data file)
        hormones = [
            ("Cortisol", YCortisol, ["Ind.Cortisol.1", "Ind.Cortisol.2", "Ind.Cortisol.3"]),
            ("Testosterone", YTesto, ["Ind.Testo.1", "Ind.Testo.2", "Ind.Testo.3"]),
            ("IGF1", YIgf1, ["Ind.IGF1.1", "Ind.IGF1.2", "Ind.IGF1.3"]),
        ]

        for boundsKey, Y, columns in hormones:
            # Filter into a new array so the loaded concentrations stay intact
            # for the file comparison below (no second loadData call needed).
            YObserved = Y[numpy.logical_not(numpy.isnan(Y))]
            indicators = metaUtils.createIndicatorLabel(YObserved, metaUtils.boundsDict[boundsKey])

            s = numpy.sum(indicators, 1)
            nptst.assert_array_equal(s, numpy.ones(s.shape[0]))

        #Now compare to those labels in the file
        dataDir = PathDefaults.getDataDir() +  "metabolomic/"
        fileName = dataDir + "data.RMN.total.6.txt"
        data = pandas.read_csv(fileName, delimiter=",")

        # TODO: enable the comparison for "Cortisol" and "IGF1" once the
        # computed and stored indicators are known to agree.
        comparedHormones = ("Testosterone",)

        for boundsKey, Y, columns in hormones:
            indicators = metaUtils.createIndicatorLabel(Y, metaUtils.boundsDict[boundsKey])
            fileIndicators = numpy.array(data[columns])

            if boundsKey not in comparedHormones:
                continue

            for i in range(indicators.shape[0]):
                # Skip rows where either the concentration or the stored label is missing
                if not numpy.isnan(Y[i]) and not numpy.isnan(fileIndicators[i, :]).any():
                    nptst.assert_almost_equal(fileIndicators[i, :], indicators[i, :])
コード例 #5
0
    def testScoreLabel(self):
        """
        Check the orderings produced by MetabolomicsUtils.scoreLabels for
        random labels: the last score column must order the examples like Y,
        the first column in the reverse order of Y, and the middle column in
        the reverse order of |Y - 0.5|. Constant labels must give all-one scores.
        """
        numExamples = 10
        bounds = numpy.array([0, 0.2, 0.8, 1.0])

        Y = numpy.random.rand(numExamples)
        YScores = MetabolomicsUtils.scoreLabels(Y, bounds)

        orderY = numpy.argsort(Y)
        orderFirst = numpy.argsort(YScores[:, 0])
        orderMiddle = numpy.argsort(YScores[:, 1])
        orderLast = numpy.argsort(YScores[:, -1])
        orderCentreDist = numpy.argsort(numpy.abs(Y - 0.5))

        self.assertTrue(numpy.all(orderY == orderLast))
        self.assertTrue(numpy.all(orderY == orderFirst[::-1]))
        self.assertTrue(numpy.all(orderCentreDist == orderMiddle[::-1]))

        #Test we don't get problems when Y has the same values
        Y = numpy.ones(numExamples)
        YScores = MetabolomicsUtils.scoreLabels(Y, bounds)

        expectedScores = numpy.ones((Y.shape[0], 3))
        self.assertTrue(numpy.all(YScores == expectedScores))
コード例 #6
0
 def testLoadData(self):
     """
     Smoke test: loadData must run and return eight items, the fourth of
     which unpacks into three OPLS matrices.
     """
     loaded = MetabolomicsUtils().loadData()

     X, XStd, X2, opls, YCortisol, YTesto, YIgf1, ages = loaded
     XoplsCortisol, XoplsTesto, XoplsIgf1 = opls
コード例 #7
0
from wallhack.metabolomics.MetabolomicsUtils import MetabolomicsUtils
from socket import gethostname
from sklearn.decomposition import PCA

# Run a variety of bipartite ranking on the metabolomics data

# Debug-level logging to stdout, tagged with the machine name
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.debug("Running from machine " + str(gethostname()))
numpy.random.seed(21)

# Use taskset to set this process's CPU affinity mask to 0xffffffff
affinityCommand = 'taskset -p 0xffffffff %d' % os.getpid()
os.system(affinityCommand)

dataDir = PathDefaults.getDataDir() +  "metabolomic/"
metaUtils = MetabolomicsUtils()
X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()

#We model 99.1% of the spectrum with 100 eigenvectors
pca = PCA(n_components=100)
XPca = pca.fit_transform(X)

# Wavelet features of X for three wavelet families, same mode and level
mode = "cpd"
level = 10
XwDb4, XwDb8, XwHaar = [
    MetabolomicsUtils.getWaveletFeatures(X, waveletName, level, mode)
    for waveletName in ('db4', 'db8', 'haar')
]

# Feature representations keyed by name
dataDict = {"raw": X, "pca": XPca}
コード例 #8
0
ファイル: FindBoundaries.py プロジェクト: charanpald/wallhack
import numpy
from wallhack.metabolomics.MetabolomicsUtils import MetabolomicsUtils

# Load the data; df is the table the label columns are read from below.
X, X2, df = MetabolomicsUtils.loadData()

# Just figure out the boundaries of the levels: print each hormone value next
# to its three indicator columns, sorted by value, so the thresholds can be
# read off the output.
numpy.set_printoptions(threshold=3000)
labelNames = ["IGF1.val", "Cortisol.val", "Testosterone.val"]
labelNames2 = ["Ind.IGF1.1", "Ind.IGF1.2", "Ind.IGF1.3"]
YList = MetabolomicsUtils.createLabelList(df, labelNames)
# NOTE(review): YList2 is not used in the visible part of this script.
YList2 = MetabolomicsUtils.createLabelList(df, labelNames2)

# IGF1: YList[0] corresponds to labelNames[0]. Each entry unpacks into the
# label values and an index array (presumably the rows with a valid value — confirm).
Y, inds = YList[0]
# NOTE(review): df.rx presumably selects a column by name (rpy2-style data frame) — confirm.
Y1 = numpy.array(df.rx(labelNames2[0])).ravel()[inds]
Y2 = numpy.array(df.rx(labelNames2[1])).ravel()[inds]
Y3 = numpy.array(df.rx(labelNames2[2])).ravel()[inds]

# Sort by the hormone value and stack value + three indicators column-wise.
inds = numpy.argsort(Y)
YY = numpy.c_[Y[inds], Y1[inds]]
YY = numpy.c_[YY, Y2[inds]]
YY = numpy.c_[YY, Y3[inds]]
print(YY)

# Cortisol: same extraction using the cortisol indicator columns.
labelNames2 = ["Ind.Cortisol.1", "Ind.Cortisol.2", "Ind.Cortisol.3"]
YList2 = MetabolomicsUtils.createLabelList(df, labelNames2)

# YList[1] corresponds to labelNames[1] ("Cortisol.val").
Y, inds = YList[1]
Y1 = numpy.array(df.rx(labelNames2[0])).ravel()[inds]
Y2 = numpy.array(df.rx(labelNames2[1])).ravel()[inds]
Y3 = numpy.array(df.rx(labelNames2[2])).ravel()[inds]
コード例 #9
0
    def saveResults(self):
        """
        Compute the results and save them for every hormone. Does so for all
        enabled learners.

        For each (name, concentration) entry of self.hormoneDict the
        concentrations are turned into indicator columns with
        MetabolomicsUtils.createIndicatorLabel, and examples whose
        concentration is NaN are dropped. Each indicator column is mapped to
        -1/+1 labels. Each feature matrix in self.dataDict gets self.ages
        appended as an extra column and is standardised, then passed to
        self.saveResult for every learner whose run* flag is set. The two L1
        SVM learners additionally call self.saveWeightVectorResults.

        Output files are named
        <resultsDir><Learner>-<hormone>-<indicator index>-<data name>.npy,
        with a "Weights" prefix on the learner name for the weight vectors.
        """
        metaUtils = MetabolomicsUtils()

        logging.debug("Running on hormones: " + str(self.hormoneDict.keys()))

        for hormoneName, hormoneConc in self.hormoneDict.items():
            nonNaInds = numpy.logical_not(numpy.isnan(hormoneConc))
            hormoneIndicators = metaUtils.createIndicatorLabel(hormoneConc, metaUtils.boundsDict[hormoneName])

            for i in range(hormoneIndicators.shape[1]):
                #Make labels -1/+1
                # The builtin int replaces numpy.int, an alias of it that was
                # removed from NumPy in version 1.24.
                Y = numpy.array(hormoneIndicators[nonNaInds, i], int)*2-1

                for dataName, dataFeatures in self.dataDict.items():
                    X = dataFeatures[nonNaInds, :]
                    X = numpy.c_[X, self.ages[nonNaInds]]
                    X = Standardiser().standardiseArray(X)

                    # Common tail of every output file name for this combination
                    suffix = "-" + hormoneName + "-" + str(i) + "-" + dataName + ".npy"

                    if self.runCartTreeRank:
                        fileName = self.resultsDir + "CartTreeRank" + suffix
                        self.saveResult(X, Y, self.cartTreeRank, self.cartTreeRankParams, fileName)

                    if self.runRbfSvmTreeRank:
                        fileName = self.resultsDir + "RbfSvmTreeRank" + suffix
                        self.saveResult(X, Y, self.rbfSvmTreeRank, self.rbfSvmTreeRankParams, fileName)

                    if self.runL1SvmTreeRank:
                        fileName = self.resultsDir + "L1SvmTreeRank" + suffix
                        self.saveResult(X, Y, self.l1SvmTreeRank, self.l1SvmTreeRankParams, fileName)

                        #For this SVM save the weight vector
                        weightsFileName = self.resultsDir + "WeightsL1SvmTreeRank" + suffix
                        self.saveWeightVectorResults(X, Y, self.l1SvmTreeRank, self.l1SvmTreeRankParams, weightsFileName)

                    if self.runCartTreeRankForest:
                        fileName = self.resultsDir + "CartTreeRankForest" + suffix
                        self.saveResult(X, Y, self.cartTreeRankForest, self.cartTreeRankForestParams, fileName)

                    if self.runRbfSvmTreeRankForest:
                        fileName = self.resultsDir + "RbfSvmTreeRankForest" + suffix
                        self.saveResult(X, Y, self.rbfSvmTreeRankForest, self.rbfSvmTreeRankForestParams, fileName)

                    if self.runL1SvmTreeRankForest:
                        fileName = self.resultsDir + "L1SvmTreeRankForest" + suffix
                        self.saveResult(X, Y, self.l1SvmTreeRankForest, self.l1SvmTreeRankForestParams, fileName)

                        #For this SVM save the weight vector
                        weightsFileName = self.resultsDir + "WeightsL1SvmTreeRankForest" + suffix
                        self.saveWeightVectorResults(X, Y, self.l1SvmTreeRankForest, self.l1SvmTreeRankForestParams, weightsFileName)

                    if self.runRankBoost:
                        fileName = self.resultsDir + "RankBoost" + suffix
                        self.saveResult(X, Y, self.rankBoost, self.rankBoostParams, fileName)

                    if self.runRankSVM:
                        fileName = self.resultsDir + "RankSVM" + suffix
                        self.saveResult(X, Y, self.rankSVM, self.rankSVMParams, fileName)

        logging.debug("All done. See you around!")
コード例 #10
0
import pywt 
from wallhack.metabolomics.MetabolomicsUtils import MetabolomicsUtils
from sandbox.util.PathDefaults import PathDefaults
from socket import gethostname
import matplotlib 
matplotlib.use("GTK3Agg")
import matplotlib.pyplot as plt 
from sandbox.data.Standardiser import Standardiser

# Debug-level logging to stdout, tagged with the machine name
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.debug("Running from machine " + str(gethostname()))

# Fixed random seed and compact array printing
numpy.random.seed(21)
numpy.set_printoptions(linewidth=160, precision=3, suppress=True)

dataDir = PathDefaults.getDataDir() +  "metabolomic/"
metaUtils = MetabolomicsUtils()
X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()

# Wavelet settings and per-level accumulators
waveletStr = 'db4'
mode = "cpd"
maxLevel = 10
errors, numFeatures = numpy.zeros(maxLevel), numpy.zeros(maxLevel)

level = 10
waveletStrs = ["haar", "db4", "db8"]
plt.figure(0)

# Eigenvalues of XStd^T XStd, largest first
C = numpy.dot(XStd.T, XStd)
w, V = numpy.linalg.eigh(C)
w = numpy.sort(w)[::-1]
コード例 #11
0
ファイル: ProcessResults.py プロジェクト: charanpald/wallhack
import logging
import datetime
import matplotlib 
matplotlib.use("GTK3Agg")
import matplotlib.pyplot as plt  
from sandbox.util.PathDefaults import PathDefaults
from sandbox.util.Latex import Latex 
from wallhack.metabolomics.MetabolomicsUtils import MetabolomicsUtils
from wallhack.metabolomics.MetabolomicsExpHelper import MetabolomicsExpHelper

# Debug-level logging to stdout and compact array printing
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
numpy.set_printoptions(suppress=True, precision=3)

# Result files live under the output directory; figures in a subdirectory
resultsDir = PathDefaults.getOutputDir() + "metabolomics/"
figureDir = resultsDir + "Figures/"

metaUtils = MetabolomicsUtils()
X, XStd, X2, (XoplsCortisol, XoplsTesto, XoplsIgf1), YCortisol, YTesto, YIgf1, ages = metaUtils.loadData()

# The helper is built with an empty data dictionary; the seed comes from the clock
dataDict = dict()
numpy.random.seed(datetime.datetime.now().microsecond)
helper = MetabolomicsExpHelper(dataDict, YCortisol, YTesto, YIgf1, ages)

dataNames = ["raw", "pca", "Db4", "Db8", "Haar"]

# Learners to report, paired with their abbreviations. The full set also
# includes "CartTreeRank", "L1SvmTreeRank" and "RbfSvmTreeRank", which are
# left out here.
algorithmPairs = [
    ("CartTreeRankForest", "CART-TRF"),
    ("L1SvmTreeRankForest", "L1-TRF"),
    ("RbfSvmTreeRankForest", "RBF-TRF"),
    ("RankBoost", "RB"),
    ("RankSVM", "RSVM"),
]
algorithms = [fullName for fullName, shortName in algorithmPairs]
algorithmsAbbr = [shortName for fullName, shortName in algorithmPairs]

hormoneNameIndicators = list()
for i, (hormoneName, hormoneConc) in enumerate(helper.hormoneDict.items()):
    hormoneIndicators = metaUtils.createIndicatorLabel(hormoneConc, metaUtils.boundsDict[hormoneName])