def testCentreArray(self):
    """Check Standardiser.centreArray on a random matrix and on a row subset.

    First centres a random numExamples x numFeatures matrix and checks that
    the entries of the result sum to (almost) zero and that the result equals
    X minus the vector returned by getCentreVector(). Then centres the first
    three rows of X with the same preprocessor and checks the result equals
    the first three rows of the previously centred matrix.
    """
    numExamples = 10
    numFeatures = 3

    preprocessor = Standardiser()

    #Test an everyday matrix
    X = numpy.random.rand(numExamples, numFeatures)
    Xc = preprocessor.centreArray(X)
    centreV = preprocessor.getCentreVector()
    # assertAlmostEqual replaces the deprecated alias assertAlmostEquals,
    # which no longer exists in Python 3.12+.
    self.assertAlmostEqual(numpy.sum(Xc), 0, places=3)
    self.assertTrue((X-centreV == Xc).all())

    #Now take out 3 rows of X, centre them and compare to the centred X
    # NOTE(review): this presumably relies on the preprocessor reusing the
    # centre vector computed in the first call -- confirm against Standardiser.
    Xs = X[0:3, :]
    Xsc = preprocessor.centreArray(Xs)
    self.assertTrue((Xsc == Xc[0:3, :]).all())
# For several wavelet families, compute the fraction of the total of the
# eigenvalues of the Gram matrix Xw.dot(Xw.T) that is captured by the largest
# k eigenvalues, for a range of k.
X, X2, Xs, XOpls, YList, ages, df = MetabolomicsUtils.loadData()

mode = "cpd"
maxLevel = 10
# NOTE(review): errors and numFeatures are not read in the visible part of
# this script; kept in case later code uses them.
errors = numpy.zeros(maxLevel)
numFeatures = numpy.zeros(maxLevel)
level = 10
waveletStrs = ["haar", "db4", "db8"]

# Numbers of leading eigenvalues for which the cumulative fraction is computed
numComponents = [1, 5, 10, 15, 20, 25, 50, 100, 150, 200]

#The variances are very similar across different wavelets
for waveletStr in waveletStrs:
    Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
    standardiser = Standardiser()
    Xw = standardiser.centreArray(Xw)

    # Xw.dot(Xw.T) is symmetric (assuming Xw is real-valued), so use eigh:
    # it returns real eigenvalues in ascending order, whereas the general
    # eig solver may return values with spurious imaginary parts.
    w, V = numpy.linalg.eigh(Xw.dot(Xw.T))
    # Largest eigenvalues first
    w = numpy.flipud(w)

    totalVariance = numpy.sum(w)
    variances = [numpy.sum(w[0:k])/totalVariance for k in numComponents]
    #print(variances)