def testGetWaveletFeaturesTest(self):
    """Check that wavelet features can be inverted back to the original data.

    First a single row of X is decomposed with ``pywt.wavedec`` and rebuilt
    with ``pywt.waverec``. Then, for each of the 'db4', 'db8' and 'haar'
    wavelets, the matrix returned by ``MetabolomicsUtils.getWaveletFeatures``
    is cut back into per-level coefficient arrays and inverted row by row.
    In every case the reconstruction must match the input to within ``tol``
    (Euclidean/Frobenius norm of the difference).
    """
    # Only X is needed by this test. Indexing the first returned value
    # (instead of unpacking every one) keeps the test working regardless
    # of how many values loadData() returns.
    X = MetabolomicsUtils.loadData()[0]
    mode = "zpd"
    level = 10
    tol = 10**-6

    # See if we can reproduce a single row from its wavelet decomposition
    C = pywt.wavedec(X[0, :], "db4", level=level, mode=mode)
    X0 = pywt.waverec(C, "db4", mode)
    self.assertLess(numpy.linalg.norm(X0 - X[0, :]), tol)

    def reconstructSignal(Xw, waveletStr, C):
        """Invert every row of Xw back into signal space.

        C is a reference decomposition; only the lengths of its arrays are
        used, to decide where each coefficient array starts and ends
        within a row of Xw. The test assumes the rows of Xw hold those
        arrays side by side, in the same order as C.
        """
        Xrecstr = numpy.zeros(X.shape)
        for i in range(Xw.shape[0]):
            coeffs = []
            colIndex = 0
            for c in C:
                coeffs.append(Xw[i, colIndex:colIndex + len(c)])
                colIndex += len(c)
            Xrecstr[i, :] = pywt.waverec(coeffs, waveletStr, mode)
        return Xrecstr

    # Now do the same for the whole X, once per wavelet family
    for waveletStr in ("db4", "db8", "haar"):
        C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
        Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
        Xrecstr = reconstructSignal(Xw, waveletStr, C)
        self.assertLess(
            numpy.linalg.norm(X - Xrecstr),
            tol,
            "reconstruction error too large for wavelet " + waveletStr,
        )
def testReconstructSignal(self):
    """Check that reconstructSignal inverts getWaveletFeatures.

    A random 100 x 16 matrix is turned into wavelet features with
    ``MetabolomicsUtils.getWaveletFeatures`` and then passed to
    ``MetabolomicsUtils.reconstructSignal``; the result must equal the
    original matrix to within ``tol``.
    """
    numExamples = 100
    numFeatures = 16
    X = numpy.random.rand(numExamples, numFeatures)

    level = 10
    mode = "cpd"
    waveletStr = "db4"
    tol = 10**-6

    # Reference decomposition of one row, handed to reconstructSignal.
    # Uses the same `level` variable as getWaveletFeatures below so the
    # two calls cannot silently drift apart.
    C = pywt.wavedec(X[0, :], waveletStr, mode=mode, level=level)

    Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
    X2 = MetabolomicsUtils.reconstructSignal(X, Xw, waveletStr, mode, C)

    # assertLess reports the actual error norm when the check fails
    self.assertLess(numpy.linalg.norm(X - X2), tol)
def testFilterWavelet(self):
    """Check the output of MetabolomicsUtils.filterWavelet.

    For wavelet features Xw of a random 100 x 16 matrix and N = 10, the
    call ``filterWavelet(Xw, N)`` must return:

    * an index array ``inds`` with exactly N entries,
    * a matrix ``Xw2`` that equals Xw on the columns in ``inds``, and
    * is zero (to within ``tol``) on every other column.
    """
    numExamples = 100
    numFeatures = 16
    X = numpy.random.rand(numExamples, numFeatures)

    level = 10
    mode = "cpd"
    waveletStr = "db4"
    tol = 10**-6

    Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)

    N = 10
    Xw2, inds = MetabolomicsUtils.filterWavelet(Xw, N)

    # assertEqual, not the deprecated alias assertEquals (removed in 3.12)
    self.assertEqual(inds.shape[0], N)

    # Selected columns are left untouched
    self.assertLess(numpy.linalg.norm(Xw[:, inds] - Xw2[:, inds]), tol)

    # All remaining columns are zeroed out
    zeroInds = numpy.setdiff1d(numpy.arange(Xw.shape[1]), inds)
    self.assertLess(numpy.linalg.norm(Xw2[:, zeroInds]), tol)
logging.debug('parent process:' + str(os.getppid())) logging.debug('process id:' + str(os.getpid())) self.saveResults(self.funcLeafRankGenerators, False) logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) logging.debug("Running from machine " + str(gethostname())) numpy.random.seed(21) dataDir = PathDefaults.getDataDir() + "metabolomic/" X, X2, Xs, XOpls, YList, ages, df = MetabolomicsUtils.loadData() waveletStr = 'db4' mode = "cpd" level = 10 XwDb4 = MetabolomicsUtils.getWaveletFeatures(X, 'db4', level, mode) XwDb8 = MetabolomicsUtils.getWaveletFeatures(X, 'db8', level, mode) XwHaar = MetabolomicsUtils.getWaveletFeatures(X, 'haar', level, mode) dataList = [] dataList.extend([(XwDb4, "db4")]) lock = multiprocessing.Lock() numpy.random.seed(datetime.datetime.now().microsecond) #numpy.random.seed(21) permInds = numpy.random.permutation(len(dataList)) numpy.random.seed(21) try: for ind in permInds:
logging.debug('module name:' + __name__) logging.debug('parent process:' + str(os.getppid())) logging.debug('process id:' + str(os.getpid())) self.saveResults(self.pcaLeafRankGenerators, "pca") logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) logging.debug("Running from machine " + str(gethostname())) numpy.random.seed(21) dataDir = PathDefaults.getDataDir() + "metabolomic/" X, X2, Xs, XOpls, YList, ages, df = MetabolomicsUtils.loadData() mode = "cpd" level = 10 XwDb4 = MetabolomicsUtils.getWaveletFeatures(X, 'db4', level, mode) XwDb8 = MetabolomicsUtils.getWaveletFeatures(X, 'db8', level, mode) XwHaar = MetabolomicsUtils.getWaveletFeatures(X, 'haar', level, mode) #Filter the wavelets Ns = [10, 25, 50, 75, 100] dataList = [] for i in range(len(Ns)): N = Ns[i] XwDb4F, inds = MetabolomicsUtils.filterWavelet(XwDb4, N) dataList.append((XwDb4F[:, inds], "Db4-" + str(N))) XwDb8F, inds = MetabolomicsUtils.filterWavelet(XwDb8, N) dataList.append((XwDb8F[:, inds], "Db8-" + str(N)))
from apgl.util.PathDefaults import PathDefaults
from exp.metabolomics.MetabolomicsUtils import MetabolomicsUtils
import numpy
import pywt

# Script: load the synthetic control data, compute wavelet features for it
# and print the shapes of the data, the features and each coefficient array.

dataDir = PathDefaults.getDataDir() + "functional/"
fileName = dataDir + "synthetic_control.data"

# Load the data set, ignoring its first 200 examples
X = numpy.loadtxt(fileName)[200:, :]

# Labels for the remaining examples:
#   -1 for the first 200 rows (increasing trend and decreasing trend)
#   +1 for the rest (upward shift and downward shift)
Y = numpy.where(numpy.arange(X.shape[0]) < 200, -1.0, 1.0)

# Compute wavelets
waveletStr = "db2"
level = 2
mode = "cpd"
Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)

for shape in (X.shape, Xw.shape):
    print(shape)

# Decompose the first example directly to show the size of each level
C = pywt.wavedec(X[0, :], waveletStr, mode, level)
for coeffs in C:
    print(coeffs.shape)
logging.debug("process id:" + str(os.getpid())) self.saveResults(self.funcLeafRankGenerators, False) logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) logging.debug("Running from machine " + str(gethostname())) numpy.random.seed(21) dataDir = PathDefaults.getDataDir() + "metabolomic/" X, X2, Xs, XOpls, YList, ages, df = MetabolomicsUtils.loadData() waveletStr = "db4" mode = "cpd" level = 10 XwDb4 = MetabolomicsUtils.getWaveletFeatures(X, "db4", level, mode) XwDb8 = MetabolomicsUtils.getWaveletFeatures(X, "db8", level, mode) XwHaar = MetabolomicsUtils.getWaveletFeatures(X, "haar", level, mode) dataList = [] dataList.extend([(XwDb4, "db4")]) lock = multiprocessing.Lock() numpy.random.seed(datetime.datetime.now().microsecond) # numpy.random.seed(21) permInds = numpy.random.permutation(len(dataList)) numpy.random.seed(21) try: for ind in permInds: