def loadSingleTensor(inFilePattern):
    """
    Load a sparse tensor together with its axis and class information.

    Parameters:
    inFilePattern - format string with one placeholder; it is filled with
                    "data" to locate the tensor file and with "info" to
                    locate the shelve file

    Output:
    X - the tensor returned by sptensor.loadTensor
    axisDict - the entry stored under the AXIS key of the shelve file
    classDict - the entry stored under the CLASS key of the shelve file
    """
    X = sptensor.loadTensor(inFilePattern.format("data"))
    tensorInfo = shelve.open(inFilePattern.format("info"), "r")
    # close the shelf even if one of the lookups fails, so the underlying
    # database handle is never leaked
    try:
        axisDict = tensorInfo[AXIS]
        classDict = tensorInfo[CLASS]
    finally:
        tensorInfo.close()
    return X, axisDict, classDict
def decomposeCountTensor(filename, R, outerIters=20, innerIters=10, convergeTol=1e-2, zeroTol=1e-4):
    """
    Load the tensor data from a file and decompose it using CP_APR
    with the specified rank.

    Parameters:
    filename - the file handed to sptensor.loadTensor
    R - the rank of the decomposition
    outerIters - the maximum number of outer iterations
    innerIters - the maximum number of inner iterations
    convergeTol - the convergence tolerance
    zeroTol - the threshold handed to zeroSmallFactors

    Output:
    the post-processed factorization, followed by the iteration statistics
    and the model statistics as returned by CP_APR.cp_apr
    """
    tensor = sptensor.loadTensor(filename)
    aprOptions = dict(tol=convergeTol, maxiters=outerIters, maxinner=innerIters)
    factors, iterStats, modelStats = CP_APR.cp_apr(tensor, R, **aprOptions)
    # 1-norm normalization of the factors followed by a descending sort
    factors.normalize_sort(1)
    return zeroSmallFactors(factors, zeroThr=zeroTol), iterStats, modelStats
## experimental setup
# values taken from the command line
exptID = args.expt
inFile = args.inputFile
R = args.rank
seed = args.seed
outerIters = args.iterations
# fixed settings for every factorization in this script
innerIters, tol, zeroThr = 10, 1e-2, 1e-10
noiseParam = 2
noisePercent = [0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5]

# input file and output file
inputFile = inFile.format("data")
X = sptensor.loadTensor(inputFile)


def factorTensor(X):
    """
    Factor X with CP_APR using the module-level settings above.

    The numpy random generator is re-seeded with `seed` on every call so
    that each factorization starts from the same initialization.

    Output:
    the factorization after normalize_sort(1) and
    decompTools.zeroSmallFactors have been applied
    """
    np.random.seed(seed)
    aprOptions = dict(tol=tol, maxiters=outerIters, maxinner=innerIters)
    factors, _iterStats, _modelStats = CP_APR.cp_apr(X, R, **aprOptions)
    factors.normalize_sort(1)
    return decompTools.zeroSmallFactors(factors, zeroThr=zeroThr)


print("Starting Tensor Factorization with ID:{0}".format(exptID))
# factorization of the unmodified tensor, used as the base comparison
baseTF = factorTensor(X)
totNonzero = len(X.vals)
outfile = open("results/perturb-{0}.json".format(exptID), 'w')
help="number of bootstrap samples", default=10) args = parser.parse_args() inputFile = args.inputFile nSample = args.sample exptID = args.expt + nSample totSamples = args.bootstrap testSize = args.testSize seed = 10 innerIter = 10 outerIter = args.iter R = args.rank zeroThr = 1e-4 X = sptensor.loadTensor(inputFile.format("data")) yaxis = decompTools.loadAxisInfo(inputFile.format("info")) tensorInfo = shelve.open(inputFile.format("info"), "r") Y = np.array(tensorInfo["class"], dtype='int') tensorInfo.close() diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])] predFile = "results/pred-metric-{0}-{1}.csv".format(exptID, nSample) ttss = StratifiedShuffleSplit(Y, n_iter=totSamples, test_size=testSize, random_state=seed) print "Starting Tensor Prediction with ID:{0}".format(exptID) n = 0
parser.add_argument("rank", type=int, help="rank of the decomposition")
parser.add_argument("iter", type=int, help="the number of iterations")
parser.add_argument("-t", "--testSize", type=float, help="test size", default=0.5)
args = parser.parse_args()

rank = args.rank
# NOTE(review): `iter` shadows the builtin; the name is kept because code
# outside this fragment may rely on it
iter = args.iter
inputFile = args.inputFile
exptID = args.exptID
patientSet = args.patientSet
outsql = "results/pred-model-{0}-{1}.sql".format(exptID, rank)
print("Using Rank {0} and iterations {1} and test size {2}".format(rank, iter, args.testSize))

## Load information to run the tests
X = sptensor.loadTensor(inputFile.format("data"))
tensorInfo = shelve.open(inputFile.format("info"), "r")
# close the shelf even if one of the keys is missing
try:
    Y = tensorInfo["class"]
    XAxis = tensorInfo["axis"]
finally:
    tensorInfo.close()
Y = np.array(Y, dtype=int)

## Evaluate the prediction model and store the results as CSV
pm = predictionModel.predictionModel(X, XAxis, Y, rank, testSize=args.testSize, outerIter=iter)
output = pm.evaluatePrediction()
# prepend the experiment ID as the first column of every result row
output = np.column_stack((np.repeat(exptID, output.shape[0]), output))
outputFile = "results/pred-model-{0}-{1}.csv".format(exptID, rank)
np.savetxt(outputFile, output, delimiter=",")

## Write the SQL script that loads the CSV and records the model settings.
# The with-block guarantees the file is flushed and closed even when a
# write fails.
# NOTE(review): patientSet comes straight from the command line and is
# interpolated into the SQL text unescaped - confirm this is acceptable.
with open(outsql, "w") as sqlOut:
    sqlOut.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel, insert into joyceho.predictive_results;\n".format(outputFile))
    sqlOut.write("insert into joyceho.predictive_models values({0}, {1}, \'{2}\',{3}, {4});\n".format(exptID, rank, patientSet, iter, 10))
## experimental setup
# values taken from the command line
exptID = args.expt
inFile = args.inputFile
R = args.rank
seed = args.seed
outerIters = args.iterations
# fixed settings for every factorization in this script
innerIters, tol, zeroThr = 10, 1e-2, 1e-10
noiseParam = 2
noisePercent = [0.01, 0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5]

# input file and output file
inputFile = inFile.format("data")
X = sptensor.loadTensor(inputFile)


def factorTensor(X):
    """
    Factor X with CP_APR using the module-level settings above.

    The numpy random generator is re-seeded with `seed` on every call so
    that each factorization starts from the same initialization.

    Output:
    the factorization after normalize_sort(1) and
    decompTools.zeroSmallFactors have been applied
    """
    np.random.seed(seed)
    aprOptions = dict(tol=tol, maxiters=outerIters, maxinner=innerIters)
    factors, _iterStats, _modelStats = CP_APR.cp_apr(X, R, **aprOptions)
    factors.normalize_sort(1)
    return decompTools.zeroSmallFactors(factors, zeroThr=zeroThr)
import CP_APR
import sptensor

# Command line: positional input/output files plus optional rank and
# outer-iteration count for the factorization being profiled.
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="input file to parse")
parser.add_argument("outputFile", help="output file for profile information")
parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=100)
parser.add_argument("-i", "--iters", type=int, help="Number of outer interations", default=100)
args = parser.parse_args()

########## Profile tensor factorization ###############
# X must keep this name: the profiled statement below refers to it
X = sptensor.loadTensor(args.inputFile)

## Profile
outputFile = args.outputFile
profileStmt = "CP_APR.cp_apr(X,R={0},tol=1e-2, maxiters={1}, maxinner=10)".format(args.rank, args.iters)
cProfile.run(profileStmt, filename=outputFile)

# Read the saved profile back and print it ordered by the 'time' key
p = pstats.Stats(outputFile)
p.sort_stats('time')
p.print_stats()
# Fixed settings for the timing comparison
R = 50
iters = 70
samples = 10
pcaModel = RandomizedPCA(n_components=R)

parser = argparse.ArgumentParser()
parser.add_argument("-i", "--infile", help="input file", default='data/hf-tensor-level1-data.dat')
parser.add_argument("-e", "--exptID", help="experiment", default=0)
args = parser.parse_args()

# Load the original data
filename = args.infile
X = sptensor.loadTensor(filename)
pn = args.exptID
xprime = X
flatX = sptenmat.sptenmat(xprime, [0]).tocsrmat()  # matricize along the first mode

# Seed row that the timing rows are stacked onto.  The original script
# assigned this twice with no use in between; the dead first assignment
# has been removed.
stats = np.zeros((1, 6))

## NMF Timing
for k in range(samples):
    startTime = time.time()
    nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    elapsed = time.time() - startTime
    # one row per run: rank, iterations, experiment ID, sample index,
    # method label and elapsed wall-clock seconds
    stats = np.vstack((stats, np.array([R, iters, pn, k, "NMF", elapsed])))

## PCA Timing
----------------------- PF : the factor matrix where rows is patients and the column are factor values axis : the axis label of patients PIDs """ factors = pf.shape[1] # the number of columns rows = pf.shape[0] idx = np.flatnonzero(pf[:, 0]) dbOut = np.column_stack((axis[idx], np.repeat(0, len(idx)), pf[idx, 0])) for col in range(1, factors): idx = np.flatnonzero(pf[:, col]) dbOut = np.vstack((dbOut, np.column_stack((axis[idx], np.repeat(col, len(idx)), pf[idx, col])))) return dbOut refX = sptensor.loadTensor(inputFile.format(0, "data")) refAxis = decompTools.loadAxisInfo(inputFile.format(0, "info")) ## Find the factors for the first one klp, M, mstats = findFactors(refX, R=rank, outerIter=outerIter, innerIter=10) ## Store off the factors to be loaded into a database M.writeRawFile(MrawFile) Mout = decompTools.getDBOutput(M, refAxis) Mout = np.column_stack((np.repeat(exptID, Mout.shape[0]), Mout)) np.savetxt(Moutfile, Yout, fmt="%s", delimiter="|") sqlOut = file(Ysqlfile, "w") ## write the factors and the models into the database sqlOut.write( "load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n".format( Youtfile
import argparse
import cProfile
import pstats
import CP_APR
import sptensor

# Command line: positional input/output files plus optional rank and
# outer-iteration count for the factorization being profiled.
parser = argparse.ArgumentParser()
parser.add_argument("inputFile", help="input file to parse")
parser.add_argument("outputFile", help="output file for profile information")
parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=100)
parser.add_argument("-i", "--iters", type=int, help="Number of outer interations", default=100)
args = parser.parse_args()

########## Profile tensor factorization ###############
# X must keep this name: the profiled statement below refers to it
X = sptensor.loadTensor(args.inputFile)

## Profile
outputFile = args.outputFile
profileStmt = "CP_APR.cp_apr(X,R={0},tol=1e-2, maxiters={1}, maxinner=10)".format(args.rank, args.iters)
cProfile.run(profileStmt, filename=outputFile)

# Read the saved profile back and print it ordered by the 'time' key
p = pstats.Stats(outputFile)
p.sort_stats('time')
p.print_stats()
""" factors = pf.shape[1] # the number of columns rows = pf.shape[0] idx = np.flatnonzero(pf[:, 0]) dbOut = np.column_stack((axis[idx], np.repeat(0, len(idx)), pf[idx, 0])) for col in range(1, factors): idx = np.flatnonzero(pf[:, col]) dbOut = np.vstack( (dbOut, np.column_stack((axis[idx], np.repeat(col, len(idx)), pf[idx, col])))) return dbOut refX = sptensor.loadTensor(inputFile.format(0, "data")) refAxis = decompTools.loadAxisInfo(inputFile.format(0, "info")) ## Find the factors for the first one klp, M, mstats = findFactors(refX, R=rank, outerIter=outerIter, innerIter=10) ## Store off the factors to be loaded into a database M.writeRawFile(MrawFile) Mout = decompTools.getDBOutput(M, refAxis) Mout = np.column_stack((np.repeat(exptID, Mout.shape[0]), Mout)) np.savetxt(Moutfile, Yout, fmt="%s", delimiter="|") sqlOut = file(Ysqlfile, "w") ## write the factors and the models into the database sqlOut.write( "load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n" .format(Youtfile))
parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=40)
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0)
parser.add_argument("-i", "--iterations", type=int, help="Number of outer interations", default=70)
args = parser.parse_args()

R = args.rank
seed = args.seed
iters = args.iterations
filename = args.inputFile
exptID = args.expt
innerIter = 10
patThresh = 1e-50
modeThr = 1e-2

X = sptensor.loadTensor(filename.format("data"))
yaxis = decompTools.loadAxisInfo(filename.format("info"))

## calculate diagnosis-medication combination
diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])]


def getDBEntry(featureName, m):
    """
    Build the database rows for the nonzero entries of a factor matrix.

    Parameters:
    featureName - label repeated in the second column of every row
    m - 2-D array indexed as m[:, r] for r in range(R); its row indices
        are used to index the diagnosis-medication pairs in diagMed

    Output:
    array with one row per nonzero entry of the first R columns of m:
    exptID, featureName, the two diagMed values, the column index r and
    the entry value
    """
    # diagMed does not change inside the loop, so convert it only once
    # instead of rebuilding the array for every column
    diagMedArr = np.array(diagMed)
    # placeholder first row so vstack always has something to append to;
    # it is removed again after the loop
    output = np.zeros((1, 4))
    for r in range(R):
        # get the nonzero indices
        idx = np.flatnonzero(m[:, r])
        tmp = np.column_stack((diagMedArr[idx], np.repeat(r, len(idx)), m[idx, r]))
        output = np.vstack((output, tmp))
    output = np.delete(output, (0), axis=0)
    output = np.column_stack((np.repeat(exptID, output.shape[0]), np.repeat(featureName, output.shape[0]), output))
    return output