default=10)
args = parser.parse_args()

inputFile = args.inputFile
nSample = args.sample
exptID = args.expt + nSample
totSamples = args.bootstrap
testSize = args.testSize
seed = 10
innerIter = 10
outerIter = args.iter
R = args.rank
zeroThr = 1e-4

# load the observation tensor, its axis labels, and the class labels
X = sptensor.loadTensor(inputFile.format("data"))
yaxis = decompTools.loadAxisInfo(inputFile.format("info"))
tensorInfo = shelve.open(inputFile.format("info"), "r")
Y = np.array(tensorInfo["class"], dtype='int')
tensorInfo.close()

# enumerate the diagnosis-medication combinations along modes 1 and 2
diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])]
predFile = "results/pred-metric-{0}-{1}.csv".format(exptID, nSample)

# repeated stratified train/test splits for the bootstrap evaluation
ttss = StratifiedShuffleSplit(Y, n_iter=totSamples, test_size=testSize, random_state=seed)
print "Starting Tensor Prediction with ID:{0}".format(exptID)
n = 0
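# ---------------------------------------------------------------------------
# Hedged illustration (not part of the original script): the per-split loop
# body is truncated above. With the pre-0.18 sklearn.cross_validation API
# used here, the StratifiedShuffleSplit object is itself iterable and yields
# (train, test) index arrays, one pair per bootstrap sample. The toy labels
# below are illustrative only.
import numpy as np
from sklearn.cross_validation import StratifiedShuffleSplit

toyY = np.array([0, 0, 0, 0, 1, 1, 1, 1])
toySplits = StratifiedShuffleSplit(toyY, n_iter=3, test_size=0.25, random_state=10)
for trainIdx, testIdx in toySplits:
    # each iteration gives one stratified train/test partition of the samples
    print "train:", trainIdx, "test:", testIdx
# ---------------------------------------------------------------------------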
parser.add_argument("-re", "--rankend", type=int, help="End of rank increments", default=200) args = parser.parse_args() # Experimental sections iter = args.iterations innerIter = 10 tol = 1e-2 zeroThr = 1e-5 seed = args.seed R = np.concatenate(([5, 10], np.arange(args.rankstart, args.rankend+25, 25))) exptID = args.expt labelID = args.label exptDesc = args.description inputFile = args.inputFile.format("data") yaxis = decompTools.loadAxisInfo(args.inputFile.format("info")) sqlOutfile = "results/rank-sql-{0}.sql".format(exptID) rawFilePattern = "results/rank-raw-{0}-{1}.dat" dbOutPattern = "results/rank-db-{0}-{1}.dat" dbTimePattern = "results/rank-time-db-{0}-{1}.dat" print "Starting Tensor Rank Experiment with ID:{0}".format(exptID) sqlOut = file(sqlOutfile, "w") for r in R: np.random.seed(seed) Y, ystats, mstats = decompTools.decomposeCountTensor(inputFile, R=r, outerIters=iter, innerIters=innerIter, convergeTol=tol, zeroTol=zeroThr) Y.writeRawFile(rawFilePattern.format(exptID, r)) # output the raw file # output the saved db file dbYFile = dbOutPattern.format(exptID, r)
""" factors = pf.shape[1] # the number of columns rows = pf.shape[0] idx = np.flatnonzero(pf[:, 0]) dbOut = np.column_stack((axis[idx], np.repeat(0, len(idx)), pf[idx, 0])) for col in range(1, factors): idx = np.flatnonzero(pf[:, col]) dbOut = np.vstack( (dbOut, np.column_stack((axis[idx], np.repeat(col, len(idx)), pf[idx, col])))) return dbOut refX = sptensor.loadTensor(inputFile.format(0, "data")) refAxis = decompTools.loadAxisInfo(inputFile.format(0, "info")) ## Find the factors for the first one klp, M, mstats = findFactors(refX, R=rank, outerIter=outerIter, innerIter=10) ## Store off the factors to be loaded into a database M.writeRawFile(MrawFile) Mout = decompTools.getDBOutput(M, refAxis) Mout = np.column_stack((np.repeat(exptID, Mout.shape[0]), Mout)) np.savetxt(Moutfile, Yout, fmt="%s", delimiter="|") sqlOut = file(Ysqlfile, "w") ## write the factors and the models into the database sqlOut.write( "load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n" .format(Youtfile)) sqlOut.write(
    pf : the factor matrix, where the rows are patients and the columns are factor values
    axis : the axis labels of the patients (PIDs)
    """
    factors = pf.shape[1]  # the number of factor columns
    rows = pf.shape[0]
    # nonzero entries of the first factor column
    idx = np.flatnonzero(pf[:, 0])
    dbOut = np.column_stack((axis[idx], np.repeat(0, len(idx)), pf[idx, 0]))
    # stack the nonzero entries of the remaining factor columns
    for col in range(1, factors):
        idx = np.flatnonzero(pf[:, col])
        dbOut = np.vstack((dbOut, np.column_stack((axis[idx], np.repeat(col, len(idx)), pf[idx, col]))))
    return dbOut


refX = sptensor.loadTensor(inputFile.format(0, "data"))
refAxis = decompTools.loadAxisInfo(inputFile.format(0, "info"))

## Find the factors for the first sample
klp, M, mstats = findFactors(refX, R=rank, outerIter=outerIter, innerIter=10)

## Store off the factors to be loaded into a database
M.writeRawFile(MrawFile)
Mout = decompTools.getDBOutput(M, refAxis)
Mout = np.column_stack((np.repeat(exptID, Mout.shape[0]), Mout))
np.savetxt(Moutfile, Mout, fmt="%s", delimiter="|")

sqlOut = file(Ysqlfile, "w")
## write the factors and the models into the database
sqlOut.write("load client from /home/joyceho/workspace/tensor/{0} of del modified by coldel| insert into joyceho.tensor_factors;\n".format(Youtfile))
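# ---------------------------------------------------------------------------
# Hedged illustration (not part of the original script): what the db-output
# helper above produces. For a toy 3-patient x 2-factor matrix, only the
# nonzero entries are kept and each output row is
# (patient label, factor index, factor value).
import numpy as np

toyAxis = np.array(["P1", "P2", "P3"])
toyPF = np.array([[0.8, 0.0],
                  [0.0, 0.3],
                  [0.5, 0.9]])
idx = np.flatnonzero(toyPF[:, 0])
toyOut = np.column_stack((toyAxis[idx], np.repeat(0, len(idx)), toyPF[idx, 0]))
for col in range(1, toyPF.shape[1]):
    idx = np.flatnonzero(toyPF[:, col])
    toyOut = np.vstack((toyOut, np.column_stack((toyAxis[idx], np.repeat(col, len(idx)), toyPF[idx, col]))))
print toyOut  # rows: (P1, 0, 0.8), (P3, 0, 0.5), (P2, 1, 0.3), (P3, 1, 0.9)
# ---------------------------------------------------------------------------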
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0) parser.add_argument("-i", "--iterations", type=int, help="Number of outer interations", default=70) args = parser.parse_args() R = args.rank seed = args.seed iters = args.iterations filename = args.inputFile exptID = args.expt innerIter = 10 patThresh = 1e-50 modeThr = 1e-2 X = sptensor.loadTensor(filename.format("data")) yaxis = decompTools.loadAxisInfo(filename.format("info")) ## calculate diagnosis-medication combination diagMed = [[a, b] for a, b in itertools.product(yaxis[1], yaxis[2])] def getDBEntry(featureName, m): output = np.zeros((1, 4)) for r in range(R): # get the nonzero indices idx = np.flatnonzero(m[:, r]) tmp = np.column_stack((np.array(diagMed)[idx], np.repeat(r, len(idx)), m[idx, r])) output = np.vstack((output, tmp)) output = np.delete(output, (0), axis=0) output = np.column_stack((np.repeat(exptID, output.shape[0]), np.repeat(featureName, output.shape[0]), output)) return output