## NOTE(review): this fragment was whitespace-mangled onto a single line;
## the line breaks and indentation below are reconstructed. Python 2 code
## (relies on dict.values() returning a list; sklearn.cross_validation and
## the nimfa.mf API are both legacy).
import numpy as np
import shelve
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn import preprocessing
import nimfa
import sys
sys.path.append("..")
import KLProjection
import predictionModel
import sptenmat
import tensorTools
import json

## Load the sparse CMS tensor plus its axis dictionaries and the
## per-patient class-label dictionary.
X, axisDict, classDict = tensorTools.loadSingleTensor("data/cms-tensor-{0}.dat")
## Class labels as an int array -- assumes classDict.values() is ordered
## consistently with the tensor's mode-0 indices; TODO confirm against
## tensorTools.loadSingleTensor.
Y = np.array(classDict.values(), dtype='int')
flatX = sptenmat.sptenmat(X, [0]).tocsrmat()  # matricize along the first mode

## Experimental settings: 50/50 stratified train/test split with a fixed
## seed, and a rank-50 NMF factorization.
testSize = 0.5
seed = 400
R = 50
ttss = StratifiedShuffleSplit(Y, n_iter=1, test_size=testSize, random_state=seed)
for train, test in ttss:
    ## Fit NMF on the training rows only, then take the transposed
    ## coefficient matrix as the basis and L1-normalize each column so
    ## the columns sum to 1 (probability-like phenotype definitions).
    nmfModel = nimfa.mf(flatX[train,:], method="nmf", max_iter=200, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    nmfBasis = nmfResult.coef().transpose()
    nmfBasis = preprocessing.normalize(nmfBasis, norm="l1", axis=0)
    nmfBasisA = nmfBasis.toarray()  # dense copy of the normalized basis
## NOTE(review): whitespace-mangled fragment; line breaks reconstructed.
## The pre-existing `parser` and the bare `return` imply this is the
## interior of a CLI main() whose definition lies outside this view, so
## the code below is kept byte-identical. Python 2 (`print` statements).
parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=40)
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0)
parser.add_argument("-i", "--iterations", type=int, help="Number of outer interations", default=100)
args = parser.parse_args()
## experimental setup
exptID = args.expt
exptDesc = args.exptDescription
R = args.rank
seed = args.seed
outerIters = args.iterations
innerIters = 10  # inner iteration count is fixed, not exposed on the CLI
tol = 1e-2       # convergence tolerance for the factorization
## load tensor information
X, axisDict, classDict = tensorTools.loadSingleTensor(args.inputFile)
## connection to mongo-db (MongoClient imported outside this fragment --
## presumably `from pymongo import MongoClient`; verify at top of file)
client = MongoClient()
db = client.gravel
exptDB = db.factor
## verify the experimentID is okay -- abort early if this ID was already
## recorded in the `factor` collection, to avoid overwriting results
if exptDB.find({"id": exptID}).count():
    print "Experiment ID already exists, select another"
    return
print "Starting Tensor Factorization with ID:{0}".format(exptID)
## seed numpy so the factorization run is reproducible
np.random.seed(seed)
## factorize using CP_APR (this is the original)
## NOTE(review): whitespace-mangled fragment; line breaks reconstructed.
## `ktensor`, `run`, `json`, and `np` are bound outside this view --
## presumably imports and a run identifier; verify at top of file.
import tensorTools


def loadJSON(fn):
    """Read the JSON file at path `fn` and return the decoded object.

    NOTE(review): the handle is named `outfile` but is opened for
    reading, and the explicit close() inside the `with` block is
    redundant (the context manager closes the file again on exit --
    harmless, but worth cleaning up).
    """
    with open(fn, 'rb') as outfile:
        jsonDict = json.load(outfile)
        outfile.close()
    return jsonDict


## Load the previously factored models for this run: the bias-augmented
## Marble model, the plain Marble model, and the CP-APR baseline.
MBias = ktensor.loadTensor(
    "../results/pred-raw-bias-marble-{0}.dat".format(run))
M = ktensor.loadTensor("../results/pred-raw-marble-{0}.dat".format(run))
MCP = ktensor.loadTensor("../results/pred-raw-cpapr-{0}.dat".format(run))
## Original tensor plus axis/class dictionaries, and the human-readable
## CPT / ICD level-2 code descriptions.
X, axisDict, classDict = tensorTools.loadSingleTensor(
    "../data/cms-tensor-{0}.dat")
cptLevel = loadJSON("../data/cpt-level2.json")
icdLevel = loadJSON("../data/icd-level2.json")


## lookup values
def lookupDict(idx, n, axisDict, levelDict):
    ## Invert axisDict[n] (tensor index -> original code), then map each
    ## requested index through the inverse and into the readable level
    ## dictionary. Assumes levelDict keys are string-typed codes.
    ivAxis = {v: k for k, v in axisDict[n].items()}
    modeCat = [levelDict[str(ivAxis[k])] for k in idx]
    return modeCat


## get the top k from MBias
def getTopK(MF, n, axisDict, levelDict, k=10):
    ## Flattened indices of the k largest entries of factor matrix
    ## MF.U[n], largest first.
    ## NOTE(review): the function body is truncated here -- the rest of
    ## getTopK lies beyond this fragment.
    sortIdx = np.argsort(MF.U[n], axis=None)[::-1][:k]
## NOTE(review): whitespace-mangled fragment, near-duplicate of an
## earlier setup block. It begins mid-call: the matching
## parser.add_argument("-i", "--iterations", ... opens outside this view,
## and the bare `return` implies an enclosing main(). Python 2 code;
## kept byte-identical with reconstructed line breaks.
type=int, help="Number of outer interations", default=100)
args = parser.parse_args()
## experimental setup
exptID = args.expt
exptDesc = args.exptDescription
R = args.rank
seed = args.seed
outerIters = args.iterations
innerIters = 10  # inner iteration count is fixed, not exposed on the CLI
tol = 1e-2       # convergence tolerance for the factorization
## load tensor information
X, axisDict, classDict = tensorTools.loadSingleTensor(args.inputFile)
## connection to mongo-db (MongoClient bound outside this fragment --
## presumably `from pymongo import MongoClient`; verify at top of file)
client = MongoClient()
db = client.gravel
exptDB = db.factor
## verify the experimentID is okay -- abort early if this ID was already
## recorded in the `factor` collection, to avoid overwriting results
if exptDB.find({"id": exptID}).count():
    print "Experiment ID already exists, select another"
    return
print "Starting Tensor Factorization with ID:{0}".format(exptID)
## seed numpy so the factorization run is reproducible
np.random.seed(seed)
## factorize using CP_APR (this is the original)