Example #1
0
import numpy as np
import shelve
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn import preprocessing
import nimfa

import sys
sys.path.append("..")

import KLProjection
import predictionModel
import sptenmat
import tensorTools
import json

## Load the tensor together with its axis and class dictionaries.
## NOTE(review): the path is passed with the literal "{0}" placeholder still in it;
## presumably loadSingleTensor fills it in itself -- confirm against tensorTools.
X, axisDict, classDict = tensorTools.loadSingleTensor("data/cms-tensor-{0}.dat")
## Integer label array built from the values of classDict.
## NOTE(review): relies on classDict.values() returning a list (Python 2) and on its
## order lining up with the rows of flatX -- confirm.
Y = np.array(classDict.values(), dtype='int')

flatX =  sptenmat.sptenmat(X, [0]).tocsrmat() # matricize along the first mode
testSize = 0.5   # passed as test_size to the split below

seed = 400       # random_state of the split
R = 50           # rank requested from the NMF
## n_iter=1, so the loop below runs over a single stratified train/test split.
ttss = StratifiedShuffleSplit(Y, n_iter=1, test_size=testSize, random_state=seed)

for train, test in ttss:
	## Fit a rank-R NMF (max_iter=200) on the training rows only; the `test`
	## indices are not used in the visible part of this loop.
	nmfModel = nimfa.mf(flatX[train,:], method="nmf", max_iter=200, rank=R)
	nmfResult = nimfa.mf_run(nmfModel)
	## Transpose the coefficient matrix, then L1-normalize it along axis 0.
	nmfBasis = nmfResult.coef().transpose()
	nmfBasis = preprocessing.normalize(nmfBasis, norm="l1", axis=0)
	## Array form of the normalized basis (via toarray()).
	nmfBasisA = nmfBasis.toarray()
## Remaining command-line options: factorization rank, random seed, and the
## number of outer iterations.
parser.add_argument("-r", "--rank", type=int, help="rank of factorization", default=40)
parser.add_argument("-s", "--seed", type=int, help="random seed", default=0)
parser.add_argument("-i", "--iterations", type=int, help="Number of outer interations", default=100)
args = parser.parse_args()

## experimental setup
exptID = args.expt
exptDesc = args.exptDescription
R = args.rank
seed = args.seed
outerIters = args.iterations
innerIters = 10
tol = 1e-2

## load tensor information
X, axisDict, classDict = tensorTools.loadSingleTensor(args.inputFile)

## connection to mongo-db
client = MongoClient()
db = client.gravel
exptDB = db.factor

## verify the experimentID is okay
## This code runs at module level, where `return` is a SyntaxError, so the
## script is stopped with SystemExit instead (non-zero status: the run was refused).
## NOTE(review): Cursor.count() no longer exists in PyMongo 4 -- confirm the
## installed driver version before upgrading.
if exptDB.find({"id": exptID}).count():
	print("Experiment ID already exists, select another")
	raise SystemExit(1)

## Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Starting Tensor Factorization with ID:{0}".format(exptID))
## Seed NumPy's global RNG with the requested seed.
np.random.seed(seed)

## factorize using CP_APR (this is the original)
import tensorTools


def loadJSON(fn):
    """Read the JSON document stored in the file `fn` and return the decoded object.

    fn -- path of the JSON file to read.

    Any exception raised by open() or json.load() (missing file, malformed
    JSON) is propagated to the caller.
    """
    # The with-statement closes the file on exit, so no explicit close() is needed.
    with open(fn, 'rb') as infile:
        return json.load(infile)


## Load three saved tensors for this run from ../results (the bias-marble,
## marble and cpapr variants, going by the file names).
## NOTE(review): `run` and `ktensor` are not defined in the visible part of the
## file -- confirm they are provided earlier.
MBias = ktensor.loadTensor(
    "../results/pred-raw-bias-marble-{0}.dat".format(run))
M = ktensor.loadTensor("../results/pred-raw-marble-{0}.dat".format(run))
MCP = ktensor.loadTensor("../results/pred-raw-cpapr-{0}.dat".format(run))

## Load the tensor together with its axis and class dictionaries.
## NOTE(review): unlike the paths above, this one is passed with the "{0}"
## placeholder unformatted; presumably loadSingleTensor fills it in -- confirm.
X, axisDict, classDict = tensorTools.loadSingleTensor(
    "../data/cms-tensor-{0}.dat")

## JSON lookup tables used as the levelDict argument of the helpers below
## (presumably CPT and ICD code groupings, going by the file names).
cptLevel = loadJSON("../data/cpt-level2.json")
icdLevel = loadJSON("../data/icd-level2.json")


## lookup values
def lookupDict(idx, n, axisDict, levelDict):
    """Translate the positions in `idx` into their entries of `levelDict`.

    idx       -- iterable of values that appear as values of axisDict[n].
    n         -- key/index selecting which mapping of axisDict to use.
    axisDict  -- indexable collection of mappings; axisDict[n] is inverted
                 here so that its values become the lookup keys.
    levelDict -- mapping keyed by str() of the keys of axisDict[n].

    Returns a list with one levelDict entry per element of idx, in order.
    Raises KeyError when a position or its string key is missing.
    """
    # Invert axisDict[n]: value -> key.
    labelByPosition = dict(
        (position, label) for label, position in axisDict[n].items())
    categories = []
    for position in idx:
        levelKey = str(labelByPosition[position])
        categories.append(levelDict[levelKey])
    return categories


## get the top k from MBias
def getTopK(MF, n, axisDict, levelDict, k=10):
    sortIdx = np.argsort(MF.U[n], axis=None)[::-1][:k]
                    type=int,
                    help="Number of outer interations",
                    default=100)
args = parser.parse_args()

## experimental setup
exptID = args.expt
exptDesc = args.exptDescription
R = args.rank
seed = args.seed
outerIters = args.iterations
innerIters = 10
tol = 1e-2

## load tensor information
X, axisDict, classDict = tensorTools.loadSingleTensor(args.inputFile)

## connection to mongo-db
client = MongoClient()
db = client.gravel
exptDB = db.factor

## verify the experimentID is okay
## This code runs at module level, where `return` is a SyntaxError, so the
## script is stopped with SystemExit instead (non-zero status: the run was refused).
## NOTE(review): Cursor.count() no longer exists in PyMongo 4 -- confirm the
## installed driver version before upgrading.
if exptDB.find({"id": exptID}).count():
    print("Experiment ID already exists, select another")
    raise SystemExit(1)

## Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Starting Tensor Factorization with ID:{0}".format(exptID))
## Seed NumPy's global RNG with the requested seed.
np.random.seed(seed)

## factorize using CP_APR (this is the original)