def parseFile(filename, patIdx, medIdx, diagIdx, labelIdx, delim="|"):
    """ 
    Parse a csv file using the delimiter and the appropriate columns of interest.
    The resultant sparse tensor has patient on the 0th mode, diagnosis on the 1st mode,
    and medications on the 2nd mode.
    
    Tensor info contains the axis information for each mode.
    """
    print "Creating the tensor for " + filename

    patList = OrderedDict(sorted({}.items(), key=lambda t:t[1]))
    medList = OrderedDict(sorted({}.items(), key=lambda t:t[1]))
    diagList = OrderedDict(sorted({}.items(), key=lambda t:t[1]))
    patClass = OrderedDict(sorted({}.items(), key=lambda t:t[1]))

    ## placeholder subscript row; the first data line maps to [0, 0, 0]
    tensorIdx = np.array([[0, 0, 0]])
    datfile = open(filename)

    for i, line in enumerate(datfile):
        line = line.rstrip('\r\n')
        parse = line.split(delim)
        
        # insert them into the list if necessary
        if not patList.has_key(parse[patIdx]):
            patList[parse[patIdx]] = len(patList)
            patClass[parse[patIdx]] = parse[labelIdx]
        if not diagList.has_key(parse[diagIdx]):
            diagList[parse[diagIdx]] = len(diagList)
        if not medList.has_key(parse[medIdx]):
            medList[parse[medIdx]] = len(medList)
        
        patId = patList.get(parse[patIdx])
        diagId = diagList.get(parse[diagIdx])
        medId = medList.get(parse[medIdx])
    
        # the first data line is already covered by the [0, 0, 0] placeholder
        if i > 0:
            tensorIdx = np.append(tensorIdx, [[patId, diagId, medId]], axis=0)

    tensorVal = np.ones((tensorIdx.shape[0], 1))
    # initialize size
    siz = np.array([len(patList), len(diagList), len(medList)])
    X = sptensor.sptensor(tensorIdx, tensorVal, siz)
    
    tensorInfo = {}
    tensorInfo['axis'] = [patList.keys(), diagList.keys(), medList.keys()]
    tensorInfo['pat'] = patList.keys()
    tensorInfo['med'] = medList.keys()
    tensorInfo['diag'] = diagList.keys()
    tensorInfo['class'] = patClass.values()
      
    return X, tensorInfo
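A minimal usage sketch for parseFile; the file name and column positions below are hypothetical and only illustrate the calling convention.

# hypothetical pipe-delimited extract: columns 0, 2, 3, 5 = patient, med, diag, label
X, tensorInfo = parseFile("claims.dat", patIdx=0, medIdx=2, diagIdx=3, labelIdx=5)
print X.shape                     # (patients, diagnoses, medications)
print len(tensorInfo['class'])    # one class label per patient, in insertion order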
Example #2
    def evaluatePredictionAUC_3(self,experCount):
        run = 0
        sumBaseAUC=0.0
        sumCprAUC=0.0

        testCount=4392
        indexC1=np.where(self.X.subs[:,0]<testCount)
        indexC2=np.where(self.X.subs[:,0]>=testCount)
        #print( indexC1)
        subs1=self.X.subs[indexC1]
        subs2=self.X.subs[indexC2]

        subs2[:,0]=subs2[:,0]-testCount
        vals1=self.X.vals[indexC1]
        vals2=self.X.vals[indexC2]
        size1=np.array([testCount,self.X.shape[1],self.X.shape[2]])
        size2=np.array([self.X.shape[0]-testCount,self.X.shape[1],self.X.shape[2]])
        self.Y[self.Y==0]=-1
        Y1=self.Y[:testCount]
        Y2=self.Y[testCount:]
        #print Y1.shape
        trainingX= sptensor.sptensor(subs1, vals1, size1)
        testX= sptensor.sptensor(subs2, vals2, size2)


        MCPR, cpstats, mstats = cp_apr_logis.cp_apr(trainingX, Y1, self.R, maxiters=100, maxinner=50)
        #MCPR, cpstats, mstats = CP_APR.cp_apr(trainingX, self.R, maxiters=100, maxinner=self.innerIter)
        MCPR.normalize_sort(1)

        klproj = KLProjection.KLProjection(MCPR.U, self.R)
        np.random.seed(10)
        testMatrix=klproj.projectSlice(testX, 0)

        ## scale by summing across the rows
        totWeight = np.sum(testMatrix, axis=1)
        zeroIdx = np.where(totWeight < 1e-100)[0]
        if len(zeroIdx) > 0:
            # for the zero ones we're going to evenly distribute
            evenDist = np.repeat(1.0 / self.R, len(zeroIdx)*self.R)
            testMatrix[zeroIdx, :] = evenDist.reshape((len(zeroIdx), self.R))
            totWeight = np.sum(testMatrix, axis=1)
        twMat = np.repeat(totWeight, self.R).reshape(testMatrix.shape[0], self.R)
        testMatrix = testMatrix / twMat
        #print(MCPR.U[0])
        #print(self.rawFeatures)
        rawXfile=self.data_dir+'experiment/trainingX_'+str(experCount)+'.csv'
        rawYfile=self.data_dir+'experiment/trainingY_'+str(experCount)+'.csv'
        cprXfile=self.data_dir+'experiment/testX_'+str(experCount)+'.csv'
        cprYfile=self.data_dir+'experiment/testY_'+str(experCount)+'.csv'
        np.savetxt(rawXfile,MCPR.U[0])
        np.savetxt(rawYfile,Y1)
        np.savetxt(cprXfile, testMatrix)
        np.savetxt(cprYfile,Y2)

        print 'OK'
def tensorSubset(X, sm, subsetIds):
    """ 
    Get a subset of the tensors specified by the subsetIds
    
    Parameters
    ------------
    X : a list of tensors to subset
    sm : a 2-d numpy array specifying the tensor mode locations to compute the subset on
    subsetIds : a list of indices
    
    Output
    -----------
    subsetX : a list of tensors with the indices rebased
    """
    subsetX = [ti for ti in X]
    for row in range(sm.shape[0]):
        tensorIdx = sm[row, 0]
        tensorMode = sm[row, 1]
        subsetIdx = np.in1d(X[tensorIdx].subs[:,tensorMode].ravel(), subsetIds)
        subsIdx = np.where(subsetIdx)[0]
        subsetSubs = X[tensorIdx].subs[subsIdx,:]
        subsetVals = X[tensorIdx].vals[subsIdx]
        subsetSubs = rebase(subsetIds, subsetSubs)
        subsetShape = list(X[tensorIdx].shape)
        subsetShape[tensorMode] = len(subsetIds)
        subsetX[tensorIdx] = sptensor.sptensor(subsetSubs, subsetVals, subsetShape)
    return subsetX
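A minimal usage sketch with assumed inputs: two tensors (hypothetical names tenDiag and tenMed) that both carry patients on mode 0, restricted to three patient ids.

sm = np.array([[0, 0], [1, 0]])   # (tensor index, mode) pairs holding the patient axis
subsetX = tensorSubset([tenDiag, tenMed], sm, subsetIds=[0, 5, 9])
print subsetX[0].shape            # the patient mode now has length 3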
Example #4
def tensorSubset(X, sm, subsetIds):
    """ 
    Get a subset of the tensors specified by the subsetIds
    
    Parameters
    ------------
    X : a list of tensors to subset
    sm : a 2-d numpy array specifying the tensor mode locations to compute the subset on
    subsetIds : a list of indices
    
    Output
    -----------
    subsetX : a list of tensors with the indices rebased
    """
    subsetX = [ti for ti in X]
    for row in range(sm.shape[0]):
        tensorIdx = sm[row, 0]
        tensorMode = sm[row, 1]
        subsetIdx = np.in1d(X[tensorIdx].subs[:, tensorMode].ravel(),
                            subsetIds)
        subsIdx = np.where(subsetIdx)[0]
        subsetSubs = X[tensorIdx].subs[subsIdx, :]
        subsetVals = X[tensorIdx].vals[subsIdx]
        subsetSubs = rebase(subsetIds, subsetSubs)
        subsetShape = list(X[tensorIdx].shape)
        subsetShape[tensorMode] = len(subsetIds)
        subsetX[tensorIdx] = sptensor.sptensor(subsetSubs, subsetVals,
                                               subsetShape)
    return subsetX
Example #5
def loadMultiTensor(inFilePattern):
    """ 
    Load the list of tensors from this input file format
    
    Parameters
    ------------
    inFilePattern : the input file pattern for the 2 files with the tensor data and axis information
    
    Output
    -----------
    X : the list of tensors in the file
    sharedModes : the 2-d array with the shared modes location
    axisDict : the axis information for all the tensors
    patClass : the patient cohort information
    """
    infile = file(inFilePattern.format("data"), "rb")
    lenX = np.load(infile)
    X = []
    for i in range(lenX):
        subs = np.load(infile)
        vals = np.load(infile)
        siz = np.load(infile)
        X.append(sptensor.sptensor(subs, vals, siz))
    sharedModes = np.load(infile)
    tensorInfo = shelve.open(inFilePattern.format("info"), "r")
    axisDict = tensorInfo[AXIS]
    patClass = tensorInfo[CLASS]
    tensorInfo.close()
    
    return X, sharedModes, axisDict, patClass
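A minimal usage sketch, assuming files written by the matching save routine under a hypothetical name pattern; the {0} placeholder is filled with "data" and "info" inside the function.

X, sharedModes, axisDict, patClass = loadMultiTensor("hf-tensor-{0}.dat")
print len(X), sharedModes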
Example #6
def parseShared2DTensorFile(filename, axis0Dict, axis1Dict, axis0Idx, axis1Idx, valueIdx):
	print "Creating tensor from file " + filename
	## initialize the dictionaries if nonexistent
	if axis0Dict is None:
		axis0Dict = OrderedDict(sorted({}.items(), key=lambda t:t[1]))
	if axis1Dict is None:
		axis1Dict = OrderedDict(sorted({}.items(), key=lambda t:t[1]))
	tensorIdx = np.array([[0, 0]], dtype=int)
	tensorVal = np.array([[0]], dtype=int)
	f = open(filename, "rb")
	for row in csv.reader(f):
		## see if we need to add them to the axis dictionaries
		if not axis0Dict.has_key(row[axis0Idx]):
			axis0Dict[row[axis0Idx]] = len(axis0Dict)
		if not axis1Dict.has_key(row[axis1Idx]):
			axis1Dict[row[axis1Idx]] = len(axis1Dict)
		axis0Id = axis0Dict.get(row[axis0Idx])
		axis1Id = axis1Dict.get(row[axis1Idx])
		tensorIdx = np.vstack((tensorIdx, [[axis0Id, axis1Id]]))
		tensorVal = np.vstack((tensorVal, [[int(row[valueIdx])]]))
	tensorIdx = np.delete(tensorIdx, (0), axis=0)
	tensorVal = np.delete(tensorVal, (0), axis=0)
	f.close()
	tenX = sptensor.sptensor(tensorIdx, tensorVal, np.array([len(axis0Dict), len(axis1Dict)]))
	axisDict = {0: axis0Dict, 1: axis1Dict}
	return tenX, axisDict
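A minimal usage sketch (hypothetical file and column positions): build a patient-by-lab matrix as a 2-way sptensor, reusing an existing patient dictionary (patDict, assumed to come from an earlier parse) so the 0th axis lines up across tensors; pass None to start a fresh dictionary.

labX, labAxis = parseShared2DTensorFile("labs.csv", patDict, None,
                                        axis0Idx=0, axis1Idx=1, valueIdx=2)
print labX.shape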
Example #7
def permute(verbose):
    subs = numpy.array([[1, 2, 3], [1, 1, 3], [2, 0, 1], [4, 3, 4], [1, 0, 1], [1, 0, 0]]);
    vals = numpy.array([[0.5], [1.5], [10], [3.5], [4.5], [5.5]]);
    siz = numpy.array([5, 5, 5]);
    obj = sptensor.sptensor(subs, vals, siz);
    if (verbose):
        print obj;
        print obj.permute([2,0,1]);
Example #8
def ttmTests(verbose):

    subs = numpy.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1], [0, 1, 2]])
    vals = numpy.array([[1], [2], [3], [4], [5]])
    obj = sptensor.sptensor(subs, vals)
    A = numpy.array([[10, 20], [30, 40]])
    print obj.ttm(A, 0)

    subs = numpy.array([[1, 2, 2], [1, 1, 2], [2, 0, 1], [1, 0, 1], [1, 0, 0]])
    vals = numpy.array([[0.5], [1.5], [3.5], [4.5], [5.5]])
    obj = sptensor.sptensor(subs, vals)
    print obj
    A = numpy.arange(18).reshape([6, 3])

    print obj.ttm(A, 2)

    print obj.ttm([A, A], [1, 2])
Example #9
def ttmTests(verbose):
    
    subs = numpy.array([[0,0,0],[0,1,1],[1,0,1],[1,1,1],[0,1,2]]);
    vals = numpy.array([[1],[2],[3],[4],[5]]);
    obj = sptensor.sptensor(subs,vals);
    A = numpy.array([[10,20],[30,40]]);
    print obj.ttm(A,0);
    
    
    subs = numpy.array([[1, 2, 2], [1, 1, 2], [2, 0, 1], [1, 0, 1], [1, 0, 0]]);
    vals = numpy.array([[0.5], [1.5], [3.5], [4.5], [5.5]]);
    obj = sptensor.sptensor(subs, vals);
    print obj;
    A = numpy.arange(18).reshape([6,3]);
    
    print obj.ttm(A,2);
    
    print obj.ttm([A,A],[1,2]);
Example #10
def permute(verbose):
    subs = numpy.array([[1, 2, 3], [1, 1, 3], [2, 0, 1], [4, 3, 4], [1, 0, 1],
                        [1, 0, 0]])
    vals = numpy.array([[0.5], [1.5], [10], [3.5], [4.5], [5.5]])
    siz = numpy.array([5, 5, 5])
    obj = sptensor.sptensor(subs, vals, siz)
    if (verbose):
        print obj
        print obj.permute([2, 0, 1])
Example #11
    def tosptensor(self):
        """ returns the sptensor object
        that contains the same value with the tensor object."""

        length = len(self.shape)

        sub = tools.allIndices(self.shape)
        return sptensor.sptensor(
            sub,
            self.data.flatten().reshape(self.data.size, 1), self.shape)
Example #12
def mathops(verbose):
    subs = numpy.array([[0, 0, 0], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0], [0, 0, 0]]);
    subs2 = numpy.array([[0, 2, 4], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0], [0, 0, 0]]);
    vals = numpy.array([[0.5], [1.5], [2.5], [3.5], [4.5], [5.5]]);
    vals2 = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]]);
    siz = numpy.array([4, 4, 4]);

    obj = sptensor.sptensor(subs, vals, siz);
    obj2 = sptensor.sptensor(subs2, vals2, siz);
    if(verbose == 1):
        print obj == obj2;
    if(verbose == 1):
        print obj == obj;
    if(verbose == 1):
        print obj + 100;
        print obj - 100;
        print obj * 3.4;
    if(verbose == 1):
        print obj + obj2;
        print obj - obj2;
Example #13
 def tosptensor(self):
     """ returns the sptensor object
     that contains the same value with the tensor object."""
     
     length = len(self.shape);
     
     sub = tools.allIndices(self.shape);
     return sptensor.sptensor(
         sub,
         self.data.flatten().reshape(self.data.size, 1),
         self.shape);    
Example #14
 def tosptensor(self):
     """ returns the sptensor object
     that contains the same value with the tensor object."""
     nnz = numpy.nonzero(self.data)
     vals = self.data[nnz]
     totVals = len(vals)
     vals = numpy.reshape(vals, (totVals, 1))
     subs = numpy.zeros((totVals, self.ndims()),dtype = 'int')
     for n in range(self.ndims()):
         subs[:, n] = nnz[n]
     return sptensor.sptensor(subs, vals, self.shape)
Example #15
def tosparsematTest(verbose):
    subs = numpy.array([[1, 3, 5], [1, 1, 0], [2, 2, 2], [3, 4, 4], [1, 1, 1], [1, 1, 1]]);
    vals = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]]);
    siz = numpy.array([4, 5, 6]);
    spt = sptensor.sptensor(subs, vals, siz);
    print spt;
    
    sptm = sptenmat.sptenmat(spt,[1]);
    print sptm;
    
    print sptm.tosparsemat();
Example #16
def generateRandomProblem(MFull):
	## sample a Poisson observation for every entry of the full tensor
	nnz = np.nonzero(MFull.data)
	mfVals = MFull.data.flatten()
	xVals = np.reshape([np.random.poisson(l) for l in mfVals], (len(mfVals), 1))
	Xsubs = np.zeros((len(mfVals), MFull.ndims()), dtype='int')
	for n in range(MFull.ndims()):
		Xsubs[:, n] = nnz[n]
	X = sptensor.sptensor(Xsubs, xVals, MFull.shape)
	## return the observation
	return X
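A minimal usage sketch with an assumed setup: MFull is a dense tensor object (for example a reconstructed model), and each of its entries is used as the Poisson rate of the corresponding observed count.

# assuming tensor.tensor wraps a dense numpy array, as elsewhere in this package
# MFull = tensor.tensor(np.random.rand(5, 5, 2) + 0.1)
# np.random.seed(0)
# Xobs = generateRandomProblem(MFull)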
Example #17
 def tosptensor(self):
     """ returns the sptensor object
     that contains the same value with the tensor object."""
     nnz = numpy.nonzero(self.data)
     vals = self.data[nnz]
     totVals = len(vals)
     vals = numpy.reshape(vals, (totVals, 1))
     subs = numpy.zeros((totVals, self.ndims()), dtype='int')
     for n in range(self.ndims()):
         subs[:, n] = nnz[n]
     return sptensor.sptensor(subs, vals, self.shape)
Example #18
def tosparsematTest(verbose):
    subs = numpy.array([[1, 3, 5], [1, 1, 0], [2, 2, 2], [3, 4, 4], [1, 1, 1],
                        [1, 1, 1]])
    vals = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]])
    siz = numpy.array([4, 5, 6])
    spt = sptensor.sptensor(subs, vals, siz)
    print spt

    sptm = sptenmat.sptenmat(spt, [1])
    print sptm

    print sptm.tosparsemat()
Example #19
def parseFile(filename, patIdx, medIdx, diagIdx, labelIdx, delim="|"):
    """ 
    Parse a csv file using the delimiter and the appropriate columns of interest.
    The resultant sparse tensor has patient on the 0th mode, diagnosis on the 1st mode,
    and medications on the 2nd mode.
    
    Tensor info contains the axis information for each mode.
    """
    print "Creating the tensor for " + filename

    patList = OrderedDict(sorted({}.items(), key=lambda t: t[1]))
    medList = OrderedDict(sorted({}.items(), key=lambda t: t[1]))
    diagList = OrderedDict(sorted({}.items(), key=lambda t: t[1]))
    patClass = OrderedDict(sorted({}.items(), key=lambda t: t[1]))

    ## placeholder subscript row; the first data line maps to [0, 0, 0]
    tensorIdx = np.array([[0, 0, 0]])
    datfile = open(filename)

    for i, line in enumerate(datfile):
        line = line.rstrip('\r\n')
        parse = line.split(delim)

        # insert them into the list if necessary
        if not patList.has_key(parse[patIdx]):
            patList[parse[patIdx]] = len(patList)
            patClass[parse[patIdx]] = parse[labelIdx]
        if not diagList.has_key(parse[diagIdx]):
            diagList[parse[diagIdx]] = len(diagList)
        if not medList.has_key(parse[medIdx]):
            medList[parse[medIdx]] = len(medList)

        patId = patList.get(parse[patIdx])
        diagId = diagList.get(parse[diagIdx])
        medId = medList.get(parse[medIdx])

        # the first data line is already covered by the [0, 0, 0] placeholder
        if i > 0:
            tensorIdx = np.append(tensorIdx, [[patId, diagId, medId]], axis=0)

    tensorVal = np.ones((tensorIdx.shape[0], 1))
    # initialize size
    siz = np.array([len(patList), len(diagList), len(medList)])
    X = sptensor.sptensor(tensorIdx, tensorVal, siz)

    tensorInfo = {}
    tensorInfo['axis'] = [patList.keys(), diagList.keys(), medList.keys()]
    tensorInfo['pat'] = patList.keys()
    tensorInfo['med'] = medList.keys()
    tensorInfo['diag'] = diagList.keys()
    tensorInfo['class'] = patClass.values()

    return X, tensorInfo
Example #20
def mathops(verbose):
    subs = numpy.array([[0, 0, 0], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0],
                        [0, 0, 0]])
    subs2 = numpy.array([[0, 2, 4], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0],
                         [0, 0, 0]])
    vals = numpy.array([[0.5], [1.5], [2.5], [3.5], [4.5], [5.5]])
    vals2 = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]])
    siz = numpy.array([4, 4, 4])

    obj = sptensor.sptensor(subs, vals, siz)
    obj2 = sptensor.sptensor(subs2, vals2, siz)
    if (verbose == 1):
        print obj == obj2
    if (verbose == 1):
        print obj == obj
    if (verbose == 1):
        print obj + 100
        print obj - 100
        print obj * 3.4
    if (verbose == 1):
        print obj + obj2
        print obj - obj2
Example #21
def generateRandomProblem(MFull):
    ## sample a Poisson observation for every entry of the full tensor
    nnz = np.nonzero(MFull.data)
    mfVals = MFull.data.flatten()
    xVals = np.reshape([np.random.poisson(l) for l in mfVals],
                       (len(mfVals), 1))
    Xsubs = np.zeros((len(mfVals), MFull.ndims()), dtype='int')
    for n in range(MFull.ndims()):
        Xsubs[:, n] = nnz[n]
    X = sptensor.sptensor(Xsubs, xVals, MFull.shape)
    ## return the observation
    return X
Example #22
def ctor(verbose):
    x = numpy.array([[[0, 0, 0.9052], [0.9121, 0, 0.7363]],
                     [[0.1757, 0.2089, 0], [0, 0.7455, 0]],
                     [[0, 0, 0.6754], [0, 0, 0]]])
    obj = sptenmat.sptenmat(x, [0], [1, 2], [10, 10, 10])
    print obj

    subs = numpy.array([[1, 3, 5], [1, 1, 0], [2, 2, 2], [3, 4, 4], [1, 1, 1],
                        [1, 1, 1]])
    vals = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]])
    siz = numpy.array([4, 5, 6])
    spt = sptensor.sptensor(subs, vals, siz)

    print spt

    obj = sptenmat.sptenmat(spt, [0, 1], [2])
    print obj
Example #23
    def tosptensor(self):
        # extract the shape of sptensor
        newshape = self.tsize

        #extract the subscripts of sptensor
        rowsubs = []
        if (len(self.rdims) != 0):
            rowshape = []
            for i in range(0, len(self.rdims)):
                rowshape.extend([self.tsize[self.rdims[i]]])

            for i in range(0, len(self.subs)):
                rowsubs.extend([tools.ind2sub(rowshape, self.subs[i][0])])
        rowsubs = numpy.array(rowsubs)

        colsubs = []
        if (len(self.cdims) != 0):
            colshape = []
            for i in range(0, len(self.cdims)):
                colshape.extend([self.tsize[self.cdims[i]]])

            for i in range(0, len(self.subs)):
                colsubs.extend([tools.ind2sub(colshape, self.subs[i][1])])
        colsubs = numpy.array(colsubs)

        newsubs = []
        for i in range(0, len(self.subs)):
            newsubs.extend([[]])

        for k in range(0, len(newshape)):
            find = tools.find(self.rdims, k)
            if (find != -1):
                newsubs = numpy.concatenate(
                    (newsubs, rowsubs[:, find].reshape([len(self.subs), 1])),
                    axis=1)
            else:
                find = tools.find(self.cdims, k)
                newsubs = numpy.concatenate(
                    (newsubs, colsubs[:, find].reshape([len(self.subs), 1])),
                    axis=1)

        #extract the values of sptensor
        newvals = self.vals

        return sptensor.sptensor(numpy.array(newsubs, dtype="int"), newvals,
                                 newshape)
Example #24
def ctor(verbose):
    x = numpy.array([
        [[0,0,0.9052],[0.9121,0,0.7363]],
        [[0.1757,0.2089,0],[0,0.7455,0]],
        [[0,0,0.6754],[0,0,0]]
        ])
    obj = sptenmat.sptenmat(x, [0], [1,2], [10,10,10]);
    print obj;
    
    subs = numpy.array([[1, 3, 5], [1, 1, 0], [2, 2, 2], [3, 4, 4], [1, 1, 1], [1, 1, 1]]);
    vals = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]]);
    siz = numpy.array([4, 5, 6]);
    spt = sptensor.sptensor(subs, vals, siz);
    
    print spt;
    
    obj = sptenmat.sptenmat(spt, [0,1], [2]);
    print obj;
Example #25
def ctor(verbose):
    subs = numpy.array([[0, 0, 0], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0], [0, 0, 0]]);
    subs2 = numpy.array([[0, 2, 3], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0], [0, 0, 0]]);
    vals = numpy.array([[0.5], [1.5], [2.5], [3.5], [4.5], [5.5]]);
    vals2 = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]]);
    siz = numpy.array([5, 5, 5]);


    if(verbose == 1):
        print sptensor.sptensor(subs, vals, siz);
        print sptensor.sptensor(subs, vals);
    
    obj2 = sptensor.sptensor(subs2, vals2, siz);
    if(verbose == 1):
        print obj2;
        print obj2.totensor();
        print sptensor.sptensor(subs, vals).totensor();
def generateRandomProblem(MFull):
	X = []
	for M in MFull:
		## get the non-zero entries
		nnz = np.nonzero(M.data)
		mfVals = M.data.flatten()
		xVals = np.reshape([np.random.poisson(l) for l in mfVals], (len(mfVals), 1))
		xSubs = np.zeros((len(mfVals), M.ndims()), dtype='int')
		for n in range(M.ndims()):
			xSubs[:, n] = nnz[n]
		## figure out which ones are non-zero and build X with it to avoid extraneous properties
		nnzX = np.nonzero(xVals)
		print "Number of nonzeros:" + str(len(nnzX[0]))
		xVals = xVals[nnzX[0],:]
		xSubs = xSubs[nnzX[0],:]
		X.append(sptensor.sptensor(xSubs, xVals, M.shape))
	## return the observation
	return X
Example #27
def ctor(verbose):
    subs = numpy.array([[0, 0, 0], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0],
                        [0, 0, 0]])
    subs2 = numpy.array([[0, 2, 3], [0, 0, 2], [1, 1, 1], [3, 3, 3], [0, 0, 0],
                         [0, 0, 0]])
    vals = numpy.array([[0.5], [1.5], [2.5], [3.5], [4.5], [5.5]])
    vals2 = numpy.array([[0.5], [1.5], [100], [3.5], [4.5], [5.5]])
    siz = numpy.array([5, 5, 5])

    if (verbose == 1):
        print sptensor.sptensor(subs, vals, siz)
        print sptensor.sptensor(subs, vals)

    obj2 = sptensor.sptensor(subs2, vals2, siz)
    if (verbose == 1):
        print obj2
        print obj2.totensor()
        print sptensor.sptensor(subs, vals).totensor()
Example #28
 def tosptensor(self):
     # extract the shape of sptensor
     newshape = self.tsize;
     
     #extract the subscripts of sptensor
     rowsubs = [];
     if (len(self.rdims) != 0):
         rowshape = [];
         for i in range(0, len(self.rdims)):
             rowshape.extend([self.tsize[self.rdims[i]]]);
             
         for i in range(0, len(self.subs)):
             rowsubs.extend([tools.ind2sub(rowshape,self.subs[i][0])]);
     rowsubs = numpy.array(rowsubs);
     
     colsubs = [];
     if (len(self.cdims) != 0):
         colshape = [];
         for i in range(0, len(self.cdims)):
             colshape.extend([self.tsize[self.cdims[i]]]);
             
         for i in range(0, len(self.subs)):
             colsubs.extend([tools.ind2sub(colshape,self.subs[i][1])]);
     colsubs = numpy.array(colsubs);
     
     newsubs = [];
     for i in range(0, len(self.subs)):
         newsubs.extend([[]]);
     
     
     for k in range(0, len(newshape)):
         find = tools.find(self.rdims,k);
         if(find != -1):
             newsubs = numpy.concatenate((newsubs, rowsubs[:,find].reshape([len(self.subs),1])), axis = 1);
         else:
             find = tools.find(self.cdims,k);
             newsubs = numpy.concatenate((newsubs, colsubs[:,find].reshape([len(self.subs),1])), axis = 1);
     
     #extract the values of sptensor
     newvals = self.vals;
     
     return sptensor.sptensor(newsubs, newvals, newshape);
def generateRandomProblem(MFull):
    X = []
    for M in MFull:
        ## get the non-zero entries
        nnz = np.nonzero(M.data)
        mfVals = M.data.flatten()
        xVals = np.reshape([np.random.poisson(l) for l in mfVals],
                           (len(mfVals), 1))
        xSubs = np.zeros((len(mfVals), M.ndims()), dtype='int')
        for n in range(M.ndims()):
            xSubs[:, n] = nnz[n]
        ## figure out which ones are non-zero and build X with it to avoid extraneous properties
        nnzX = np.nonzero(xVals)
        print "Number of nonzeros:" + str(len(nnzX[0]))
        xVals = xVals[nnzX[0], :]
        xSubs = xSubs[nnzX[0], :]
        X.append(sptensor.sptensor(xSubs, xVals, M.shape))
    ## return the observation
    return X
def tensorSubset(origTensor, subsetIds, subsetShape):
    """ 
    Get a subset of the tensor specified by the subsetIds
    
    Parameters
    ------------
    X : the original tensor
    subsetIds : a list of indices
    subsetShape : the shape of the new tensor
    
    Output
    -----------
    subsetX : the tensor with the indices rebased
    """
    subsetIdx = np.in1d(origTensor.subs[:, 0].ravel(), subsetIds)
    subsIdx = np.where(subsetIdx)[0]
    subsetSubs = origTensor.subs[subsIdx, :]
    subsetVals = origTensor.vals[subsIdx]
    # reindex the 0th mode
    subsetSubs = rebase(subsetIds, subsetSubs)
    return sptensor.sptensor(subsetSubs, subsetVals, subsetShape)
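The rebase helper used above is not shown in these snippets; a minimal sketch of what it is assumed to do (remap the mode-0 subscripts onto 0..len(subsetIds)-1, preserving the order of subsetIds):

def rebase(subsetIds, subs):
    # assumed behaviour: old mode-0 ids -> consecutive new ids
    idMap = dict(zip(subsetIds, range(len(subsetIds))))
    subs = subs.copy()
    subs[:, 0] = [idMap[s] for s in subs[:, 0]]
    return subs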
def constructTensor(med_file, diag_file):    
    diag_med_comb = diag_cross_med(med_file, diag_file)
    ## create index map for subject_id, icdcode, and med_name
    patDict = createIndexMap(diag_med_comb.subject_id)
    medDict = createIndexMap(np.hstack(diag_med_comb.med_name))
    diagDict = createIndexMap(np.hstack(diag_med_comb.code))
    
    tensorIdx = np.array([[0,0,0]])
    tensorVal = np.array([[0]])
    for i in xrange(diag_med_comb.shape[0]):
        curDiag = [diagDict[x] for x in diag_med_comb.iloc[i,0]]
        curMed = [medDict[x] for x in diag_med_comb.iloc[i,1]]
        curPatId = patDict[diag_med_comb.iloc[i,2]]
        dmCombo = extmath.cartesian((curDiag, curMed))
        tensorIdx = np.append(tensorIdx,np.column_stack((np.repeat(curPatId, dmCombo.shape[0]), dmCombo)),axis=0)
        tensorVal = np.append(tensorVal, np.ones((dmCombo.shape[0],1), dtype=np.int), axis=0)

    tensorIdx = np.delete(tensorIdx, (0), axis=0)
    tensorVal = np.delete(tensorVal, (0), axis=0)
    tenX = sptensor.sptensor(tensorIdx, tensorVal, np.array([len(patDict), len(diagDict), len(medDict)]))
    axisDict = {0: patDict, 1: diagDict, 2: medDict}
    
    return tenX, axisDict
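A minimal usage sketch (hypothetical file names): the two extracts are joined by diag_cross_med before the patient x diagnosis x medication counts are assembled.

tenX, axisDict = constructTensor("medication_events.csv", "diagnosis_events.csv")
print tenX.shape        # (patients, diagnosis codes, medication names)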
Example #33
def parseCarrier(f):
    headerRow = True
    patientId = None
    claimId = None
    procHier = loadJSON("cpt.json")
    icdHier = loadJSON("icd.json")
    patDict = OrderedDict(sorted({}.items(), key=lambda t: t[1]))
    diagDict = OrderedDict(sorted({}.items(), key=lambda t: t[1]))
    procDict = OrderedDict(sorted({}.items(), key=lambda t: t[1]))
    ## store the tensor index in an array
    tensorIdx = np.array([[0, 0, 0]])
    tensorVal = np.array([[0]])
    pid = 0

    for row in csv.reader(open(f, "rb")):
        if pid > 10000:
            break
        # For the header, we will get the values we need
        if headerRow:
            pidIdx = [
                i for i, item in enumerate(row)
                if re.search('DESYNPUF_ID', item)
            ][0]
            claimIdx = [
                i for i, item in enumerate(row) if re.search('CLM_ID', item)
            ][0]
            diagIdx = [
                i for i, item in enumerate(row)
                if re.search('ICD9_DGNS', item)
            ]
            hcpcsIdx = [
                i for i, item in enumerate(row) if re.search('HCPCS_CD', item)
            ]
            headerRow = False
            continue
        ## get the diagnosis and procedure codes
        diagArray, diagCat = getDiagnosis(row, diagIdx, icdHier)
        for dc in set(diagCat):
            if not diagDict.has_key(dc):
                diagDict[dc] = len(diagDict)
        hcpcsArray, hcpcsCat = getProc(row, hcpcsIdx, procHier)
        for pc in set(hcpcsCat):
            if not procDict.has_key(pc):
                procDict[pc] = len(procDict)
        diagList = [diagDict[dc] for dc in diagCat]
        procList = [procDict[pc] for pc in hcpcsCat]
        if claimId == row[claimIdx]:
            ## same claim means same patient, so just add
            claimDiag.extend(diagList)
            claimHcpcs.extend(procList)
            continue
        if claimId != None:
            ## otherwise claim is different - so store off the old claim
            if len(claimDiag) > 0 and len(claimHcpcs) > 0:
                dpCombo = extmath.cartesian((claimDiag, claimHcpcs))
                pid = patDict[patientId]
                tensorIdx = np.append(tensorIdx,
                                      np.column_stack(
                                          (np.repeat(pid, dpCombo.shape[0]),
                                           dpCombo)),
                                      axis=0)
                tensorVal = np.append(tensorVal,
                                      np.ones((dpCombo.shape[0], 1),
                                              dtype=np.int),
                                      axis=0)
        ## now we just update the new patient
        patientId = row[pidIdx]
        claimId = row[claimIdx]
        if not patDict.has_key(patientId):
            patDict[patientId] = len(patDict)
        claimDiag = diagList
        claimHcpcs = procList
        pid += 1
    tensorIdx = np.delete(tensorIdx, (0), axis=0)
    tensorVal = np.delete(tensorVal, (0), axis=0)
    tenX = sptensor.sptensor(
        tensorIdx, tensorVal,
        np.array([len(patDict), len(diagDict),
                  len(procDict)]))
    axisDict = {0: patDict, 1: diagDict, 2: procDict}
    return tenX, axisDict
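A minimal usage sketch (hypothetical claims file): the parser expects the SynPUF carrier-claims column names matched above and the cpt.json / icd.json hierarchy files in the working directory.

tenX, axisDict = parseCarrier("carrier_claims_sample.csv")
print tenX.shape        # (patients, diagnosis categories, procedure categories)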
Example #34
R = 40
iters = 70
samples = 10

pcaModel = RandomizedPCA(n_components=R)
stats = np.zeros((1, 6))

parser = argparse.ArgumentParser()
parser.add_argument("pat", type=int, help="number of patients")
args = parser.parse_args()
pn = args.pat

patList = np.arange(pn)
ix = np.in1d(X.subs[:, 0].ravel(), patList)
idx = np.where(ix)[0]
xprime = sptensor.sptensor(X.subs[idx, :], X.vals[idx],
                           [pn, X.shape[1], X.shape[2]])
flatX = sptenmat.sptenmat(xprime,
                          [0]).tocsrmat()  # matricize along the first mode
stats = np.zeros((1, 6))

## NMF Timing
for k in range(samples):
    startTime = time.time()
    nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "NMF", elapsed])))

## PCA Timing
for k in range(samples):
    startTime = time.time()
import tensor;
import sptensor;
import numpy as np;
import CP_APR
import ktensor
import KLProjection

""" 
Test file associated with the CP decomposition using APR
"""

""" Test factorization of sparse matrix """
subs = np.array([[0,3,1], [1,0,1], [1,2,1], [1,3,1], [3,0,0]]);
vals = np.array([[1],[1],[1],[1],[3]]);
siz = np.array([5,5,2]) # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900], [0.1673, 0.5880, 0.8256, 0.1117], [0.8620, 0.1548, 0.7900, 0.1363], [0.9899, 0.1999, 0.3185, 0.6787], [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900], [0.4950, 0.9296, 0.9889, 0.5277], [0.1476, 0.6967, 0.0006, 0.4795], [0.0550, 0.5828, 0.8654, 0.8013], [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467], [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
fms = Minit.fms(Minit)

Y, cpstats, modelStats = CP_APR.cp_apr(X,4, Minit=Minit, maxiters=100);
Y.normalize_sort(1)

subs2 = np.array([[0,3,1], [1,2,0]])
vals2 = np.array([[1], [1]])
siz2 = np.array([2,5,2])
Xhat = sptensor.sptensor(subs2, vals2, siz2)

klproj = KLProjection.KLProjection(Y.U, 4)
matrix_pkl = open("./nparr_pt_jdrange_med.pickle", "rb")
nparr_pt_jdrange_med = pickle.load(matrix_pkl)
matrix_pkl.close()

##########################################################################################

# build SPARSE tensor from our data
num_dims = len(nparr_pt_jdrange_med_binary.shape)
nnz = np.nonzero(nparr_pt_jdrange_med_binary)
data_values = nparr_pt_jdrange_med_binary[nnz].flatten()
data_values = np.reshape(data_values, (len(data_values), 1))
nonzero_subs = np.zeros((len(data_values), num_dims), dtype='int')
for n in range(num_dims):
    nonzero_subs[:, n] = nnz[n]
sparse_tensor_all_finite = sptensor.sptensor(nonzero_subs, data_values)

##classification for patients####
##classification for patients: use MAP_CHANGE < -2 as a positive change
#patients needed:
l_patients_for_tensor = np.sort(list(df_MAP_CHANGE_finite.RUID))
l_patDict_idx_patients_for_tensor = np.sort(
    [patDict[ruid] for ruid in l_patients_for_tensor])
nparr_pt_jdrange_med_binary_subset = nparr_pt_jdrange_med_binary[
    l_patDict_idx_patients_for_tensor]

#build axisDict
patDict = OrderedDict(sorted({}.items(),
                             key=lambda t: t[1]))  #axis dict, patient mode
medDict = OrderedDict(sorted({}.items(),
                             key=lambda t: t[1]))  #axis dict, med mode
Example #37
import ktensor
import predictionModel

#set data dirs
data_dir = 'E:/test_project_python/XiamenData/'

trainX = pd.read_csv(
    data_dir + 'SubTrainTensor_2387.csv',
    index_col=0,
)
trainX = trainX.drop(['PID'], axis=1)
trainXIndex = np.array(trainX.ix[:, :3].as_matrix(), dtype='int')
trainXValue = np.array(trainX.ix[:, 3].as_matrix(), dtype='int').reshape(
    (trainXIndex.shape[0], 1))
trainXSize = np.array([2387, 247, 816])
trainTensor = sptensor.sptensor(trainXIndex, trainXValue, trainXSize)

testX = pd.read_csv(
    data_dir + 'SubTestTensor_1024.csv',
    index_col=0,
)
testX = testX.drop(['PID'], axis=1)
testXIndex = np.array(testX.ix[:, :3].as_matrix(), dtype='int')
testXValue = np.array(testX.ix[:, 3].as_matrix(), dtype='int').reshape(
    (testXIndex.shape[0], 1))
testXSize = np.array([1024, 247, 816])
testTensor = sptensor.sptensor(testXIndex, testXValue, testXSize)

trainRe = pd.read_csv(data_dir + 'TrainResult_2387.csv')
trainY = trainRe.ix[:, -1].as_matrix()
trainY[trainY == 0] = -1
Example #38
labelID = 1
outfile = 'results/iter-db-5-{0}.csv'
set_desc = 'HF Patients Level 0 seed 0'
infile = file("data/hf-tensor-label1-level0-data.dat", "rb")

sqlLoadFile = "results/iter-{0}.sql".format(modelID)
statsFile = "results/iter-stats-{0}.csv".format(modelID)
fmsFile = "results/iter-fms-{0}.csv".format(modelID)

# load the sparse tensor information
subs = np.load(infile)
vals = np.load(infile)
siz = np.load(infile)
infile.close()
# now factor it
X = sptensor.sptensor(subs, vals, siz)
# Create a random initialization
N = X.ndims()
np.random.seed(0)
F = [];
for n in range(N):
    F.append(np.random.rand(X.shape[n], R))

Minit = ktensor.ktensor(np.ones(R), F)
Y, ystats, fmsStats, mstats = cp_apr(X, R, Minit=Minit, outputfile=outfile, maxiters=iter)

## automate the creation of the sql file
ystats = np.column_stack((np.repeat(modelID, ystats.shape[0]), ystats))
np.savetxt(statsFile, ystats, delimiter="|")

fmsStats = np.column_stack((np.repeat(modelID, fmsStats.shape[0]), fmsStats))
df_MAP_CHANGE_first_10_ruid['MAP_CHANGE_GOOD'] = df_MAP_CHANGE_first_10_ruid['MAP_CHANGE_GOOD'].astype('int')
l_patClass = df_MAP_CHANGE_first_10_ruid['MAP_CHANGE_GOOD']
od_patClass_first_10_ruid = OrderedDict(zip(patDict.keys(), l_patClass))


# build SPARSE tensor from our data
nparr_data_by_pt = np.array(l_data_pt_med_jdrange)
num_dims = len(nparr_data_by_pt.shape)
nnz = np.nonzero(nparr_data_by_pt)
data_values = nparr_data_by_pt[nnz].flatten()
data_values = np.reshape(data_values, (len(data_values), 1))
nonzero_subs = np.zeros((len(data_values), num_dims), dtype='int')
for n in range(num_dims):
    nonzero_subs[:, n] = nnz[n]
sparse_tensor_first_10_ruid = sptensor.sptensor(nonzero_subs, data_values)


#save the tensor
tensorIO.saveSingleTensor(sparse_tensor_first_10_ruid, axisDict, od_patClass_first_10_ruid, "htn-first10-tensor-{0}.dat") #

### LEFT OFF HERE: june 25, 6pm ##################################################################

## load the tensor #######
loaded_X, loaded_axisDict, loaded_classDict = tensorIO.loadSingleTensor("htn-first10-tensor-{0}.dat")

## do the decomposition ######
#store the data in "data"
data = {'exptID': exptID, 'size': MSize, 'sparsity': AFill, "rank": R, "alpha": alpha, "gamma": gamma}

def calculateValues(TM, M):
import tensor
import sptensor
import numpy as np
import CP_APR
import ktensor
import KLProjection
""" 
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
fms = Minit.fms(Minit)

Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
Example #41
# from sparse representation file to sparse tensor
x = pd.read_csv(data_dir + 'validationTensor_3412.csv').drop(['PID'], axis=1)
#print x
matrixIndex = np.array(x.ix[:, :3].as_matrix(), dtype='int')
matrixValue = np.array(x.ix[:, 3].as_matrix(), dtype='int').reshape(
    (matrixIndex.shape[0], 1))
matrixSize = np.array([3412, 247, 816])

#print matrixValue
re = pd.read_csv(data_dir + 'vadationResult_3412.csv')
#print re
Y = np.array(re.ix[:, 'InHosLabel'])
#print Y

X = sptensor.sptensor(matrixIndex, matrixValue, matrixSize)
#print X.subs
#print X.subs.shape
'''
demoX=pd.read_csv(data_notebook+'demoF.csv')
demoX.index=demoX.ix[:,0]
demoX=np.array(demoX.ix[:,1:])
#demoX=demoX[:,1:]

print(demoX.shape)
#print demoX[:3]
'''

goNum = [10, 30, 50, 80, 100, 125, 150, 180, 200]
for i in range(len(goNum)):
    phennum = goNum[i]
Example #42
import tensor
import sptensor
import numpy as np
import CP_APR
import ktensor
""" 
Test file associated with the CP decomposition using APR
"""
""" Test factorization of sparse matrix """
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
X = sptensor.sptensor(subs, vals, siz)
U0 = np.array([[0.7689, 0.8843, 0.7487, 0.0900],
               [0.1673, 0.5880, 0.8256, 0.1117],
               [0.8620, 0.1548, 0.7900, 0.1363],
               [0.9899, 0.1999, 0.3185, 0.6787],
               [0.5144, 0.4070, 0.5341, 0.4952]])
U1 = np.array([[0.1897, 0.5606, 0.8790, 0.9900],
               [0.4950, 0.9296, 0.9889, 0.5277],
               [0.1476, 0.6967, 0.0006, 0.4795],
               [0.0550, 0.5828, 0.8654, 0.8013],
               [0.8507, 0.8154, 0.6126, 0.2278]])
U2 = np.array([[0.4981, 0.5747, 0.7386, 0.2467],
               [0.9009, 0.8452, 0.5860, 0.6664]])
Minit = ktensor.ktensor(np.ones(4), [U0, U1, U2])
fms = Minit.fms(Minit)

Y, cpstats, modelStats = CP_APR.cp_apr(X, 4, Minit=Minit, maxiters=100)
Y.normalize_sort(1)
""" Test factorization of regular matrix """
Example #43
R = 40
iters=70
samples=10

pcaModel = RandomizedPCA(n_components=R)
stats = np.zeros((1, 6))

parser = argparse.ArgumentParser()
parser.add_argument("pat", type=int, help="number of patients")
args = parser.parse_args()
pn = args.pat

patList = np.arange(pn)
ix = np.in1d(X.subs[:,0].ravel(), patList)
idx = np.where(ix)[0]
xprime = sptensor.sptensor(X.subs[idx, :], X.vals[idx], [pn, X.shape[1], X.shape[2]])
flatX = sptenmat.sptenmat(xprime, [0]).tocsrmat() # matricize along the first mode
stats = np.zeros((1,6))

## NMF Timing
for k in range(samples):
    startTime = time.time()
    nmfModel = nimfa.mf(flatX, method="nmf", max_iter=iters, rank=R)
    nmfResult = nimfa.mf_run(nmfModel)
    elapsed = time.time() - startTime
    stats = np.vstack((stats, np.array([R, iters, pn, k, "NMF", elapsed])))
    
## PCA Timing
for k in range(samples):
    startTime = time.time()
    pcaModel.fit(flatX)
    noiseNum = int(totNonzero*noise)
    noiseVals = np.random.poisson(lam=noiseParam, size=noiseNum)
    noiseSubs = np.random.randint(low=0, high=totNonzero, size=noiseNum)
    ## first choose a number between 0 and 1 to denote add or subtract
    noiseOp = np.random.randint(low=0, high=2, size=noiseNum)
    addIdx = np.where(noiseOp == 0)[0]
    Y = X.copy()
    Y.vals[noiseSubs[addIdx], 0] = Y.vals[noiseSubs[addIdx], 0] + noiseVals[addIdx]
    ## do the subtraction
    subtractIdx = np.where(noiseOp == 1)[0]
    Y.vals[noiseSubs[subtractIdx], 0] = Y.vals[noiseSubs[subtractIdx], 0] - noiseVals[subtractIdx]
    ## anything that was zero-ed out we want to fix
    nozIdx = np.where(Y.vals <= 0)[0]
    Y.vals[nozIdx] = 0
    ## then we will add more by sampling empty space
    nozVals = np.random.poisson(lam=1, size=len(nozIdx)).reshape(len(nozIdx), 1)
    nozVals[np.where(nozVals == 0)] = 1
    nozSub0 = np.random.randint(low=0, high=Y.shape[0], size=len(nozVals))
    nozSub1 = np.random.randint(low=0, high=Y.shape[1], size=len(nozVals))
    nozSub2 = np.random.randint(low=0, high=Y.shape[2], size=len(nozVals))
    nozSubs = np.column_stack((nozSub0, nozSub1, nozSub2))
    Y.subs = np.vstack((Y.subs, nozSubs))
    Y.vals = np.vstack((Y.vals, nozVals))
    Y = sptensor.sptensor(Y.subs, Y.vals, Y.shape)
    noiseTF = factorTensor(Y)
    fms = baseTF.greedy_fms(noiseTF)

    outfile.write(json.dumps({"expt": exptID, "type": "add+subtract", "noise": noise, 
        "seed": seed, "rank": R, "0": fms['0'], "1": fms['1'], "2": fms['2']}) + "\n")

outfile.close()


##########################################################################################


# build SPARSE tensor from our data
num_dims = len(nparr_pt_jdrange_med_binary.shape)
nnz = np.nonzero(nparr_pt_jdrange_med_binary)
data_values = nparr_pt_jdrange_med_binary[nnz].flatten()
data_values = np.reshape(data_values, (len(data_values), 1))
nonzero_subs = np.zeros((len(data_values), num_dims), dtype='int')
for n in range(num_dims):
    nonzero_subs[:, n] = nnz[n]
sparse_tensor_all_finite = sptensor.sptensor(nonzero_subs, data_values)



##classification for patients####
##classification for patients: use MAP_CHANGE < -2 as a positive change
#patients needed: 
l_patients_for_tensor = np.sort(list(df_MAP_CHANGE_finite.RUID))
l_patDict_idx_patients_for_tensor = np.sort([patDict[ruid] for ruid in l_patients_for_tensor])
nparr_pt_jdrange_med_binary_subset = nparr_pt_jdrange_med_binary[l_patDict_idx_patients_for_tensor]

#build axisDict
patDict = OrderedDict(sorted({}.items(), key= lambda t:t[1])) #axis dict, patient mode
medDict =  OrderedDict(sorted({}.items(), key= lambda t:t[1])) #axis dict, med mode
jdDict = OrderedDict(sorted({}.items(), key= lambda t:t[1])) #axis dict, jd mode
jdrangeDict = OrderedDict(sorted({}.items(), key= lambda t:t[1])) #axis dict, jdrange mode
Example #47
import tensor
import sptensor
import numpy as np
import sim_APR
import ktensor
""" Test factorization of sparse matrix """
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
# do the tensor with the same one
X = [sptensor.sptensor(subs, vals, siz), sptensor.sptensor(subs, vals, siz)]
sharedModes = [np.array([[0, 0], [1, 1]])]
sapr = sim_APR.SAPR(X, 4, sharedModes)
sapr.factorize()

print sapr.M
Example #48
import tensor
import sptensor
import numpy as np
import sim_APR
import ktensor

""" Test factorization of sparse matrix """
subs = np.array([[0, 3, 1], [1, 0, 1], [1, 2, 1], [1, 3, 1], [3, 0, 0]])
vals = np.array([[1], [1], [1], [1], [3]])
siz = np.array([5, 5, 2])  # 5x5x2 tensor
# do the tensor with the same one
X = [sptensor.sptensor(subs, vals, siz), sptensor.sptensor(subs, vals, siz)]
sharedModes = [np.array([[0, 0], [1, 1]])]
sapr = sim_APR.SAPR(X, 4, sharedModes)
sapr.factorize()

print sapr.M
Example #49
import sys
sys.path.insert(0, './pytensor')

import logging

import numpy
from numpy import genfromtxt

import sptensor

# Set logging to DEBUG to see CP-ALS information
logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s')

file = '../datasets/movielens-synthesized/ratings-synthesized-50k.csv'
logging.debug("Loading dataset from file: %s", file)
data = genfromtxt(file, delimiter=',', skip_header=1)

logging.debug("Loaded data")

# we need to convert data into two lists; subscripts/coordinates and values
n = len(data)

subs_1 = numpy.append(data[:,:2], numpy.zeros((n, 1)), 1)
subs_2 = numpy.append(data[:,:2], numpy.ones((n, 1)), 1)

subs = numpy.vstack([subs_1, subs_2])
subs = subs.astype(int)

vals = numpy.hstack([data[:,2], data[:, 3]])
vals = vals.flatten()
vals = [[x] for i,x in enumerate(vals)]
vals = numpy.array(vals)

spten2 = sptensor.sptensor(subs, vals)
print spten2.shape