def calcTCI (mutcnaMatrixFN, degMatrixFN, tumorTypeFN = None, alphaNull = [1, 1], alphaIJKList = [2, 1, 1, 2], v0 = 0.3,
             ppiDict = None, dictGeneLength = None, outputPath = ".", opFlag = None, PANCANFlag = None, rowBegin=0, rowEnd = None):
    """
    Calculate the causal scores between each pair of SGA and DEG observed in each tumor
    and write one posterior matrix per tumor to "<outputPath>/<tumor name>.csv".

    Inputs:
        mutcnaMatrixFN      A file containing a N x G binary matrix containing the mutation and CNA
                            data of all tumors.  N is the number of tumors and
                            G is number of total number of unique genes.  For a
                            tumor, genes that have SGAs are indicated by "1"s and "0"
                            otherwise.

        degMatrixFN         A file contains a N x G' binary matrix representing DEG
                            status.  A "1" indicate a gene is differentially expressed
                            in a tumor.  Row names must equal those of mutcnaMatrixFN.

        tumorTypeFN         A string of filename.  The file contains N x T matrix, in which
                            each row only has one element set to 1, rest to zero, as an indicator
                            which type of cancer each tumor belongs to.  Required when
                            PANCANFlag is set.

        alphaNull           Hyperparameters handed unchanged to calcNullF for the
                            likelihood of the always-on cause A0.

        alphaIJKList        A list of Dirichlet hyperparameters for calculating the prior
                            of condition probability parameters. alphaIJK[0]: mut == 0 && deg == 0;
                            alphaIJK[1]: mut == 0 && deg == 1; alphaIJK[2]: mut == 1 && deg == 0;
                            alphaIJK[3]: mut == 1 && deg == 1

        v0                  A float scalar indicate the prior probability that a DEG
                            is caused by a non-SGA factor

        ppiDict             A dictionary keeps PPI network in the form an adjacency list
                            (a dictionary of dictionary).  Only used when opFlag == OR.

        dictGeneLength      A dictionary keeps the length of each of G genes in the
                            mutcnaMatrix.  Mandatory; the function exits when it is empty.

        outputPath          Directory handed to NamedMatrix.writeToText.

        opFlag              None for the plain analysis, or the module-level constants
                            OR / AND to select the combined-SGA matrix and priors.
                            AND together with PANCANFlag raises an Exception.

        PANCANFlag          A boolean flag to indicate if we are doing PANCAN

        rowBegin, rowEnd    Select the block of tumors processed by this call so that
                            several blocks can be run in parallel.  The block is
                            range(rowBegin, rowEnd), i.e. rowEnd is exclusive.  A rowEnd
                            of None (or 0), or one beyond the number of tumors, means
                            "through the last tumor".

    Returns None.  Calls sys.exit() after printing a message when inputs cannot be
    loaded or are inconsistent.
    """
    # check if gene length dictionary is set
    if not dictGeneLength:
        print("Gene length dictionary not provided, quit\n")
        sys.exit()

    # read in data in the form of NamedMatrix
    # (except Exception, not a bare except, so Ctrl-C / SystemExit are not swallowed)
    try:
        mutcnaMatrix = NamedMatrix(mutcnaMatrixFN)
    except Exception:
        print("Failed to import data matrix %s\n" % mutcnaMatrixFN)
        sys.exit()

    try:
        degMatrix = NamedMatrix(degMatrixFN)
    except Exception:
        print("Failed to import data matrix %s\n" % degMatrixFN)
        sys.exit()

    mutGeneNames = mutcnaMatrix.getColnames()
    mutTumorNames = mutcnaMatrix.getRownames()
    degGeneNames = degMatrix.getColnames()
    exprsTumorNames = degMatrix.getRownames()

    # check if same tumor names from two matrices above agree
    if exprsTumorNames != mutTumorNames:
        print("The tumors for mutcnaMatrix and degMatrix do not fully overlap!")
        print(degMatrix.getRownames())
        print(mutcnaMatrix.getRownames())
        sys.exit()

    tumorNames = exprsTumorNames
    nTumors, nMutGenes = mutcnaMatrix.shape()

    # now perform PANCAN analysis related tasks
    if PANCANFlag:
        if not tumorTypeFN:
            print("Cannot perform PANCAN analysis without tumor-type-indicator matrix")
            sys.exit()
        try:
            tumorTypeMatrix = NamedMatrix(tumorTypeFN)
        except Exception:
            print("Failed to import tumor type file %s" % tumorTypeFN)
            sys.exit()
        tumorTypeTumorNames = [x.replace("\"", "") for x in tumorTypeMatrix.getRownames()]
        if exprsTumorNames != tumorTypeTumorNames:
            print("The tumors for tumorTypeMatrix and degMatrix do not fully overlap!")
            sys.exit()

        tumorTypes = tumorTypeMatrix.getColnames()
        # Calculate the prior probability that a tumor-type variable may influence a DEG
        # to be proportional to the number of tumors from a given type
        vt = np.sum(tumorTypeMatrix.data, 0)  # column sums: number of tumors of each type
        vtprior = np.divide(vt, float(nTumors))  # normalize to 1, as prior for each type of tumor

    # Now start looping through a chunk of individual tumors and calculate the causal
    # scores between each pair of SGA and DEG
    print("Done with loading data, start processing tumor " + str(rowBegin))

    # rowEnd is the exclusive upper bound of range().  The previous code clamped it to
    # nTumors - 1, which silently dropped the last tumor of the data set.
    if not rowEnd or rowEnd > nTumors:
        rowEnd = nTumors
    elif rowEnd < rowBegin:
        print("Invalid rowEnd < rowBegin arguments given.")
        sys.exit()

    if rowBegin > rowEnd:
        print("Invlid rowBegin > rowEnd argument given.")
        sys.exit()

    for t in range(rowBegin, rowEnd):
        print("processign tumor  " + tumorNames[t])
        # print pacifier
        if t % 50 == 0:
            print("\nProcessed %s tumors" % str(t))

        # collect data related to DEGs to construct a submatrix containing only DEG of the tumor
        degGeneIndx = [i for i, j in enumerate(degMatrix.data[t, :]) if j == 1]
        tumorDEGGenes = [degGeneNames[i] for i in degGeneIndx]
        tumorDEGMatrix = degMatrix.data[:, degGeneIndx]

        # identify the genes that are altered in tumor t
        tumormutGeneIndx = [i for i, j in enumerate(mutcnaMatrix.data[t, :]) if j == 1]
        tumorMutGenes = [mutGeneNames[i] for i in tumormutGeneIndx]
        nTumorMutGenes = len(tumorMutGenes)

        # now extract the sub-matrix of mutcnaMatrix that only contain the genes that are
        # mutated in tumor t; with opFlag == OR a combined mutation matrix is created instead
        if opFlag == OR:
            tumorMutMatrix = createORComb(tumorMutGenes, ppiDict, mutcnaMatrix)
        else:  # default.  Extract columns of mutcnaMatrix corresponding to the altered genes
            tumorMutMatrix = mutcnaMatrix.data[:, tumormutGeneIndx]

        # Include the tumor-type label into the tumorMutMatrix as a tissue-specific
        # fake Gt to capture the DEGs that has tissue-specific characteristics
        if PANCANFlag:
            tumorTypeLabelIndx = np.where(tumorTypeMatrix.data[t, :] == 1)[0]
            if len(tumorTypeLabelIndx) != 1:
                raise Exception("Fail to extract tumor type")
            # indexing with the 1-element index array keeps the label as an N x 1 column
            tumorTypeColumn = tumorTypeMatrix.data[:, tumorTypeLabelIndx]
            tumorMutMatrix = np.hstack((tumorMutMatrix, tumorTypeColumn))
            # use a plain int: a 1-element array is not accepted as a list index by
            # NumPy versions that only convert 0-d arrays to scalar indices
            tumorTypeName = tumorTypes[int(tumorTypeLabelIndx[0])]
            tumorMutGenes.append(tumorTypeName)
            nTumorMutGenes = len(tumorMutGenes)

        # calculate single pairwise likelihood that an SGA causes a DEG.  Rows follow the
        # columns of tumorMutMatrix, columns are DEGs, currently without the joint impact
        tumorLnFScore = calcF(tumorMutMatrix, tumorDEGMatrix, alphaIJKList)

        # If PANCAN analysis, construct combinations of tumor-type label with different GTs to
        # determine the likelihood of DEG jointly conditioning on GT and tumor-type label.
        # This enables us to capture the fact that a GT regulate a GE but they also have a
        # high tendency in co-occurring in a specific tumor type
        if PANCANFlag:
            if opFlag == AND:
                raise Exception ("Combination of AND operation with PanCan analysis is not implemented")

            gtAbsent = tumorMutMatrix == 0
            labelAbsent = tumorTypeColumn == 0
            # multiplication acts as the AND of the GT state and the label state
            jointConfigs = (
                (tumorMutMatrix, tumorTypeColumn),  # GT == 1 && label == 1
                (tumorMutMatrix, labelAbsent),      # GT == 1 && label == 0
                (gtAbsent, tumorTypeColumn),        # GT == 0 && label == 1
                (gtAbsent, labelAbsent),            # GT == 0 && label == 0
            )
            jointGTandTumorLableFScore = np.zeros((tumorMutMatrix.shape[1], tumorDEGMatrix.shape[1]))
            for gtState, labelState in jointConfigs:
                # Keep each partial score in its own name.  The previous code stored it
                # in tumorLnFScore, destroying the single-SGA scores that are stacked below.
                partialLnFScore = calcF(np.multiply(gtState, labelState), tumorDEGMatrix, alphaIJKList)
                jointGTandTumorLableFScore = np.add(jointGTandTumorLableFScore, partialLnFScore)

            # stack the joint loglikelihood matrix on top of the single-SGA scores.
            # The last joint row (tumor-type label combined with itself) is dropped.
            tumorLnFScore = np.vstack((jointGTandTumorLableFScore[:-1, :], tumorLnFScore))

        # Calculate the likelihood that A0, which is 1 for all tumors, as a cause for DEGs.
        # Then, stack to the LnFScore, equivalent to adding a column of '1' to
        # represent the A0 in tumorMutMatrix
        nullFscore = calcNullF(tumorDEGMatrix, alphaNull)
        tumorLnFScore = np.vstack((tumorLnFScore, nullFscore))

        # calculate log of the prior probability that any of mutated genes plus A0 can be
        # a cause for a DEG.
        if PANCANFlag:
            if not opFlag:
                lntumorMutPriors = calcPanCanLnPrior(tumorMutGenes, dictGeneLength, vtprior[tumorTypeLabelIndx], v0)
            elif opFlag == AND:
                lntumorMutPriors = calcPanCanLnCombANDPrior(tumorMutGenes, dictGeneLength, vtprior[tumorTypeLabelIndx], v0)
            elif opFlag == OR:
                lntumorMutPriors = calcPanCanLnCombORPrior(tumorMutGenes, ppiDict, dictGeneLength, mutcnaMatrix.colnames, vtprior[tumorTypeLabelIndx], v0)
        else:
            if not opFlag:
                lntumorMutPriors = calcLnPrior(tumorMutGenes, dictGeneLength, v0)
            elif opFlag == AND:
                lntumorMutPriors = calcLnCombANDPrior(tumorMutGenes, dictGeneLength, v0)
            elif opFlag == OR:
                lntumorMutPriors = calcLnCombORPrior(tumorMutGenes, ppiDict, dictGeneLength, mutcnaMatrix.colnames, v0)

        # add the prior to each column, note double transposes because numpy broadcasts by row
        tumorLnFScore = np.add(tumorLnFScore.T, lntumorMutPriors).T

        # calculate the normalizer for each column (GE).
        colLogSum = calcColNormalizer(tumorLnFScore)
        normalizer = np.tile(colLogSum, (tumorLnFScore.shape[0], 1))
        posteriorAll = np.exp(np.add(tumorLnFScore, - normalizer))

        if PANCANFlag:
            # sum the posterior of each single GT with the posterior of its joint
            # GT-tumor-type row; the last two rows (tumor-type label and A0) are kept as is
            posterior = np.add(posteriorAll[0:nTumorMutGenes - 1, :], posteriorAll[nTumorMutGenes - 1:-2, :])
            posterior = np.vstack((posterior, posteriorAll[-2:, :]))
        else:
            # no joint rows were stacked, so there is nothing to merge.  The previous
            # unconditional merge sliced rows that do not exist in this case.
            posterior = posteriorAll

        # write out the results
        tumorMutGenes.append('A0')
        tumorPosterior = NamedMatrix(npMatrix = posterior, rownames = tumorMutGenes, colnames = tumorDEGGenes)
        tumorPosterior.writeToText(filePath = outputPath, filename = tumorNames[t] + ".csv")
def calcTCI (mutcnaMatrixFN, degMatrixFN, alphaNull = [1, 1], alphaIJKList = [2, 1, 1, 2],
              v0=0.2, ppiDict = None, dictGeneLength = None, outputPath = ".", opFlag = None, rowBegin=0, rowEnd = None):
    """
    Calculate the causal scores between each pair of SGA and DEG observed in each tumor
    and write one posterior matrix per tumor to "<outputPath>/<tumor name>.csv".

    Inputs:
        mutcnaMatrixFN      A file containing a N x G binary matrix containing the mutation and CNA
                            data of all tumors.  N is the number of tumors and
                            G is number of total number of unique genes.  For a
                            tumor, genes that have SGAs are indicated by "1"s and "0"
                            otherwise.

        degMatrixFN         A file contains a N x G' binary matrix representing DEG
                            status.  A "1" indicate a gene is differentially expressed
                            in a tumor.  Row names must equal those of mutcnaMatrixFN
                            (double quotes are ignored in the comparison).

        alphaNull           Hyperparameters handed unchanged to calcNullF for the
                            likelihood of the always-on cause A0.

        alphaIJKList        A list of Dirichlet hyperparameters for calculating the prior
                            of condition probability parameters. alphaIJK[0]: mut == 0 && deg == 0;
                            alphaIJK[1]: mut == 0 && deg == 1; alphaIJK[2]: mut == 1 && deg == 0;
                            alphaIJK[3]: mut == 1 && deg == 1

        v0                  A float scalar indicate the prior probability that a DEG
                            is caused by a non-SGA factor

        ppiDict             A dictionary keeps PPI network in the form an adjacency list
                            (a dictionary of dictionary).  Only used when opFlag == OR.

        dictGeneLength      A dictionary keeps the length of each of G genes in the
                            mutcnaMatrix.  Mandatory; the function exits when it is empty.

        outputPath          Directory handed to NamedMatrix.writeToText.

        opFlag              None for the plain analysis, or the module-level constants
                            AND / OR to analyse combined SGA events.

        rowBegin, rowEnd    Select the block of tumors processed by this call so that
                            several blocks can be run in parallel.  The block is
                            range(rowBegin, rowEnd), i.e. rowEnd is exclusive.  A rowEnd
                            of None (or 0), or one beyond the number of tumors, means
                            "through the last tumor".

    Tumors with fewer than two altered genes are skipped.  Returns None.  Calls
    sys.exit() after printing a message when inputs cannot be loaded or are inconsistent.
    """
    # fail fast: no point loading the matrices when the mandatory dictionary is missing
    if not dictGeneLength:
        print("Gene length dictionary not provided, quit\n")
        sys.exit()

    # read in data in the form of NamedMatrix
    # (except Exception, not a bare except, so Ctrl-C / SystemExit are not swallowed)
    try:
        mutcnaMatrix = NamedMatrix(mutcnaMatrixFN)
    except Exception:
        print("Failed to import data matrix %s\n" % mutcnaMatrixFN)
        sys.exit()

    try:
        degMatrix = NamedMatrix(degMatrixFN)
    except Exception:
        print("Failed to import data matrix %s\n" % degMatrixFN)
        sys.exit()

    # compare tumor names with embedded double quotes removed
    exprsTumorNames = [x.replace("\"", "") for x in degMatrix.getRownames()]
    mutTumorNames = [x.replace("\"", "") for x in mutcnaMatrix.getRownames()]
    if exprsTumorNames != mutTumorNames:
        print("The tumors for mutcnaMatrix and degMatrix do not fully overlap!")
        print(degMatrix.getRownames())
        print(mutcnaMatrix.getRownames())
        sys.exit()

    tumorNames = degMatrix.getRownames()
    nTumors, nMutGenes = mutcnaMatrix.shape()

    mutGeneNames = mutcnaMatrix.getColnames()
    degGeneNames = degMatrix.getColnames()

    # rowEnd is the exclusive upper bound of range().  The previous code clamped it to
    # nTumors - 1, which silently dropped the last tumor of the data set.
    if not rowEnd or rowEnd > nTumors:
        rowEnd = nTumors
    elif rowEnd < rowBegin:
        print("Invalid rowEnd < rowBegin arguments given.")
        sys.exit()

    if rowBegin > rowEnd:
        print("Invlid rowBegin > rowEnd argument given.")
        sys.exit()

    negInf = -np.inf  # spelled this way because the np.NINF alias is gone in NumPy 2

    # loop through individual tumors and calculate the causal scores between each pair
    # of SGA and DEG
    print("Done with loading data, start processing tumor " + str(rowBegin))
    for t in range(rowBegin, rowEnd):
        # print pacifier
        if t % 50 == 0:
            print("Processed %s tumors" % str(t))

        # collect data related to DEGs: the genes differentially expressed in tumor t
        # and the matching columns of the DEG matrix (all tumors)
        degGeneIndx = [i for i, j in enumerate(degMatrix.data[t, :]) if j == 1]
        tumorDEGGenes = [degGeneNames[i] for i in degGeneIndx]
        nTumorDEGs = len(degGeneIndx)  # corresponding to n, the number of DEGs in a given tumor
        tumorDEGMatrix = degMatrix.data[:, degGeneIndx]

        # collect data related to mutations
        tumormutGeneIndx = [i for i, j in enumerate(mutcnaMatrix.data[t, :]) if j == 1]
        if len(tumormutGeneIndx) < 2:
            print(tumorNames[t] + " has less than 2 mutations, skip.")
            continue
        tumorMutGenes = [mutGeneNames[i] for i in tumormutGeneIndx]

        # Sub-matrix of mutcnaMatrix holding only the genes altered in tumor t.  It is
        # built before the opFlag dispatch because the AND branch needs it as input;
        # previously that branch read tumorMutMatrix before it was ever assigned.
        tumorMutMatrix = mutcnaMatrix.data[:, tumormutGeneIndx]
        if opFlag == AND:
            tmpNamedMat = NamedMatrix(npMatrix = tumorMutMatrix, colnames = tumorMutGenes, rownames = tumorNames)
            tumorNamedMatrix = createANDComb(tmpNamedMat, opFlag)
            if not tumorNamedMatrix:  # this tumor does not have any qualifying joint mutations
                continue
            tumorMutMatrix = tumorNamedMatrix.data
            # copy, so that appending 'A0' below does not alter the matrix's own colnames
            tumorMutGenes = list(tumorNamedMatrix.colnames)
        elif opFlag == OR:
            tumorMutMatrix = createORComb(tumorMutGenes, ppiDict, mutcnaMatrix)

        # log prior that each candidate cause drives a DEG; the helper depends on opFlag
        if not opFlag:
            lntumorMutPriors = calcLnPrior(tumorMutGenes, dictGeneLength, v0)
        else:
            if opFlag == AND:
                lntumorMutPriors = calcLnCombANDPrior(tumorMutGenes, dictGeneLength, v0)
            elif opFlag == OR:
                lntumorMutPriors = calcLnCombORPrior(tumorMutGenes, ppiDict, dictGeneLength, mutcnaMatrix.colnames, v0)

        tumorMutGenes.append('A0')

        # calculate the pairwise likelihood that an SGA causes a DEG
        tumorLnFScore = calcF(tumorMutMatrix, tumorDEGMatrix, alphaIJKList)
        # Calculate the likelihood of expression data conditioning on A0, and then stack to
        # the LnFScore, equivalent to adding a column of '1' to represent the A0 in tumorMutMatrix
        nullFscore = calcNullF(tumorDEGMatrix, alphaNull)
        tumorLnFScore = np.vstack((tumorLnFScore, nullFscore))

        # tile the log priors up so that every DEG column receives the same prior vector
        tumorMutPriorMatrix = np.tile(lntumorMutPriors, (nTumorDEGs, 1)).T

        lnFScore = add(tumorLnFScore, tumorMutPriorMatrix)

        # per-DEG normalizer: accumulate the column entries with logSum,
        # skipping entries that are -inf
        columnAccumLogSum = np.zeros(nTumorDEGs)
        for col in range(nTumorDEGs):
            currLogSum = negInf
            for j in range(lnFScore.shape[0]):
                if lnFScore[j, col] == negInf:
                    continue
                currLogSum = logSum(currLogSum, lnFScore[j, col])
            columnAccumLogSum[col] = currLogSum

        normalizer = np.tile(columnAccumLogSum, (lnFScore.shape[0], 1))

        posterior = np.exp(add(lnFScore, - normalizer))

        # write out the results; quotes are stripped for the file name only, the list
        # returned by getRownames() is left untouched
        tumorPosterior = NamedMatrix(npMatrix = posterior, rownames = tumorMutGenes, colnames = tumorDEGGenes)
        outputName = tumorNames[t].replace("\"", "")
        tumorPosterior.writeToText(filePath = outputPath, filename = outputName + ".csv")
Example #3
0
def calcTCI (mutcnaMatrixFN, degMatrixFN, alphaNull = [1, 1], alphaIJKList = [2, 1, 1, 2], v0=0.2, dictGeneLength = None, outputPath = ".", opFlag = None):
    """
    Calculate the causal scores between each pair of SGA and DEG observed in each tumor
    and write one posterior matrix per tumor to
    "<outputPath>/<tumor name>-mut-vs-DEG-posterior.csv".

    Inputs:
        mutcnaMatrixFN      A file containing a N x G binary matrix containing the mutation and CNA
                            data of all tumors.  N is the number of tumors and
                            G is number of total number of unique genes.  For a
                            tumor, genes that have SGAs are indicated by "1"s and "0"
                            otherwise.

        degMatrixFN         A file contains a N x G' binary matrix representing DEG
                            status.  A "1" indicate a gene is differentially expressed
                            in a tumor.  Row names must equal those of mutcnaMatrixFN.

        alphaNull           Hyperparameters handed unchanged to calcNullF for the
                            likelihood of the always-on cause A0.

        alphaIJKList        A list of Dirichlet hyperparameters for calculating the prior
                            of condition probability parameters. alphaIJK[0]: mut == 0 && deg == 0;
                            alphaIJK[1]: mut == 0 && deg == 1; alphaIJK[2]: mut == 1 && deg == 0;
                            alphaIJK[3]: mut == 1 && deg == 1

        v0                  A float scalar indicate the prior probability that a DEG
                            is caused by a non-SGA factor

        dictGeneLength      A dictionary keeps the length of each of G genes in the
                            mutcnaMatrix.  Mandatory; the function exits when it is empty.

        outputPath          Directory handed to NamedMatrix.writeToText.

        opFlag              Falsy for the plain analysis; any other value is passed to
                            createComb to build combined SGA events.

    Returns None.  Calls sys.exit() after printing a message when inputs cannot be
    loaded or are inconsistent.
    """
    # fail fast: no point loading the matrices when the mandatory dictionary is missing
    if not dictGeneLength:
        print("Gene length dictionary not provided, quit\n")
        sys.exit()

    # read in data in the form of NamedMatrix
    # (except Exception, not a bare except, so Ctrl-C / SystemExit are not swallowed)
    try:
        mutcnaMatrix = NamedMatrix(mutcnaMatrixFN)
    except Exception:
        print("Failed to import data matrix %s\n" % mutcnaMatrixFN)
        sys.exit()

    try:
        degMatrix = NamedMatrix(degMatrixFN)
    except Exception:
        print("Failed to import data matrix %s\n" % degMatrixFN)
        sys.exit()

    if degMatrix.getRownames() != mutcnaMatrix.getRownames():
        print("The tumors for mutcnaMatrix and degMatrix do not fully overlap!")
        sys.exit()

    # now we iterate through each tumor to infer the causal relationship between each
    # pair of mut - deg
    tumorNames = degMatrix.getRownames()
    nTumors, nMutGenes = mutcnaMatrix.shape()

    mutGeneNames = mutcnaMatrix.getColnames()
    degGeneNames = degMatrix.getColnames()

    negInf = -np.inf  # spelled this way because the np.NINF alias is gone in NumPy 2

    for t in range(nTumors):
        # print pacifier
        if t % 50 == 0:
            print("Processed %s tumors" % str(t))

        # collect data related to mutations: genes altered in tumor t
        tumormutGeneIndx = [i for i, j in enumerate(mutcnaMatrix.data[t, :]) if j == 1]
        tumorMutGenes = [mutGeneNames[i] for i in tumormutGeneIndx]

        # extract the sub-matrix of mutcnaMatrix that only contains the genes that are
        # mutated in tumor t.  If a combination operation is requested, a new combined
        # mutation matrix replaces it.
        tumorMutMatrix = mutcnaMatrix.data[:, tumormutGeneIndx]
        if opFlag:
            tmpNamedMat = NamedMatrix(npMatrix = tumorMutMatrix, colnames = tumorMutGenes, rownames = tumorNames)
            tumorNamedMatrix = createComb(tmpNamedMat, opFlag)
            if not tumorNamedMatrix:  # this tumor does not have any qualifying joint mutations
                continue
            # copy, so that appending 'A0' below does not alter the matrix's own colnames
            tumorMutGenes = list(tumorNamedMatrix.colnames)
            tumorMutMatrix = tumorNamedMatrix.data

        # log prior that each candidate cause drives a DEG; the helper depends on opFlag
        if not opFlag:
            lntumorMutPriors = calcLnPrior(tumorMutGenes, dictGeneLength, v0)
        else:
            lntumorMutPriors = calcLnCombPrior(tumorMutGenes, dictGeneLength, v0)

        tumorMutGenes.append('A0')

        # collect data related to DEGs
        degGeneIndx = [i for i, j in enumerate(degMatrix.data[t, :]) if j == 1]
        tumorDEGGenes = [degGeneNames[i] for i in degGeneIndx]
        nTumorDEGs = len(degGeneIndx)  # corresponding to n, the number of DEGs in a given tumor
        tumorDEGMatrix = degMatrix.data[:, degGeneIndx]

        # pairwise scores from calcF, with the A0 score from calcNullF stacked underneath
        tumorLnFScore = calcF(tumorMutMatrix, tumorDEGMatrix, alphaIJKList)
        nullFscore = calcNullF(tumorDEGMatrix, alphaNull)
        tumorLnFScore = np.vstack((tumorLnFScore, nullFscore))

        # tile the log priors up so that every DEG column receives the same prior vector
        tumorMutPriorMatrix = np.tile(lntumorMutPriors, (nTumorDEGs, 1)).T

        lnFScore = add(tumorLnFScore, tumorMutPriorMatrix)

        # per-DEG normalizer: accumulate the column entries with logSum,
        # skipping entries that are -inf
        columnAccumLogSum = np.zeros(nTumorDEGs)
        for col in range(nTumorDEGs):
            currLogSum = negInf
            for j in range(lnFScore.shape[0]):
                if lnFScore[j, col] == negInf:
                    continue
                currLogSum = logSum(currLogSum, lnFScore[j, col])
            columnAccumLogSum[col] = currLogSum

        normalizer = np.tile(columnAccumLogSum, (lnFScore.shape[0], 1))

        posterior = np.exp(add(lnFScore, - normalizer))

        # write out the results
        tumorPosterior = NamedMatrix(npMatrix = posterior, rownames = tumorMutGenes, colnames = tumorDEGGenes)
        tumorPosterior.writeToText(outputPath, filename = tumorNames[t] + "-mut-vs-DEG-posterior.csv")