def PlotAllTrees(SavePath, TumorNumber, ReguPara, solver='gurobi'):
    """Plot the tree stored in every saved result file as a PNG image.

    For each regularization value in ``ReguPara`` the result files matching
    ``result*alpha<value>.mat`` in the results folder of ``TumorNumber`` and
    ``solver`` are collected.  The ``'TreeStr'`` entry of each file is handed
    to ``plotTree`` (with ``root=12``) and the figure is written to
    ``<SavePath><TumorNumber>/<value>/<index>.png``, where ``index`` is the
    position of the file in the glob result.

    Relies on the module-level names ``ParentDirectory``, ``DateFolder`` and
    ``TumorName``.
    """
    for regu in ReguPara:
        pattern = '%sresults/%s/%s/%s/%s/result*alpha%s.mat' % (
            ParentDirectory, DateFolder, TumorName, TumorNumber, solver, regu)
        mat_files = glob.glob(pattern)

        # make sure the output folder for this regularization value exists
        figure_dir = '%s%s/%s/' % (SavePath, TumorNumber, regu)
        testFunction.CheckDirectory(figure_dir)

        for index, mat_file in enumerate(mat_files):
            tree = extractValue(mat_file, 'TreeStr')
            plotTree(S=tree, root=12, filename=figure_dir + '%s.png' % index)
Beispiel #2
0
def SolveDecomposition(AllDataPaths, solver, noise):
    """Run the selected deconvolution solver on every input data file.

    For each path in ``AllDataPaths`` the data are loaded with
    ``extractValue``, noise is added to the reference cells with
    ``testFunction.addNoise``, the chosen solver is run, and the results are
    written with ``sio.savemat`` into
    ``<ParentDirectory>results/<DateFolder>/<TumorName>/<TumorNumber>/<solver>/``.

    Parameters
    ----------
    AllDataPaths : iterable
        Paths of the input data files; one experiment is run per entry.
    solver : str
        ``'nmf'``, ``'gurobi'`` or ``'scip'``.  For any other value a message
        is printed for each entry and nothing is solved or saved.
    noise
        Noise level forwarded to ``testFunction.addNoise``; it is also
        embedded in the name of the output file.

    Notes
    -----
    Relies on the module-level names ``ParentDirectory``, ``DateFolder`` and
    ``TumorName``, plus ``reg1`` (nmf), ``alpha`` (gurobi, scip) and ``beta``
    (scip).  The gurobi and scip branches iterate until the copy-number
    objective changes by less than 1e-4 or the step counter exceeds 100.
    """
    for z, data_path in enumerate(AllDataPaths):
        (CIndex, CRefer, CReferIndex, CInitial, CTrue, FTrue, FTrueAll,
         TumorSample, dirA, COrigin) = extractValue(data_path)
        # add noise to the original reference cells
        CRefer = testFunction.addNoise(CRefer, noise)
        TumorNumber = TumorSample.shape[1]

        # choose which solver you want to use:
        if solver == 'nmf':
            import NMF_solver as NS
            # make a directory to save the results
            result_path = '%sresults/%s/%s/%s/%s/' % (
                ParentDirectory, DateFolder, TumorName, TumorNumber, solver)
            testFunction.CheckDirectory(result_path)
            cells = CTrue.shape[1]
            print("No.%s experiment(s) using %s" % (z+1, solver))
            print(" From %s tumor samples, infer %s cells, noise level is %s" %
                  (str(TumorNumber), str(cells), str(noise)))
            iter_nn, dist, accuracy, right_row, rmsd_c, rmsd_f, rms_c, rms_f, InferC, InferF = \
                NS.decompose(TumorSample, FTrue, CTrue, CRefer, CInitial, reg1=reg1, k=cells)
            meanAcc = np.sum(accuracy)/cells
            sio.savemat(result_path + 'result' + str(z) + 'alpha' + str(reg1) + 'noise' + str(noise) + '.mat',
                        {'CTrue': CTrue, 'CInferred': InferC, 'CRefer': CRefer, 'CIndex': CIndex,
                         'CReferIndex': CReferIndex, 'FTrue': FTrue, 'FInferred': InferF, 'FTrueAll': FTrueAll,
                         'Accuracy': accuracy, 'rmsdC': rmsd_c, 'rmsdF': rmsd_f, 'rmsInC': rms_c, 'rmsInF': rms_f,
                         'Step': iter_nn, 'totalAcc': right_row, 'meanAcc': meanAcc})

        elif solver == 'gurobi':
            import GurobiILP_solver as GS
            # make a directory to save the results
            result_path = '%sresults/%s/%s/%s/%s/' % (
                ParentDirectory, DateFolder, TumorName, TumorNumber, solver)
            testFunction.CheckDirectory(result_path)

            print("No.%s experiment(s) using %s" % (z+1, solver))
            # this solver works on the transposed matrices
            CRefer = CRefer.T
            CInitial = CInitial.T
            CTrue = CTrue.T
            COrigin = COrigin.T
            FTrue = FTrue.T
            TumorSample = TumorSample.T

            cellsList = [2, 2, 2]
            cellsNoiseList = [23, 23, 23]
            majorIndex = testFunction.findMajorIndex(cellsList, cellsNoiseList)
            dirA = dirA[:, majorIndex]

            cells = CTrue.shape[0]
            cellsObserv = CRefer.shape[0]

            # add one diploid row to the end as the root
            CRefer = np.concatenate(
                (CRefer, 2 * np.ones([1, CRefer.shape[1]])), axis=0)
            # np.float was removed in NumPy 1.24; the builtin float is the
            # same dtype (float64)
            Ctotal = np.zeros(
                [cells+cellsObserv+1, COrigin.shape[1]], dtype=float)
            Ctotal[0:cells, :] = CInitial
            Ctotal[cells:(cells+cellsObserv+1), :] = CRefer

            # #################################################
            # Calculate inferred single-cell components
            oldObj = [0, 0, 0]
            thresholdI = 10 ** (-4)
            step = 1

            print(" regularization parameter=%s, from %s tumor samples, infer %s cells, noise level is %s." % (str(alpha), str(TumorNumber),str(cells), str(noise)))
            while True:
                [F, objVal1] = GS.updateProportion(
                    TumorSample, Ctotal, cells, root=cells+cellsObserv, dirA=dirA)
                [S, objVal2] = GS.updateTree(
                    TumorSample, Ctotal, cells, alpha=alpha, root=cells+cellsObserv)
                step += 1
                [CUnknown, objVal] = GS.updateCopyNum(TumorSample, F, S, CRefer, cells, alpha=alpha, root=cells+cellsObserv,
                                                      vType='I', Cap=True)
                Ctotal[0:cells, :] = CUnknown
                # convergence is judged on the copy-number objective only
                change = abs(oldObj[2] - objVal)
                oldObj[2] = objVal
                oldObj[0] = objVal1
                oldObj[1] = objVal2
                if change < thresholdI or step > 100:
                    break
            acc = testFunction.calcAccuracy(CUnknown, CTrue, CellsInCol=False)
            [CUnknown, order] = testFunction.arrangeC(
                CUnknown, CTrue, CellsInCol=False)

            totalAcc = testFunction.calcAccuracyByRow(
                CUnknown, CTrue, CellsInCol=False)
            F = F[:, order]
            F = np.matrix(F)

            rmsdC = testFunction.calcRMSD(CUnknown, CTrue)
            rmsdF = testFunction.calcRMSD(F, FTrue)
            meanAcc = np.sum(acc)/cells
            rms_c = testFunction.calcRMSInCell(CUnknown, CTrue, CellsInCol=False)
            rms_f = testFunction.calcRMSInCell(F, FTrue, Cell=False, CellsInCol=False)

            # NOTE(review): the hard-coded 6 presumably equals the number of
            # observed reference cells (sum of cellsList) - confirm
            sio.savemat(result_path + 'result' + str(z) + 'alpha' + str(alpha) + 'noise' + str(noise) + '.mat',
                        {'meanAcc': meanAcc, 'Accuracy': acc, 'CTrue': CTrue.T, 'CRefer': CRefer[0:6, :].T,
                         'totalAcc': totalAcc, 'CInferred': CUnknown.T, 'FTrueAll': FTrueAll, 'FTrue': FTrue.T,
                         'FInferred': F.T, 'Step': step, 'rmsdC': rmsdC, 'rmsdF': rmsdF, 'CIndex': CIndex,
                         'CReferIndex': CReferIndex, 'rmsInC': rms_c, 'rmsInF': rms_f, 'TreeStr': S})

        elif solver == 'scip':
            import SCIP_solver as SP
            # make a directory to save the results
            result_path = '%sresults/%s/%s/%s/%s/' % (
                ParentDirectory, DateFolder, TumorName, TumorNumber, solver)
            testFunction.CheckDirectory(result_path)
            print("No.%s experiment(s) using %s" % (z+1, solver))

            # this solver works on the transposed matrices
            CRefer = CRefer.T
            CInitial = CInitial.T
            CTrue = CTrue.T
            COrigin = COrigin.T
            FTrue = FTrue.T
            TumorSample = TumorSample.T

            cellsList = [2, 2, 2]
            cellsNoiseList = [23, 23, 23]
            majorIndex = testFunction.findMajorIndex(cellsList, cellsNoiseList)
            dirA = dirA[:, majorIndex]

            cells = CTrue.shape[0]
            cellsObserv = CRefer.shape[0]

            # add one diploid row to the end as the root
            CRefer = np.concatenate(
                (CRefer, 2 * np.ones([1, CRefer.shape[1]])), axis=0)
            # np.float was removed in NumPy 1.24; the builtin float is the
            # same dtype (float64)
            Ctotal = np.zeros(
                [cells+cellsObserv+1, COrigin.shape[1]], dtype=float)
            Ctotal[0:cells, :] = CInitial
            Ctotal[cells:(cells+cellsObserv+1), :] = CRefer

            # #################################################
            # Calculate inferred single-cell components
            oldObj = [0, 0, 0]
            thresholdI = 10 ** (-4)
            step = 1

            print(" regularization parameter=%s, from %s tumor samples, infer %s cells, noise level is %s." % (
                str(alpha), str(TumorNumber), str(cells), str(noise)))
            while True:
                [F, objVal1] = SP.updateProportion(
                    TumorSample, Ctotal, cells, root=cells+cellsObserv, dirA=dirA, beta=beta)
                [S, objVal2] = SP.updateTree(
                    TumorSample, Ctotal, cells, alpha, root=cells+cellsObserv)
                step += 1
                [CUnknown, objVal] = SP.updateCopyNum(TumorSample, F, S, CRefer, cells, beta, root=cells+cellsObserv,
                                                      vType='I', Cap=True)
                Ctotal[0:cells, :] = CUnknown
                # convergence is judged on the copy-number objective only
                change = abs(oldObj[2] - objVal)
                oldObj[2] = objVal
                oldObj[0] = objVal1
                oldObj[1] = objVal2
                if change < thresholdI or step > 100:
                    break

            acc = testFunction.calcAccuracy(CUnknown, CTrue, CellsInCol=False)
            [CUnknown, order] = testFunction.arrangeC(
                CUnknown, CTrue, CellsInCol=False)

            totalAcc = testFunction.calcAccuracyByRow(
                CUnknown, CTrue, CellsInCol=False)
            F = F[:, order]
            F = np.matrix(F)

            rmsdC = testFunction.calcRMSD(CUnknown, CTrue)
            rmsdF = testFunction.calcRMSD(F, FTrue)
            meanAcc = np.sum(acc)/cells
            rms_c = testFunction.calcRMSInCell(
                CUnknown, CTrue, CellsInCol=False)
            rms_f = testFunction.calcRMSInCell(
                F, FTrue, Cell=False, CellsInCol=False)

            # The file name keeps the 'alpha' tag but stores beta for this
            # solver, exactly as before.
            # NOTE(review): the hard-coded 6 presumably equals the number of
            # observed reference cells (sum of cellsList) - confirm
            sio.savemat(result_path + 'result' + str(z) + 'alpha' + str(beta) + 'noise' + str(noise) + '.mat',
                        {'meanAcc': meanAcc, 'Accuracy': acc, 'CTrue': CTrue.T, 'CRefer': CRefer[0:6, :].T,
                         'totalAcc': totalAcc, 'CInferred': CUnknown.T, 'FTrueAll': FTrueAll, 'FTrue': FTrue.T,
                         'FInferred': F.T, 'Step': step, 'rmsdC': rmsdC, 'rmsdF': rmsdF, 'CIndex': CIndex,
                         'CReferIndex': CReferIndex, 'rmsInC': rms_c, 'rmsInF': rms_f, 'TreeStr': S})

        else:
            print('Solver Not Available, please choose nmf, gurobi or scip')
Beispiel #3
0
# Command-line arguments of the simulation script.
# Directory to the folder that contains subfolders for
# code, data, result, simulation etc.
ParentDirectory = sys.argv[1]
DateFolder = str(sys.argv[2])  # specify a folder to save the results
TumorName = str(sys.argv[3])  # pick a tumor, GBM07 or GBM33
tumor_number = int(sys.argv[4])  # the total number of bulk tumor samples
alpha = ast.literal_eval(sys.argv[5])  # Dirichlet distribution parameters
N = int(sys.argv[6])  # how many replicates you want to simulate
# Copy-number cap switch.  bool() of any non-empty string is True, so passing
# "False" or "0" used to switch the cap ON; the explicit "off" spellings are
# now honoured, every other value still yields True as before.
# NOTE(review): the previous comment described Cap as "the largest permitted
# copy-number; larger numbers will be set equal to Cap", yet the value was
# always coerced to a bool - confirm how the consumer of Cap interprets it.
Cap = sys.argv[7].strip().lower() not in ('', '0', 'false', 'no')

# check and/or make the directory to save the simulated data
# save the simulated data with different tumor samples in different data
output_dir = '%ssimulation/%s/%s/%s' % (ParentDirectory, DateFolder, TumorName,
                                        str(tumor_number))
testFunction.CheckDirectory(output_dir)
'''
import single cell data
'''

# Single-cell data should be stored in a subfolder of the parent folder
# that also contains the code.
# For example, if the code is stored in:
#   ~/ParentDirectory/code/
# then the data should be saved in:
#   ~/ParentDirectory/data/


def ImportSCData(ParentDirectory, TumorName, IntCNV=True, Cap=False):
    '''Read the single-cell copy number data for 'TumorName' from
    'ParentDir'/data.  Return a numpy array with markers as rows
Beispiel #4
0
# Command-line arguments of the figure script.
# argv[1]: directory that contains the other subfolders (code, test, data
#          etc.); a folder named "figures" will be created under it
ParentDirectory = sys.argv[1]
# argv[2]: folder to retrieve the results from
DateFolder = sys.argv[2]
# argv[3]: the tumor to pick, GBM07 or GBM33
TumorName = sys.argv[3]
# argv[4]: a list of different numbers of tumor samples
TumorNumbers = ast.literal_eval(sys.argv[4])
# argv[5]: a list of the different solvers used
solvers = ast.literal_eval(sys.argv[5])
# argv[6]: folder to save the figures in
SavedFolder = sys.argv[6]

AllDataPaths = glob.glob(
    '%sresults/%s/%s/*.mat' % (ParentDirectory, DateFolder, TumorName))

# check and/or make the directory to save the figures
testFunction.CheckDirectory(
    '%sfigures/%s/%s/' % (ParentDirectory, SavedFolder, TumorName))

# set the style of the figures
sns.set(style="ticks", palette="pastel")


# extract the result of the test, such as average accuracy, RMSD, etc.
def extractValue(directory, key):
    """Load the .mat file at ``directory`` and return the entry stored under ``key``."""
    return sio.loadmat(directory)[key]


"""
get the result for different solver
then calculathe the average from all the test cases
saved in an array, row is different tumor samples, 
                column is average result in different regularization parameter