def PlotAllTrees(SavePath, TumorNumber, ReguPara, solver='gurobi'):
    """Render one tree image per saved result file, grouped by parameter.

    For every value in ``ReguPara`` the matching ``result*alpha<value>.mat``
    files are looked up under the module-level ``ParentDirectory`` /
    ``DateFolder`` / ``TumorName`` results folder, the ``'TreeStr'`` entry of
    each file is read with ``extractValue`` and handed to ``plotTree``.

    Args:
        SavePath: Prefix of the output location; images are written to
            ``<SavePath><TumorNumber>/<value>/<index>.png``.
        TumorNumber: Number of tumor samples; used as a path component.
        ReguPara: Iterable of regularization parameter values to plot.
        solver: Name of the solver sub-folder to read results from.
    """
    for regu_value in ReguPara:
        pattern = '%sresults/%s/%s/%s/%s/result*alpha%s.mat' % (
            ParentDirectory, DateFolder, TumorName, TumorNumber, solver,
            regu_value)
        result_files = glob.glob(pattern)

        # Every image for this parameter value lands in the same folder.
        figure_dir = '%s%s/%s/' % (SavePath, TumorNumber, regu_value)
        testFunction.CheckDirectory(figure_dir)

        # Images are numbered by their position in the glob result.
        for index, mat_file in enumerate(result_files):
            tree = extractValue(mat_file, 'TreeStr')
            plotTree(S=tree, root=12, filename=figure_dir + '%s.png' % index)
def _make_result_dir(TumorNumber, solver):
    """Return the results directory for this run, creating it if needed."""
    result_path = '%sresults/%s/%s/%s/%s/' % (
        ParentDirectory, DateFolder, TumorName, TumorNumber, solver)
    testFunction.CheckDirectory(result_path)
    return result_path


def _prepare_tree_inputs(CRefer, CInitial, CTrue, COrigin, FTrue,
                         TumorSample, dirA):
    """Transpose the inputs (cells become rows) and stack the copy numbers.

    Returns:
        ``(CRefer, CTrue, FTrue, TumorSample, dirA, Ctotal, cells,
        cellsObserv)`` where ``CRefer`` has one extra all-2 (diploid) row
        appended as the root and ``Ctotal`` holds ``CInitial`` on top of
        that extended ``CRefer``.
    """
    CRefer = CRefer.T
    CInitial = CInitial.T
    CTrue = CTrue.T
    COrigin = COrigin.T
    FTrue = FTrue.T
    TumorSample = TumorSample.T

    cellsList = [2, 2, 2]
    cellsNoiseList = [23, 23, 23]
    majorIndex = testFunction.findMajorIndex(cellsList, cellsNoiseList)
    dirA = dirA[:, majorIndex]

    cells = CTrue.shape[0]
    cellsObserv = CRefer.shape[0]
    # add one diploid row to the end as the root
    CRefer = np.concatenate(
        (CRefer, 2 * np.ones([1, CRefer.shape[1]])), axis=0)
    # Builtin float instead of np.float: the alias was removed from NumPy.
    Ctotal = np.zeros([cells + cellsObserv + 1, COrigin.shape[1]],
                      dtype=float)
    Ctotal[:cells, :] = CInitial
    Ctotal[cells:(cells + cellsObserv + 1), :] = CRefer
    return CRefer, CTrue, FTrue, TumorSample, dirA, Ctotal, cells, cellsObserv


def _alternate_until_converged(update_F, update_S, update_C, Ctotal, cells):
    """Cycle the three update steps until the copy-number objective settles.

    ``update_F()`` and ``update_S()`` each return ``(value, objective)``;
    ``update_C(F, S)`` returns ``(CUnknown, objective)``.  After every cycle
    the first ``cells`` rows of ``Ctotal`` are overwritten in place, so the
    callables see the refreshed matrix on the next cycle.  The loop stops
    when the copy-number objective changes by less than 1e-4 or the step
    counter exceeds 100.

    Returns:
        ``(CUnknown, F, S, step)`` from the last cycle.
    """
    thresholdI = 10 ** (-4)
    previous_objective = 0
    step = 1
    while True:
        F, _ = update_F()
        S, _ = update_S()
        step += 1
        CUnknown, objVal = update_C(F, S)
        Ctotal[0:cells, :] = CUnknown
        change = abs(previous_objective - objVal)
        previous_objective = objVal
        if change < thresholdI or step > 100:
            break
    return CUnknown, F, S, step


def _score_and_save_tree_result(result_file, CUnknown, F, S, step, cells,
                                CTrue, CRefer, FTrue, FTrueAll, CIndex,
                                CReferIndex):
    """Compute the accuracy / RMSD metrics and write them to ``result_file``."""
    acc = testFunction.calcAccuracy(CUnknown, CTrue, CellsInCol=False)
    CUnknown, order = testFunction.arrangeC(CUnknown, CTrue, CellsInCol=False)
    totalAcc = testFunction.calcAccuracyByRow(
        CUnknown, CTrue, CellsInCol=False)
    # Reorder the fractions with the same permutation applied to the cells.
    F = np.matrix(F[:, order])
    rmsdC = testFunction.calcRMSD(CUnknown, CTrue)
    rmsdF = testFunction.calcRMSD(F, FTrue)
    meanAcc = np.sum(acc) / cells
    rms_c = testFunction.calcRMSInCell(CUnknown, CTrue, CellsInCol=False)
    rms_f = testFunction.calcRMSInCell(
        F, FTrue, Cell=False, CellsInCol=False)
    sio.savemat(result_file, {
        'meanAcc': meanAcc,
        'Accuracy': acc,
        'CTrue': CTrue.T,
        # NOTE(review): the 6 is hard-coded; presumably the number of
        # observed reference cells (root row excluded) - confirm.
        'CRefer': CRefer[0:6, :].T,
        'totalAcc': totalAcc,
        'CInferred': CUnknown.T,
        'FTrueAll': FTrueAll,
        'FTrue': FTrue.T,
        'FInferred': F.T,
        'Step': step,
        'rmsdC': rmsdC,
        'rmsdF': rmsdF,
        'CIndex': CIndex,
        'CReferIndex': CReferIndex,
        'rmsInC': rms_c,
        'rmsInF': rms_f,
        'TreeStr': S,
    })


def SolveDecomposition(AllDataPaths, solver, noise):
    """Run the chosen solver on every simulated data set and save the results.

    Each entry of ``AllDataPaths`` is loaded with ``extractValue``, noise is
    added to the reference cells with ``testFunction.addNoise``, the data is
    decomposed with the requested solver, and the metrics are written to
    ``<ParentDirectory>results/<DateFolder>/<TumorName>/<TumorNumber>/<solver>/
    result<z>alpha<param>noise<noise>.mat``.

    Besides its arguments the function reads these module-level names:
    ``ParentDirectory``, ``DateFolder``, ``TumorName``, and, depending on the
    solver, ``reg1`` (nmf), ``alpha`` (gurobi, scip) and ``beta`` (scip).

    Args:
        AllDataPaths: Sequence of paths to the simulated data files.
        solver: ``'nmf'``, ``'gurobi'`` or ``'scip'``.  Any other value only
            prints a message for each data set.
        noise: Noise level forwarded to ``testFunction.addNoise`` and
            embedded in the output file name.

    Returns:
        None.  Results are written to disk.
    """
    for z, data_path in enumerate(AllDataPaths):
        (CIndex, CRefer, CReferIndex, CInitial, CTrue, FTrue, FTrueAll,
         TumorSample, dirA, COrigin) = extractValue(data_path)
        # add noise to the original reference cells
        CRefer = testFunction.addNoise(CRefer, noise)
        TumorNumber = TumorSample.shape[1]

        if solver == 'nmf':
            import NMF_solver as NS
            result_path = _make_result_dir(TumorNumber, solver)
            cells = CTrue.shape[1]
            print("No.%s experiment(s) using %s" % (z + 1, solver))
            print(" From %s tumor samples, infer %s cells, noise level is %s"
                  % (str(TumorNumber), str(cells), str(noise)))
            (iter_nn, dist, accuracy, right_row, rmsd_c, rmsd_f, rms_c,
             rms_f, InferC, InferF) = NS.decompose(
                TumorSample, FTrue, CTrue, CRefer, CInitial,
                reg1=reg1, k=cells)
            meanAcc = np.sum(accuracy) / cells
            sio.savemat(
                result_path + 'result' + str(z) + 'alpha' + str(reg1)
                + 'noise' + str(noise) + '.mat',
                {'CTrue': CTrue,
                 'CInferred': InferC,
                 'CRefer': CRefer,
                 'CIndex': CIndex,
                 'CReferIndex': CReferIndex,
                 'FTrue': FTrue,
                 'FInferred': InferF,
                 'FTrueAll': FTrueAll,
                 'Accuracy': accuracy,
                 'rmsdC': rmsd_c,
                 'rmsdF': rmsd_f,
                 'rmsInC': rms_c,
                 'rmsInF': rms_f,
                 'Step': iter_nn,
                 'totalAcc': right_row,
                 'meanAcc': meanAcc})

        elif solver == 'gurobi':
            import GurobiILP_solver as GS
            result_path = _make_result_dir(TumorNumber, solver)
            print("No.%s experiment(s) using %s" % (z + 1, solver))
            (CRefer, CTrue, FTrue, TumorSample, dirA, Ctotal, cells,
             cellsObserv) = _prepare_tree_inputs(
                CRefer, CInitial, CTrue, COrigin, FTrue, TumorSample, dirA)
            root = cells + cellsObserv
            print(" regularization parameter=%s, from %s tumor samples, infer %s cells, noise level is %s."
                  % (str(alpha), str(TumorNumber), str(cells), str(noise)))
            CUnknown, F, S, step = _alternate_until_converged(
                lambda: GS.updateProportion(
                    TumorSample, Ctotal, cells, root=root, dirA=dirA),
                lambda: GS.updateTree(
                    TumorSample, Ctotal, cells, alpha=alpha, root=root),
                lambda F, S: GS.updateCopyNum(
                    TumorSample, F, S, CRefer, cells, alpha=alpha,
                    root=root, vType='I', Cap=True),
                Ctotal, cells)
            _score_and_save_tree_result(
                result_path + 'result' + str(z) + 'alpha' + str(alpha)
                + 'noise' + str(noise) + '.mat',
                CUnknown, F, S, step, cells, CTrue, CRefer, FTrue, FTrueAll,
                CIndex, CReferIndex)

        elif solver == 'scip':
            import SCIP_solver as SP
            result_path = _make_result_dir(TumorNumber, solver)
            print("No.%s experiment(s) using %s" % (z + 1, solver))
            (CRefer, CTrue, FTrue, TumorSample, dirA, Ctotal, cells,
             cellsObserv) = _prepare_tree_inputs(
                CRefer, CInitial, CTrue, COrigin, FTrue, TumorSample, dirA)
            root = cells + cellsObserv
            print(" regularization parameter=%s, from %s tumor samples, infer %s cells, noise level is %s."
                  % (str(alpha), str(TumorNumber), str(cells), str(noise)))
            # NOTE(review): unlike the gurobi branch, the copy-number update
            # and the file name use ``beta`` rather than ``alpha``; kept as
            # written - confirm this is intended.
            CUnknown, F, S, step = _alternate_until_converged(
                lambda: SP.updateProportion(
                    TumorSample, Ctotal, cells, root=root, dirA=dirA,
                    beta=beta),
                lambda: SP.updateTree(
                    TumorSample, Ctotal, cells, alpha, root=root),
                lambda F, S: SP.updateCopyNum(
                    TumorSample, F, S, CRefer, cells, beta,
                    root=root, vType='I', Cap=True),
                Ctotal, cells)
            _score_and_save_tree_result(
                result_path + 'result' + str(z) + 'alpha' + str(beta)
                + 'noise' + str(noise) + '.mat',
                CUnknown, F, S, step, cells, CTrue, CRefer, FTrue, FTrueAll,
                CIndex, CReferIndex)

        else:
            print('Solver Not Available, please choose nmf, gurobi or scip')
ParentDirectory = sys.argv[ 1] # Directory to the folder that contains subfolders for # code, data, result, simulation etc. DateFolder = str(sys.argv[2]) # specify a folder to save the results TumorName = str(sys.argv[3]) # pick a tumor, GBM07 or GBM33 tumor_number = int(sys.argv[4]) # the total number of bulk tumor samples alpha = ast.literal_eval(sys.argv[5]) # Dirichlet distribution parameters N = int(sys.argv[6]) #how many replicates you want to simulate Cap = bool(sys.argv[7]) # the largest permitted copy-number; larger # numbers will be set equal to Cap #check and/or make the directory to save the simulated data #save the simulated data with different tumor samples in different data output_dir = '%ssimulation/%s/%s/%s' % (ParentDirectory, DateFolder, TumorName, str(tumor_number)) testFunction.CheckDirectory(output_dir) ''' import single cell data ''' # single cell data should be stored in a folder that # is a subfolder of the parent folder where the code is stored # For example: codes are stored in: # ~/ParentDirectory/code/ # Then the data should be saved in: # ~/ParentDirectory/data/ def ImportSCData(ParentDirectory, TumorName, IntCNV=True, Cap=False): '''Read the single-cell copy number data for 'TumorName' from 'ParentDir'/data. Return a numpy array with markers as rows
# get the directory that contains other subfolders such as code, test, data etc. ParentDirectory = sys.argv[ 1] # will create a folder named figures under this directory DateFolder = str(sys.argv[2]) # specify a folder to retrieve the results TumorName = str(sys.argv[3]) # pick a tumor, GBM07 or GBM33 TumorNumbers = ast.literal_eval( sys.argv[4]) # a list of different number of tumor samples solvers = ast.literal_eval(sys.argv[5]) # a list of the different solvers used SavedFolder = str(sys.argv[6]) # # specify a folder to save the figures AllDataPaths = glob.glob('%sresults/%s/%s/*.mat' % (ParentDirectory, DateFolder, TumorName)) #check and/or make the directory to save the figures testFunction.CheckDirectory('%sfigures/%s/%s/' % (ParentDirectory, SavedFolder, TumorName)) #set the style of the figures sns.set(style="ticks", palette="pastel") #extract the result of the test, such as average accuracy, RMSD etc def extractValue(directory, key): data = sio.loadmat(directory) return data[key] """ get the result for different solver then calculate the average from all the test cases saved in an array, row is different tumor samples, column is average result in different regularization parameter