def _pair_condition_dict(genepair, geneToCases, mutex):
    """Build the condition dictionary describing one gene pair.

    'Genes' is the pair as a tuple, 'Overlap' is the number of cases common
    to every gene of the pair in *geneToCases* (whose values must be sets),
    and 'Mutex' is the *mutex* flag passed in.
    """
    pair_genes = tuple(genepair)
    shared_cases = set.intersection(*[geneToCases[gene] for gene in pair_genes])
    return {'Genes': pair_genes, 'Overlap': len(shared_cases), 'Mutex': mutex}


def _write_pair_stats(outfile, genepairs, pair_to_stats):
    """Write one tab-separated row per entry of *pair_to_stats* to *outfile*.

    The header is taken from the stats dictionary of the first pair in
    *genepairs*; rows follow the iteration order of *pair_to_stats*.
    Nothing is written when *genepairs* is empty.
    """
    if not genepairs:
        # Without a first pair there is no header to derive.
        print("No gene pairs to report; skipping " + outfile)
        return

    fieldnames = list(pair_to_stats[genepairs[0]].keys())
    with open(outfile, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames)
        writer.writeheader()
        for genepair in pair_to_stats:
            writer.writerow(pair_to_stats[genepair])


def main():
    """Run the pairwise mutex / co-occurrence analysis on a fixed data set.

    All inputs are hard-coded below.  The function loads the mutation
    matrix, builds a PermutationMatrices object, forms gene pairs from the
    genes mutated in more than five cases, and for every pair stores the
    statistics returned by mex.analyze_mutex_set_new /
    mex.analyze_cooccur_set_new together with the value that
    pm.set_to_pvalue returned for the pair (under 'NetworkProbability').

    Three TSV files are written into ``matrixdirectory``: the mutex table,
    the co-occurrence table and the list of ``pm.seeds``.  All three share
    one timestamp suffix so the outputs of a run can be matched up.
    """
    mutationmatrix = '/Users/jlu96/maf/new/PRAD_broad/PRAD_broad-som.m2'
    # Alternative: '/Users/jlu96/maf/new/PRAD_broad/shared_patients.plst'
    patientFile = None
    # Alternative: '/Users/jlu96/conte/jlu/REQUIREDFILES_OnlyLoss2/COSMICGenes_OnlyLoss.txt'
    geneFile = None
    load_directory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices'
    minFreq = 0
    num_permutations = 20
    binary_perm_method = False
    Q = 100
    write_matrices = True
    # Alternative: '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/SARC_broad-som-jl-'
    #              + ('matrix' if binary_perm_method else 'network')
    matrixdirectory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices'

    # One timestamp for the whole run keeps the three output names aligned.
    run_stamp = str(time.time())
    outmutexfile = os.path.join(
        matrixdirectory, 'mutex' + str(num_permutations) + run_stamp + '.tsv')
    outcooccurfile = os.path.join(
        matrixdirectory, 'cooccur' + str(num_permutations) + run_stamp + '.tsv')
    outseedsfile = os.path.join(matrixdirectory, 'seeds' + run_stamp + '.tsv')

    # The outputs are opened inside matrixdirectory, so that directory itself
    # (not just its parent) has to exist; makedirs also creates the parents.
    if not os.path.exists(matrixdirectory):
        os.makedirs(matrixdirectory)

    numGenes, numCases, genes, patients, geneToCases, patientToGenes = \
        mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq)
    print("numGenes  %s  and numCases  %s" % (numGenes, numCases))

    # Drop patients whose entry in patientToGenes is empty.  Only the dict is
    # changed; the patients list and numCases are left as loaded.
    for patient in patients:
        if not patientToGenes[patient]:
            patientToGenes.pop(patient)
            print("%s popped" % (patient,))

    # Generate permutation matrices.
    pm = PermutationMatrices(geneToCases, patientToGenes, num_permutations,
                             Q=Q, matrixdirectory=matrixdirectory,
                             binary_perm_method=binary_perm_method,
                             write_matrices=write_matrices,
                             load_directory=load_directory,
                             geneFile=geneFile, patientFile=patientFile,
                             minFreq=minFreq)

    # Only genes mutated in more than five cases take part in the pairing.
    test_genes = [gene for gene in genes if len(geneToCases[gene]) > 5]
    genepairs = met.getgenepairs(geneToCases, test_genes)
    print("Number of pairs to test  %s" % (len(genepairs),))

    # ---- Mutual exclusivity ------------------------------------------------
    mutex_set_condition_function_list = []
    for genepair in genepairs:
        condition_dict = _pair_condition_dict(genepair, geneToCases, True)
        condition_function = Condition([condition_dict])
        # Sanity check: report when the Condition does not hold exactly the
        # parameters it was constructed with.
        if [condition_dict] != condition_function.conditions:
            print("%s %s" % (condition_dict, condition_function.conditions))
        mutex_set_condition_function_list.append((genepair, condition_function))

    print("Finished mutex condition function list")

    t = time.time()
    pair_to_mutex_network_pvalue = pm.set_to_pvalue(mutex_set_condition_function_list)
    print("mutex pair network pvalues finished in  %s" % (time.time() - t,))

    pair_to_mutex = {}
    for genepair in genepairs:
        pair_to_mutex[genepair] = mex.analyze_mutex_set_new(
            numCases, geneToCases, patientToGenes, genepair)
        pair_to_mutex[genepair]['NetworkProbability'] = \
            pair_to_mutex_network_pvalue[genepair]

    _write_pair_stats(outmutexfile, genepairs, pair_to_mutex)

    # ---- Co-occurrence -----------------------------------------------------
    cooccur_set_condition_function_list = []
    for genepair in genepairs:
        condition_function = Condition(None)
        condition_function.set_params(
            [_pair_condition_dict(genepair, geneToCases, False)])
        cooccur_set_condition_function_list.append((genepair, condition_function))

    t = time.time()
    pair_to_cooccur_network_pvalue = pm.set_to_pvalue(cooccur_set_condition_function_list)
    print("cooccur pair network pvalues finished in  %s" % (time.time() - t,))

    pair_to_cooccur = {}
    for genepair in genepairs:
        pair_to_cooccur[genepair] = mex.analyze_cooccur_set_new(
            numCases, geneToCases, patientToGenes, genepair)
        pair_to_cooccur[genepair]['NetworkProbability'] = \
            pair_to_cooccur_network_pvalue[genepair]

    _write_pair_stats(outcooccurfile, genepairs, pair_to_cooccur)

    # ---- Seeds -------------------------------------------------------------
    with open(outseedsfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for seed in pm.seeds:
            writer.writerow([seed])
def complete_mutexpairs(self, genepairs, p=0.05, maxOverlap=200, parallel_compute_number=0):
    """Test gene pairs for mutual exclusivity and keep those passing the cutoffs.

    A Condition is built for every pair (genes, their overlap in
    ``self.geneToCases_orig``, and ``Mutex=True``) and the whole list is
    handed to ``self.set_to_pvalue`` -- directly, or through
    ``pac.parallel_compute_new`` when *parallel_compute_number* is truthy.

    Parameters:
        genepairs: sized collection of gene pairs; each pair is converted
            with ``tuple()`` and must unpack into exactly two genes.
        p: a pair is kept only if its returned p-value is strictly below p.
        maxOverlap: a pair is kept only if the 'Overlap' entry of its
            statistics does not exceed this value.
        parallel_compute_number: passed as both ``number`` and
            ``procnumber`` to ``pac.parallel_compute_new``; 0 disables the
            parallel path.

    Returns:
        (mpairsdict, mgenedict) where mpairsdict maps each retained pair to
        the dictionary from ``mex.analyze_mutex_set_new`` extended with
        'PermutationProbability', and mgenedict maps each gene of a retained
        pair to the set of its partner genes.
    """
    # Joined with single spaces to reproduce the space-separated output of
    # the multi-item print statement.
    banner = ("Generating list of", str(len(genepairs)),
              " mutually exclusive hypotheses to test on permutation matrices...")
    print(" ".join(banner))

    # One (pair, Condition) hypothesis per gene pair.
    hypotheses = []
    for genepair in genepairs:
        pair_condition = Condition(None)
        pair_genes = tuple(genepair)
        shared_cases = set.intersection(
            *[self.geneToCases_orig[gene] for gene in pair_genes])
        pair_condition.set_params([{
            'Genes': pair_genes,
            'Overlap': len(shared_cases),
            'Mutex': True,
        }])
        hypotheses.append((genepair, pair_condition))

    print("Done. Now, calulating p-values of hypotheses...")

    # Evaluate every hypothesis, optionally spread over several processes.
    if not parallel_compute_number:
        pair_pvalues = self.set_to_pvalue(hypotheses)
    else:
        pair_pvalues = pac.parallel_compute_new(
            self.set_to_pvalue, [hypotheses], hypotheses, 0,
            pac.partition_inputs, {0: pac.combine_dictionaries},
            number=parallel_compute_number,
            procnumber=parallel_compute_number)

    print("Done. Now, finding mutually exclusive pairs")

    mpairsdict = {}
    mgenedict = {}
    for genepair in pair_pvalues:
        pvalue = pair_pvalues[genepair]
        # Written as negated comparisons so the filters accept exactly the
        # same values as "< p" and "<= maxOverlap".
        if not pvalue < p:
            continue

        mstats = mex.analyze_mutex_set_new(self.numCases, self.geneToCases_orig,
                                           self.patientToGenes_orig,
                                           geneset=tuple(genepair))
        if not mstats['Overlap'] <= maxOverlap:
            continue

        mstats['PermutationProbability'] = pvalue
        mpairsdict[genepair] = mstats

        # Record the partnership in both directions.
        gene1, gene2 = tuple(genepair)
        mgenedict.setdefault(gene1, set()).add(gene2)
        mgenedict.setdefault(gene2, set()).add(gene1)

    return mpairsdict, mgenedict