def main(): mutationmatrix = '/Users/jlu96/maf/new/PRAD_broad/PRAD_broad-som.m2' patientFile = None #'/Users/jlu96/maf/new/PRAD_broad/shared_patients.plst' geneFile = None #'/Users/jlu96/conte/jlu/REQUIREDFILES_OnlyLoss2/COSMICGenes_OnlyLoss.txt' load_directory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices' minFreq = 0 num_permutations = 20 binary_perm_method = False Q = 100 write_matrices = True matrixdirectory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices' #'/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/SARC_broad-som-jl-' + ('matrix' if binary_perm_method else 'network') outmutexfile = matrixdirectory + '/mutex' + str(num_permutations) + str(time.time()) + '.tsv' outcooccurfile = matrixdirectory + '/cooccur' + str(num_permutations) + str(time.time()) + '.tsv' outseedsfile = matrixdirectory + '/seeds' + str(time.time()) + '.tsv' if not os.path.exists(os.path.dirname(matrixdirectory)): os.makedirs(os.path.dirname(matrixdirectory)) numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq) print "numGenes ", numGenes, " and numCases ", numCases for patient in patients: if not patientToGenes[patient]: patientToGenes.pop(patient) print patient, "popped" # Generate Permutation Matrices pm = PermutationMatrices(geneToCases, patientToGenes, num_permutations, Q=Q, matrixdirectory=matrixdirectory, binary_perm_method=binary_perm_method, write_matrices=write_matrices, load_directory=load_directory, geneFile=geneFile, patientFile=patientFile, minFreq=minFreq) # Make list of pairs from highly mutated genes test_genes = [gene for gene in genes if len(geneToCases[gene]) > 5] # for test_gene in test_genes: # print test_gene genepairs = met.getgenepairs(geneToCases, test_genes) print "Number of pairs to test ", len(genepairs) # CALCULATE MUTEX # Create a list of ConditionFunctions that you must later initialize... ConditionFunctions = range(len(genepairs)) mutex_set_condition_function_list = [] # Generate set_condition_function_list for i in range(len(genepairs)): genepair = genepairs[i] condition_dict = {} condition_dict['Genes'] = tuple(genepair) condition_dict['Overlap'] = len(set.intersection(*[geneToCases[gene] for gene in condition_dict['Genes']])) condition_dict['Mutex'] = True ConditionFunctions[i] = Condition([condition_dict]) if [condition_dict] != ConditionFunctions[i].conditions: print condition_dict, ConditionFunctions[i].conditions mutex_set_condition_function_list.append((genepair, ConditionFunctions[i])) print "Finished mutex condition function list" t= time.time() # Calculate pvalues for mutual exclusivity pair_to_mutex = {} pair_to_mutex_network_pvalue = pm.set_to_pvalue(mutex_set_condition_function_list) print "mutex pair network pvalues finished in ", time.time() - t for genepair in genepairs: pair_to_mutex[genepair] = mex.analyze_mutex_set_new(numCases, geneToCases, patientToGenes, genepair) pair_to_mutex[genepair]['NetworkProbability'] = pair_to_mutex_network_pvalue[genepair] # Write to output with open(outmutexfile, 'w') as csvfile: fieldnames = pair_to_mutex[genepairs[0]].keys() writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames) writer.writeheader() for genepair in pair_to_mutex: writer.writerow(pair_to_mutex[genepair]) # CALCULATE COOCCUR cooccur_set_condition_function_list = [] # Generate set_condition_function_list for genepair in genepairs: ConditionFunction = Condition(None) condition_dict = {} condition_dict['Genes'] = tuple(genepair) condition_dict['Overlap'] = len(set.intersection(*[geneToCases[gene] for gene in condition_dict['Genes']])) condition_dict['Mutex'] = False ConditionFunction.set_params([condition_dict]) cooccur_set_condition_function_list.append((genepair, ConditionFunction)) t= time.time() # Calculate pvalues for mutual exclusivity pair_to_cooccur = {} pair_to_cooccur_network_pvalue = pm.set_to_pvalue(cooccur_set_condition_function_list) print "cooccur pair network pvalues finished in ", time.time() - t for genepair in genepairs: pair_to_cooccur[genepair] = mex.analyze_cooccur_set_new(numCases, geneToCases, patientToGenes, genepair) pair_to_cooccur[genepair]['NetworkProbability'] = pair_to_cooccur_network_pvalue[genepair] # Write to output with open(outcooccurfile, 'w') as csvfile: fieldnames = pair_to_cooccur[genepairs[0]].keys() writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames) writer.writeheader() for genepair in pair_to_cooccur: writer.writerow(pair_to_cooccur[genepair]) # Write seeds to output with open(outseedsfile, 'w') as csvfile: writer = csv.writer(csvfile, delimiter='\t') for seed in pm.seeds: writer.writerow([seed])
def main(): mutationmatrix = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.m2' patientFile = '/Users/jlu96/maf/new/OV_broad/shared_patients.plst' cpairfile = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LorenzoModel/Binomial/OV_broad-cna-jl-cpairs-min_cohort.txt' partitionfile = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.ppf' load_partitions = True do_min_cohort = True geneFile = None minFreq = 0 test_minFreq = 100 compute_mutex = True include_cohort_info = False num_cohorts_list = [1,3, 5, 7] numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq) print "number of genes is ", numGenes if do_min_cohort: cohort_dict, clusterToProp, min_cohort = partition.load_patient_cohorts(partitionfile, patientToGenes) min_cohort_genes = set.union(*(patientToGenes[p] for p in min_cohort)) print "getting pairs" genepairs = met.getgenepairs(geneToCases, min_cohort_genes, test_minFreq=test_minFreq) print "Number of pairs ", len(genepairs) print "Normal cooccur test" t = time.time() cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex) print "Normal cooccur done in ", time.time() - t print "Beginning cohorts" t = time.time() cpairsdict = add_BinomP_min_cohort_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict, min_cohort) print "Cohorts done in ", time.time() - t else: genepairs = met.getgenepairs(geneToCases, genes, test_minFreq=test_minFreq) print "Number of pairs ", len(genepairs) print "Normal cooccur test" cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex) # print "Add binomial probability" # cpairsdict = add_BinomP_all_pairs(cpairsdict, geneToCases, patientToGenes) # undo print "Beginning cohorts" if load_partitions: cohort_dict = partition.load_patient_cohorts(partitionfile) cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict) else: for num_cohorts in num_cohorts_list: # get cohorts cohort_dict = generate_patient_cohorts(patientToGenes, num_cohorts) cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict) if include_cohort_info: cpairsdict = add_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict) print "Writing to file..." met.writeanydict(cpairsdict, cpairfile)
def main(): mutationmatrix = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.m2' patientFile = '/Users/jlu96/maf/new/OV_broad/shared_patients.plst' cpairfile = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LorenzoModel/Binomial/OV_broad-cna-jl-cpairs-min_cohort.txt' partitionfile = '/Users/jlu96/maf/new/OV_broad/OV_broad-cna-jl.ppf' load_partitions = True do_min_cohort = True geneFile = None minFreq = 0 test_minFreq = 100 compute_mutex = True include_cohort_info = False num_cohorts_list = [1, 3, 5, 7] numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data( mutationmatrix, patientFile, geneFile, minFreq) print "number of genes is ", numGenes if do_min_cohort: cohort_dict, clusterToProp, min_cohort = partition.load_patient_cohorts( partitionfile, patientToGenes) min_cohort_genes = set.union(*(patientToGenes[p] for p in min_cohort)) print "getting pairs" genepairs = met.getgenepairs(geneToCases, min_cohort_genes, test_minFreq=test_minFreq) print "Number of pairs ", len(genepairs) print "Normal cooccur test" t = time.time() cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex) print "Normal cooccur done in ", time.time() - t print "Beginning cohorts" t = time.time() cpairsdict = add_BinomP_min_cohort_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict, min_cohort) print "Cohorts done in ", time.time() - t else: genepairs = met.getgenepairs(geneToCases, genes, test_minFreq=test_minFreq) print "Number of pairs ", len(genepairs) print "Normal cooccur test" cpairsdict, cgenedict = met.cooccurpairs(numCases, geneToCases, patientToGenes, genepairs, compute_mutex=compute_mutex) # print "Add binomial probability" # cpairsdict = add_BinomP_all_pairs(cpairsdict, geneToCases, patientToGenes) # undo print "Beginning cohorts" if load_partitions: cohort_dict = partition.load_patient_cohorts(partitionfile) cpairsdict = add_BinomP_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict) else: for num_cohorts in num_cohorts_list: # get cohorts cohort_dict = generate_patient_cohorts(patientToGenes, num_cohorts) cpairsdict = add_BinomP_cohorts_all_pairs( cpairsdict, geneToCases, patientToGenes, cohort_dict) if include_cohort_info: cpairsdict = add_cohorts_all_pairs(cpairsdict, geneToCases, patientToGenes, cohort_dict) print "Writing to file..." met.writeanydict(cpairsdict, cpairfile)