Ejemplo n.º 1
0
def main():

    mutationmatrix = '/Users/jlu96/maf/new/PRAD_broad/PRAD_broad-som.m2'
    patientFile = None #'/Users/jlu96/maf/new/PRAD_broad/shared_patients.plst'
    geneFile = None #'/Users/jlu96/conte/jlu/REQUIREDFILES_OnlyLoss2/COSMICGenes_OnlyLoss.txt'
    load_directory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices'
    minFreq = 0
    num_permutations = 20
    binary_perm_method = False
    Q = 100
    write_matrices = True
    matrixdirectory = '/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/LoadMatrices'
        #'/Users/jlu96/conte/jlu/Analyses/CooccurImprovement/SARC_broad-som-jl-' + ('matrix' if binary_perm_method else 'network')
    outmutexfile = matrixdirectory + '/mutex' + str(num_permutations) + str(time.time()) + '.tsv'
    outcooccurfile = matrixdirectory + '/cooccur' + str(num_permutations)  + str(time.time()) + '.tsv'
    outseedsfile = matrixdirectory + '/seeds' + str(time.time()) + '.tsv'


    if not os.path.exists(os.path.dirname(matrixdirectory)):
        os.makedirs(os.path.dirname(matrixdirectory))


    numGenes, numCases, genes, patients, geneToCases, patientToGenes = mex.load_mutation_data(mutationmatrix, patientFile, geneFile, minFreq)

    print "numGenes ", numGenes, " and numCases ", numCases

    for patient in patients:
        if not patientToGenes[patient]:
            patientToGenes.pop(patient)
            print patient, "popped"

    # Generate Permutation Matrices
    pm = PermutationMatrices(geneToCases, patientToGenes, num_permutations, Q=Q, matrixdirectory=matrixdirectory,
                             binary_perm_method=binary_perm_method, write_matrices=write_matrices, load_directory=load_directory,
                             geneFile=geneFile, patientFile=patientFile, minFreq=minFreq)

    # Make list of pairs from highly mutated genes
    test_genes = [gene for gene in genes if len(geneToCases[gene]) > 5]
    # for test_gene in test_genes:
    #     print test_gene
    genepairs = met.getgenepairs(geneToCases, test_genes)
    print "Number of pairs to test ", len(genepairs)





    # CALCULATE MUTEX

    # Create a list of ConditionFunctions that you must later initialize...
    ConditionFunctions = range(len(genepairs))
    mutex_set_condition_function_list = []

    # Generate set_condition_function_list
    for i in range(len(genepairs)):
        genepair = genepairs[i]

        condition_dict = {}
        condition_dict['Genes'] = tuple(genepair)
        condition_dict['Overlap'] = len(set.intersection(*[geneToCases[gene] for gene in condition_dict['Genes']]))
        condition_dict['Mutex'] = True

        ConditionFunctions[i] = Condition([condition_dict])

        if [condition_dict] != ConditionFunctions[i].conditions:
            print condition_dict, ConditionFunctions[i].conditions


        mutex_set_condition_function_list.append((genepair, ConditionFunctions[i]))

    print "Finished mutex condition function list"

    t= time.time()
    # Calculate pvalues for mutual exclusivity
    pair_to_mutex = {}

    pair_to_mutex_network_pvalue = pm.set_to_pvalue(mutex_set_condition_function_list)
    print "mutex pair network pvalues finished in ", time.time() - t

    for genepair in genepairs:
        pair_to_mutex[genepair] = mex.analyze_mutex_set_new(numCases, geneToCases, patientToGenes, genepair)
        pair_to_mutex[genepair]['NetworkProbability'] = pair_to_mutex_network_pvalue[genepair]




    # Write to output
    with open(outmutexfile, 'w') as csvfile:
        fieldnames = pair_to_mutex[genepairs[0]].keys()
        writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames)
        writer.writeheader()
        for genepair in pair_to_mutex:
            writer.writerow(pair_to_mutex[genepair])



    # CALCULATE COOCCUR

    cooccur_set_condition_function_list = []

    # Generate set_condition_function_list
    for genepair in genepairs:
        ConditionFunction = Condition(None)

        condition_dict = {}
        condition_dict['Genes'] = tuple(genepair)
        condition_dict['Overlap'] = len(set.intersection(*[geneToCases[gene] for gene in condition_dict['Genes']]))
        condition_dict['Mutex'] = False

        ConditionFunction.set_params([condition_dict])

        cooccur_set_condition_function_list.append((genepair, ConditionFunction))



    t= time.time()
    # Calculate pvalues for mutual exclusivity
    pair_to_cooccur = {}

    pair_to_cooccur_network_pvalue = pm.set_to_pvalue(cooccur_set_condition_function_list)
    print "cooccur pair network pvalues finished in ", time.time() - t

    for genepair in genepairs:
        pair_to_cooccur[genepair] = mex.analyze_cooccur_set_new(numCases, geneToCases, patientToGenes, genepair)
        pair_to_cooccur[genepair]['NetworkProbability'] = pair_to_cooccur_network_pvalue[genepair]




    # Write to output
    with open(outcooccurfile, 'w') as csvfile:
        fieldnames = pair_to_cooccur[genepairs[0]].keys()
        writer = csv.DictWriter(csvfile, delimiter='\t', fieldnames=fieldnames)
        writer.writeheader()
        for genepair in pair_to_cooccur:
            writer.writerow(pair_to_cooccur[genepair])


    # Write seeds to output
    with open(outseedsfile, 'w') as csvfile:
        writer = csv.writer(csvfile, delimiter='\t')
        for seed in pm.seeds:
            writer.writerow([seed])
Ejemplo n.º 2
0
    def complete_cooccurpairs(self, genepairs, p=0.05, minCooccur=1, min_cooccurrence_ratio=0.0, parallel_compute_number=0,
                              compute_scores=True):
        """
        :param genepairs:
        :param cprob:
        :param minCooccur:
        :param min_cooccurrence_ratio:
        :param parallel_compute_number:
        :return: cpairsdict, cgenedict
        """


        print "Generating list of", len(genepairs), " co-occurring hypotheses to test on permutation matrices..."
        # Generate condition functions after analyzing each gene pair for Co-occurrence/min
        cooccur_set_condition_function_list = []

        # Generate list of condition functions to test the permutation matrix for
        for genepair in genepairs:
            ConditionFunction = Condition(None)

            condition_dict = {}
            condition_dict['Genes'] = tuple(genepair)
            condition_dict['Overlap'] = len(set.intersection(*[self.geneToCases_orig[gene] for gene in condition_dict['Genes']]))
            condition_dict['Mutex'] = False

            ConditionFunction.set_params([condition_dict])

            cooccur_set_condition_function_list.append((genepair, ConditionFunction))

        print "Done. Now, calulating p-values of hypotheses..."

        # Generate co-occurring pairs
        if parallel_compute_number:
            cooccur_pair_to_pvalue = pac.parallel_compute_new(self.set_to_pvalue, [cooccur_set_condition_function_list],
                                                         cooccur_set_condition_function_list, 0, pac.partition_inputs, {0: pac.combine_dictionaries},
                                                         number=parallel_compute_number,
                                                         procnumber=parallel_compute_number)
        else:
            cooccur_pair_to_pvalue = self.set_to_pvalue(cooccur_set_condition_function_list)


        print "Done. Now, finding co-occurring pairs"
        # Generate dictionary for each pair. Optionally analyze each cooccur set as well.
        cpairsdict = {}
        cgenedict = {}

        for genepair in cooccur_pair_to_pvalue:
            if cooccur_pair_to_pvalue[genepair] < p:

                cstats = mex.analyze_cooccur_set_new(self.numCases, self.geneToCases_orig, self.patientToGenes_orig,
                                                     geneset=tuple(genepair), compute_scores=compute_scores)


                if cstats['Overlap'] >= minCooccur and cstats['CooccurrenceRatio'] >= min_cooccurrence_ratio:


                    cstats['PermutationProbability'] = cooccur_pair_to_pvalue[genepair]
                    cpairsdict[genepair] = cstats
                    gene1, gene2 = tuple(genepair)
                    if gene1 not in cgenedict:
                        cgenedict[gene1] = set()
                        cgenedict[gene1].add(gene2)
                    else:
                        cgenedict[gene1].add(gene2)

                    if gene2 not in cgenedict:
                        cgenedict[gene2] = set()
                        cgenedict[gene2].add(gene1)
                    else:
                        cgenedict[gene2].add(gene1)

        return cpairsdict, cgenedict