Example #1
0
    def complete_mutexpairs(self, genepairs, p=0.05, maxOverlap=200, parallel_compute_number=0):
        """
        Test each gene pair as a mutual-exclusivity hypothesis and keep the significant ones.

        For every pair a ``Condition`` is configured with the pair's genes, the
        number of cases shared by those genes in ``self.geneToCases_orig`` and
        ``Mutex=True``. The resulting hypotheses are handed to
        ``self.set_to_pvalue``; pairs whose p-value is strictly below ``p`` are
        analysed with ``mex.analyze_mutex_set_new`` and kept when their
        ``'Overlap'`` does not exceed ``maxOverlap``.

        :param genepairs: sized collection of gene pairs. Each pair is iterated
            to obtain its two genes and is used as a dictionary key in the
            result (presumably a frozenset or tuple -- confirm against callers).
        :param p: significance threshold; a pair is kept only if its p-value < p.
        :param maxOverlap: largest ``'Overlap'`` statistic a kept pair may have.
        :param parallel_compute_number: when truthy, p-values are computed via
            ``pac.parallel_compute_new`` with this value passed as both
            ``number`` and ``procnumber``; otherwise ``self.set_to_pvalue`` is
            called directly.
        :return: ``(mpairsdict, mgenedict)`` where ``mpairsdict`` maps each kept
            pair to its statistics dict (with ``'PermutationProbability'``
            added) and ``mgenedict`` maps each gene to the set of genes it was
            paired with.
        """
        # A single pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("Generating list of %s  mutually exclusive hypotheses to test on permutation matrices..."
              % len(genepairs))

        # Build one (genepair, Condition) hypothesis per pair.
        mutex_set_condition_function_list = []
        for genepair in genepairs:
            genes = tuple(genepair)
            # Number of cases present in every gene's case set.
            overlap = len(set.intersection(*[self.geneToCases_orig[gene] for gene in genes]))

            condition_function = Condition(None)
            condition_function.set_params([{'Genes': genes, 'Overlap': overlap, 'Mutex': True}])

            mutex_set_condition_function_list.append((genepair, condition_function))

        print("Done. Now, calulating p-values of hypotheses...")

        # Compute a p-value for every mutual-exclusivity hypothesis.
        if parallel_compute_number:
            # NOTE(review): the meaning of the positional arguments is defined
            # by pac.parallel_compute_new, which is not visible here; the call
            # is kept exactly as it was.
            mutex_pair_to_pvalue = pac.parallel_compute_new(self.set_to_pvalue, [mutex_set_condition_function_list],
                                                            mutex_set_condition_function_list, 0,
                                                            pac.partition_inputs, {0: pac.combine_dictionaries},
                                                            number=parallel_compute_number,
                                                            procnumber=parallel_compute_number)
        else:
            mutex_pair_to_pvalue = self.set_to_pvalue(mutex_set_condition_function_list)

        print("Done. Now, finding mutually exclusive pairs")

        mpairsdict = {}  # kept pair -> statistics dict
        mgenedict = {}   # gene -> set of partner genes among kept pairs

        for genepair in mutex_pair_to_pvalue:
            pvalue = mutex_pair_to_pvalue[genepair]
            if pvalue < p:
                # Only significant pairs pay for the full statistics computation.
                mstats = mex.analyze_mutex_set_new(self.numCases, self.geneToCases_orig, self.patientToGenes_orig,
                                                   geneset=tuple(genepair))

                if mstats['Overlap'] <= maxOverlap:
                    mstats['PermutationProbability'] = pvalue
                    mpairsdict[genepair] = mstats

                    # Record the relation in both directions.
                    gene1, gene2 = genepair
                    mgenedict.setdefault(gene1, set()).add(gene2)
                    mgenedict.setdefault(gene2, set()).add(gene1)

        return mpairsdict, mgenedict
Example #2
0
    def complete_cooccurpairs(self, genepairs, p=0.05, minCooccur=1, min_cooccurrence_ratio=0.0, parallel_compute_number=0,
                              compute_scores=True):
        """
        Test each gene pair as a co-occurrence hypothesis and keep the significant ones.

        For every pair a ``Condition`` is configured with the pair's genes, the
        number of cases shared by those genes in ``self.geneToCases_orig`` and
        ``Mutex=False``. The resulting hypotheses are handed to
        ``self.set_to_pvalue``; pairs whose p-value is strictly below ``p`` are
        analysed with ``mex.analyze_cooccur_set_new`` and kept when they meet
        both the ``minCooccur`` and ``min_cooccurrence_ratio`` thresholds.

        :param genepairs: sized collection of gene pairs. Each pair is iterated
            to obtain its two genes and is used as a dictionary key in the
            result (presumably a frozenset or tuple -- confirm against callers).
        :param p: significance threshold; a pair is kept only if its p-value < p.
        :param minCooccur: smallest ``'Overlap'`` statistic a kept pair may have.
        :param min_cooccurrence_ratio: smallest ``'CooccurrenceRatio'``
            statistic a kept pair may have.
        :param parallel_compute_number: when truthy, p-values are computed via
            ``pac.parallel_compute_new`` with this value passed as both
            ``number`` and ``procnumber``; otherwise ``self.set_to_pvalue`` is
            called directly.
        :param compute_scores: forwarded unchanged to
            ``mex.analyze_cooccur_set_new``.
        :return: ``(cpairsdict, cgenedict)`` where ``cpairsdict`` maps each kept
            pair to its statistics dict (with ``'PermutationProbability'``
            added) and ``cgenedict`` maps each gene to the set of genes it was
            paired with.
        """
        # A single pre-formatted string prints identically under the Python 2
        # print statement and the Python 3 print function.
        print("Generating list of %s  co-occurring hypotheses to test on permutation matrices..."
              % len(genepairs))

        # Build one (genepair, Condition) hypothesis per pair.
        cooccur_set_condition_function_list = []
        for genepair in genepairs:
            genes = tuple(genepair)
            # Number of cases present in every gene's case set.
            overlap = len(set.intersection(*[self.geneToCases_orig[gene] for gene in genes]))

            condition_function = Condition(None)
            condition_function.set_params([{'Genes': genes, 'Overlap': overlap, 'Mutex': False}])

            cooccur_set_condition_function_list.append((genepair, condition_function))

        print("Done. Now, calulating p-values of hypotheses...")

        # Compute a p-value for every co-occurrence hypothesis.
        if parallel_compute_number:
            # NOTE(review): the meaning of the positional arguments is defined
            # by pac.parallel_compute_new, which is not visible here; the call
            # is kept exactly as it was.
            cooccur_pair_to_pvalue = pac.parallel_compute_new(self.set_to_pvalue, [cooccur_set_condition_function_list],
                                                              cooccur_set_condition_function_list, 0,
                                                              pac.partition_inputs, {0: pac.combine_dictionaries},
                                                              number=parallel_compute_number,
                                                              procnumber=parallel_compute_number)
        else:
            cooccur_pair_to_pvalue = self.set_to_pvalue(cooccur_set_condition_function_list)

        print("Done. Now, finding co-occurring pairs")

        cpairsdict = {}  # kept pair -> statistics dict
        cgenedict = {}   # gene -> set of partner genes among kept pairs

        for genepair in cooccur_pair_to_pvalue:
            pvalue = cooccur_pair_to_pvalue[genepair]
            if pvalue < p:
                # Only significant pairs pay for the full statistics computation.
                cstats = mex.analyze_cooccur_set_new(self.numCases, self.geneToCases_orig, self.patientToGenes_orig,
                                                     geneset=tuple(genepair), compute_scores=compute_scores)

                if cstats['Overlap'] >= minCooccur and cstats['CooccurrenceRatio'] >= min_cooccurrence_ratio:
                    cstats['PermutationProbability'] = pvalue
                    cpairsdict[genepair] = cstats

                    # Record the relation in both directions.
                    gene1, gene2 = genepair
                    cgenedict.setdefault(gene1, set()).add(gene2)
                    cgenedict.setdefault(gene2, set()).add(gene1)

        return cpairsdict, cgenedict