Ejemplo n.º 1
0
    def prepare_run(self):
        """
        Collect study and population term counts, then start the study.

        The study set always goes through ``ratio.count_terms_v2``. How the
        population counts are obtained depends on ``self.abcorr`` and
        ``self.randomSample``:

        * no abundance correction: the full background set from
          ``self.ui.get_background_an_all_set()`` is counted with
          ``ratio.count_terms_v2``;
        * abundance correction with random sampling: a randomly sampled
          background set is counted with ``ratio.count_terms`` and ``pop_n``
          is the size of that set;
        * abundance correction without random sampling: counts come from
          ``ratio.count_terms_abundance_corrected`` and ``pop_n`` equals
          ``study_n``.

        Note carried over from the original author: study_n should be the same
        in genome vs. observed vs. abundance_corrected.

        :return: None
        """
        abundance_corrected = self.abcorr

        # Only the uncorrected run against a 'Genome' background uses the
        # genome-specific sample getter; every other case uses the default one.
        if not abundance_corrected and self.ui.col_background_an == 'Genome':
            self.study_an_frset = self.ui.get_sample_an_frset_genome()
        else:
            self.study_an_frset = self.ui.get_sample_an_frset()

        study_counts = ratio.count_terms_v2(self.study_an_frset, self.assoc_dict, self.obo_dag)
        self.term_study, self.go2ans_study_dict, study_n = study_counts

        if not abundance_corrected:
            self.pop_an_set = self.ui.get_background_an_all_set()
            pop_counts = ratio.count_terms_v2(self.pop_an_set, self.assoc_dict, self.obo_dag)
            self.term_pop, self.go2ans_pop_dict, pop_n = pop_counts
        elif self.randomSample:
            self.pop_an_set = self.ui.get_background_an_set_random_sample()
            pop_n = len(self.pop_an_set)
            self.term_pop, self.go2ans_pop_dict = ratio.count_terms(self.pop_an_set, self.assoc_dict, self.obo_dag)
        else:
            # The abundance-corrected background is sized like the study set.
            pop_n = study_n
            self.term_pop, self.go2ans_pop_dict = ratio.count_terms_abundance_corrected(self.ui, self.assoc_dict, self.obo_dag)

        self.run_study_v2(self.term_study, self.term_pop, study_n, pop_n)
Ejemplo n.º 2
0
def calc_qval_dbl(study_n, pop_n, pop, assoc, term_pop, obo_dag, T=500):
    """
    Build a resampling distribution of minimum Fisher's exact test p-values.

    In each of the T iterations, study_n items are drawn at random (without
    replacement) from pop, their terms are counted with count_terms, and every
    counted term is tested against the population counts with a one-sided
    ('greater') Fisher's exact test. The smallest p-value seen in the
    iteration is recorded. Progress is printed to stderr every 10th iteration.

    :param study_n: Integer (number of ANs from sample frequency); size of each random draw
    :param pop_n: Integer (number of ANs from background frequency = sample freq.)
    :param pop: Iterable of items to draw from (a set is accepted); needs at least study_n items
    :param assoc: Passed unchanged to count_terms
    :param term_pop: Mapping of term -> count in the population; indexed by every term count_terms reports
    :param obo_dag: Passed unchanged to count_terms
    :param T: Integer, number of resampling iterations
    :return: List with one entry per iteration: the smallest p-value found (1 if none was below 1)
    :raises ValueError: From random.sample when study_n is larger than the population
    :raises KeyError: When a resampled term is missing from term_pop
    """
    # random.sample() requires a sequence; passing a set raises TypeError
    # since Python 3.11. Converting once up front also keeps the conversion
    # out of the resampling loop.
    population = tuple(pop)

    distribution = []
    for i in range(T):
        new_study = random.sample(population, study_n)
        # Only the first element of count_terms' result (term -> count) is needed.
        new_term_study = count_terms(new_study, assoc, obo_dag)[0]
        smallest_p = 1
        for term, study_count in new_term_study.items():
            pop_count = term_pop[term]
            # 2x2 contingency table: [with term, without term] for study and population.
            table = [[study_count, study_n - study_count],
                     [pop_count, pop_n - pop_count]]
            # alternative='greater' is a one-sided test, not a two-tailed one.
            p_value = stats.fisher_exact(table, alternative='greater')[1]
            if p_value < smallest_p:
                smallest_p = p_value
        distribution.append(smallest_p)
        if i % 10 == 0:
            print("Sample {0} / {1}: p-value {2}".format(i, T, smallest_p),
                  file=sys.stderr)
    return distribution
Ejemplo n.º 3
0
def calc_qval_dbl(study_n, pop_n, pop, assoc, term_pop, obo_dag, T=500):
    """
    Build a resampling distribution of minimum Fisher's exact test p-values.

    In each of the T iterations, study_n items are drawn at random (without
    replacement) from pop, their terms are counted with count_terms, and every
    counted term is tested against the population counts with a one-sided
    ('greater') Fisher's exact test. The smallest p-value seen in the
    iteration is recorded. Progress is printed to stderr every 10th iteration.

    :param study_n: Integer (number of ANs from sample frequency); size of each random draw
    :param pop_n: Integer (number of ANs from background frequency = sample freq.)
    :param pop: Iterable of items to draw from (a set is accepted); needs at least study_n items
    :param assoc: Passed unchanged to count_terms
    :param term_pop: Mapping of term -> count in the population; indexed by every term count_terms reports
    :param obo_dag: Passed unchanged to count_terms
    :param T: Integer, number of resampling iterations
    :return: List with one entry per iteration: the smallest p-value found (1 if none was below 1)
    :raises ValueError: From random.sample when study_n is larger than the population
    :raises KeyError: When a resampled term is missing from term_pop
    """
    # random.sample() requires a sequence; passing a set raises TypeError
    # since Python 3.11. Converting once up front also keeps the conversion
    # out of the resampling loop.
    population = tuple(pop)

    distribution = []
    for i in range(T):
        new_study = random.sample(population, study_n)
        # Only the first element of count_terms' result (term -> count) is needed.
        new_term_study = count_terms(new_study, assoc, obo_dag)[0]
        smallest_p = 1
        for term, study_count in new_term_study.items():
            pop_count = term_pop[term]
            # 2x2 contingency table: [with term, without term] for study and population.
            table = [[study_count, study_n - study_count],
                     [pop_count, pop_n - pop_count]]
            # alternative='greater' is a one-sided test, not a two-tailed one.
            p_value = stats.fisher_exact(table, alternative='greater')[1]
            if p_value < smallest_p:
                smallest_p = p_value
        distribution.append(smallest_p)
        if i % 10 == 0:
            print("Sample {0} / {1}: p-value {2}".format(i, T, smallest_p),
                  file=sys.stderr)
    return distribution