Example 1
 def significance_one_vs_any(self) -> Series:
     """
     Return the probability that the response to each question is higher than
     a randomly selected other question.

     :return: Series indexed by question name, holding the probability that
              each question's beta-binomial posterior exceeds the posterior
              of all remaining questions pooled together.
     """
     rows = []
     for name in self._item_dict.keys():
         # features for this question vs. the pooled rest
         one = self._item_dict[name].make_features()
         rest = concat(
             [item.make_features()
              for other_name, item in self._item_dict.items()
              if other_name != name],
             axis=0
         )
         # uniform Beta(1, 1) prior for both groups
         dist_one = BetaBinomialConjugate(
             alpha=1, beta=1, n=len(one), k=one.sum())
         dist_rest = BetaBinomialConjugate(
             alpha=1, beta=1, n=len(rest), k=rest.sum())
         rows.append({
             'name': name,
             'p': dist_one.posterior() > dist_rest.posterior()
         })
     return DataFrame(rows).set_index('name')['p']
Example 2
 def significance_one_vs_all(self) -> Series:
     """
     Return the probabilities that a random respondent is more likely to
     answer each one category than all others combined.

     :return: Series indexed by category, holding for each category the
              probability that its beta-binomial posterior exceeds the
              posterior of all other categories combined.
     """
     # hoist loop-invariant aggregates: value_counts() and the number of
     # non-null responses are identical on every iteration
     counts = self.data.value_counts()
     num_responses = len(self.data.dropna())
     results = []
     for category in self.categories:
         # Series.get returns the default for a missing category,
         # replacing the original try / except KeyError
         category_count = counts.get(category, 0)
         # uniform Beta(1, 1) prior for both groups
         bb_category = BetaBinomialConjugate(alpha=1,
                                             beta=1,
                                             n=num_responses,
                                             k=category_count)
         bb_rest = BetaBinomialConjugate(alpha=1,
                                         beta=1,
                                         n=num_responses,
                                         k=num_responses - category_count)
         results.append({
             'category': category,
             'p': bb_category.posterior() > bb_rest.posterior()
         })
     return DataFrame(results).set_index('category')['p']
Example 3
 def __gt__(self, other: 'LikertQuestion') -> float:
     """
     Return the probability that the posterior estimate for the probability
     of max-rating is greater in self than other.

     :param other: The LikertQuestion to compare against.
     :return: Probability of superiority of self's posterior.
     """
     # binary feature vectors for each question
     features_self = self.make_features()
     features_other = other.make_features()
     # uniform Beta(1, 1) prior on both sides
     dist_self = BetaBinomialConjugate(
         alpha=1, beta=1,
         n=len(features_self), k=features_self.sum())
     dist_other = BetaBinomialConjugate(
         alpha=1, beta=1,
         n=len(features_other), k=features_other.sum())
     return dist_self.posterior() > dist_other.posterior()
Example 4
    def prob_superior(self, question: CategoricalQuestion,
                      attribute: SingleCategoryAttribute,
                      exp_attr_values: List[str], exp_answers: List[str],
                      ctl_attr_values: List[str],
                      ctl_answers: List[str]) -> BBProbSuperiorResult:
        """
        Calculate the probability that the number of responses from the
        experimental group in `exp_answers` is significantly higher than the
        number of responses from the control group in `ctl_answers`.

        N.B. to assess the effect of respondent attributes, `exp_answers` and
        `ctl_answers` should be identical.

        :param question: The question to consider.
        :param attribute: The attribute to use.
        :param exp_attr_values: The attribute values of the experimental group.
        :param exp_answers: The answers to count in the experimental group.
        :param ctl_attr_values: The attribute values of the control group.
        :param ctl_answers: The answers to count in the control group.
        :return: BBProbSuperiorResult carrying the probability of superiority
                 and the posterior means of both groups.
        """
        # find n and k for experimental respondent and answer group:
        # n counts all responses matching the experimental attribute values;
        # k adds the answers= filter to count only those in exp_answers
        n_exp = self.count_responses(question=question,
                                     condition_category=attribute,
                                     condition_values=exp_attr_values)
        k_exp = self.count_responses(question=question,
                                     answers=exp_answers,
                                     condition_category=attribute,
                                     condition_values=exp_attr_values)
        # find n and k for control respondent and answer group
        n_ctl = self.count_responses(question=question,
                                     condition_category=attribute,
                                     condition_values=ctl_attr_values)
        k_ctl = self.count_responses(question=question,
                                     answers=ctl_answers,
                                     condition_category=attribute,
                                     condition_values=ctl_attr_values)
        # create beta-binomial distribution for each group, with a uniform
        # Beta(1, 1) prior
        # NOTE(review): the successes keyword here is `m=`, unlike the `k=`
        # used in sibling code — confirm which keyword this version of
        # BetaBinomialConjugate expects
        bb_exp = BetaBinomialConjugate(alpha=1, beta=1, n=n_exp, m=k_exp)
        bb_ctl = BetaBinomialConjugate(alpha=1, beta=1, n=n_ctl, m=k_ctl)
        # calculate probability of superiority of test group
        # NOTE(review): compares the distribution objects directly with `>`,
        # presumably delegating to BetaBinomialConjugate.__gt__ — verify this
        # is equivalent to the posterior() comparison used elsewhere
        p_superior = bb_exp > bb_ctl

        return BBProbSuperiorResult(
            p_superior=p_superior,
            experimental_mean=bb_exp.posterior().mean(),
            control_mean=bb_ctl.posterior().mean())
Example 5
    def significance_one_vs_one(self) -> DataFrame:
        """
        Return the probability that a random respondent is more likely to answer
        each category than each other.

        :return: DataFrame pivot table with categories on both axes and, in
                 each cell, the probability that the row category's posterior
                 exceeds the column category's posterior.
        """
        # hoist loop-invariant aggregates: value_counts() and the number of
        # non-null responses do not change between iterations (the original
        # recomputed them for every category pair)
        counts = self._data.value_counts()
        num_responses = len(self._data.dropna())
        results = []
        for category_1, category_2 in product(self._categories,
                                              self._categories):
            # Series.get returns the default for a missing category,
            # replacing the original try / except KeyError pairs
            category_1_count = counts.get(category_1, 0)
            category_2_count = counts.get(category_2, 0)
            # uniform Beta(1, 1) prior for both categories
            bb_category_1 = BetaBinomialConjugate(alpha=1,
                                                  beta=1,
                                                  n=num_responses,
                                                  k=category_1_count)
            bb_category_2 = BetaBinomialConjugate(alpha=1,
                                                  beta=1,
                                                  n=num_responses,
                                                  k=category_2_count)
            results.append({
                'category_1': category_1,
                'category_2': category_2,
                'p': bb_category_1.posterior() > bb_category_2.posterior()
            })

        return pivot_table(data=DataFrame(results),
                           index='category_1',
                           columns='category_2',
                           values='p')