Ejemplo n.º 1
0
    def plot_jpt(self, other: 'MultiCategoryPTMixin', **kwargs) -> Axes:
        """
        Draw the joint probability table of this categorical and another.

        The plot options `transpose` (False), `var_sep` (',') and `dividers`
        (False) are only filled in when the caller did not pass them.

        :param other: Another MultiCategoryPTMixin to plot against. Any other
                      object is assumed to be single-category and its `data`
                      attribute is used as-is.
        :param kwargs: See utils.plots.plot_pt
        :return: The Axes returned by plot_pt.
        """
        # multi-category inputs go through make_features; anything else is
        # assumed to be a single-category object exposing `.data`
        other_is_multi = isinstance(other, MultiCategoryPTMixin)
        other_features = (
            other.make_features(naming='{{choice}}')
            if other_is_multi else other.data
        )
        table = create_jpt(
            data_1=self.make_features(naming='{{choice}}'),
            data_2=other_features,
            prob_1_name=self.name, prob_2_name=other.name,
            prob_1_order=self.categories, prob_2_order=other.categories
        )
        # apply defaults without overriding caller-supplied options
        for option, default in (('transpose', False),
                                ('var_sep', ','),
                                ('dividers', False)):
            kwargs.setdefault(option, default)
        return plot_pt(pt=table, **kwargs)
Ejemplo n.º 2
0
    def plot_jpt(self,
                 item_name: str = 'item',
                 value_name: str = 'value',
                 item_names: Optional[List[str]] = None,
                 **kwargs) -> Axes:
        """
        Draw the joint probability table built by `self.jpt`, i.e. the answer
        values for each question against the key to the question.

        The plot options `transpose` (True), `var_sep` (',') and `dividers`
        (False) are only filled in when the caller did not pass them.

        :param item_name: Name for the collection of questions or attributes
                          to condition on.
        :param value_name: Name for the collection of values to calculate
                           conditional probability of.
        :param item_names: Optional list of names for the questions or
                           attributes instead of the keys.
        :param kwargs: See utils.plots.plot_pt
        :return: The Axes returned by plot_pt.
        """
        table = self.jpt(
            item_name=item_name, value_name=value_name, item_names=item_names
        )
        # apply defaults without overriding caller-supplied options
        for option, default in (('transpose', True),
                                ('var_sep', ','),
                                ('dividers', False)):
            kwargs.setdefault(option, default)
        return plot_pt(pt=table, **kwargs)
Ejemplo n.º 3
0
    def plot_jct(self, other: 'SingleCategoryPTMixin',
                 significance: bool = False,
                 sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                 sig_values: Tuple[float, float] = (0.945, 0.055),
                 **kwargs) -> Axes:
        """
        Draw the joint count table of this categorical and another.

        `as_percent` and `precision` are always forced to False and 0; the
        options `transpose` (False), `var_sep` ('&'), `dividers` (False) and
        `p_max` (None) are only filled in when the caller did not pass them.

        :param other: Another SingleCategory to plot against. Any other
                      object is assumed to be multi-category and is passed
                      through `make_features`.
        :param significance: Whether to add significance markers to the plot.
                             Equal to p(X=x1,Y=y1) > p(X≠x1, Y≠y1).
                             Only applied when `other` is single-category.
        :param sig_colors: Tuple of (high, low) colors for highlighting
                           significance.
        :param sig_values: Tuple of (high, low) values for assessing
                           significance.
        :param kwargs: See utils.plots.plot_pt
        :return: The Axes returned by plot_pt.
        :raises ValueError: If self and other share the same name.
        """
        if self.name == other.name:
            raise ValueError('categoricals must have different names')

        other_is_single = isinstance(other, SingleCategoryPTMixin)
        other_data = (
            other.data if other_is_single
            else other.make_features(naming='{{choice}}')
        )

        # build the joint count table
        count_table = create_jct(
            data_1=self.data, data_2=other_data,
            count_1_name=self.name, count_2_name=other.name,
            count_1_order=self.category_names,
            count_2_order=other.category_names
        )

        # apply defaults without overriding caller-supplied options
        for option, default in (('transpose', False),
                                ('var_sep', '&'),
                                ('dividers', False)):
            kwargs.setdefault(option, default)
        # these two options are always overridden for a count table
        kwargs.update(as_percent=False, precision=0)
        kwargs.setdefault('p_max', None)

        ax = plot_pt(pt=count_table, **kwargs)

        # significance markers are only drawn for single-category `other`
        if not (significance and other_is_single):
            return ax
        self._draw_significance_values(
            other=other,
            sig_colors=sig_colors,
            sig_values=sig_values,
            transpose=kwargs['transpose'],
            ax=ax
        )
        return ax
Ejemplo n.º 4
0
    def plot_distribution(self, **kwargs) -> Axes:
        """
        Plot the Conditional Probability Table held in `self._data` as a
        heatmap, by handing it to plot_pt.

        The options `transpose` (True) and `var_sep` ('|') are only filled in
        when the caller did not pass them.

        :param kwargs: See utils.plots.plot_pt
        :return: The Axes returned by plot_pt.
        """
        # apply defaults without overriding caller-supplied options
        kwargs.setdefault('transpose', True)
        kwargs.setdefault('var_sep', '|')
        return plot_pt(pt=self._data, **kwargs)
Ejemplo n.º 5
0
    def plot_jpt(self, prob_1: Union[str, Categorical],
                 prob_2: Union[str, Categorical], **kwargs) -> Axes:
        """
        Draw the joint probability table that `self.jpt` builds for two
        questions or attributes.

        `var_sep` defaults to ',' when the caller did not pass it.

        :param prob_1: The first question or attribute, passed to self.jpt.
        :param prob_2: The second question or attribute, passed to self.jpt.
        :param kwargs: See survey.utils.plots.plot_pt.
        :return: The Axes returned by plot_pt.
        """
        table = self.jpt(prob_1, prob_2)
        # keep a caller-supplied separator, otherwise use a comma
        kwargs.setdefault('var_sep', ',')
        return plot_pt(pt=table, **kwargs)
Ejemplo n.º 6
0
    def plot_cpt(self, probability: Union[str, Categorical],
                 condition: Union[str, Categorical], **kwargs) -> Axes:
        """
        Draw the conditional probability table that `self.cpt` builds for a
        probability variable given a condition variable.

        The options `transpose` (False) and `var_sep` ('|') are only filled
        in when the caller did not pass them.

        :param probability: The question or attribute to find probability of.
        :param condition: The question or attribute to condition on.
        :param kwargs: Forwarded to plot_pt.
        :return: The Axes returned by plot_pt.
        """
        table = self.cpt(probability, condition)
        # apply defaults without overriding caller-supplied options
        kwargs.setdefault('transpose', False)
        kwargs.setdefault('var_sep', '|')
        return plot_pt(pt=table, **kwargs)
Ejemplo n.º 7
0
    def plot_cpt(self, condition: 'MultiCategoryPTMixin', **kwargs) -> Axes:
        """
        Draw the conditional probability table of this categorical given
        another.

        `var_sep` defaults to '|' when the caller did not pass it.

        :param condition: Another MultiCategoryPTMixin to condition on. Any
                          other object is assumed to be single-category and
                          its `data` attribute is used as-is.
        :param kwargs: See utils.plots.plot_pt
        :return: The Axes returned by plot_pt.
        """
        # multi-category inputs go through make_features; anything else is
        # assumed to be a single-category object exposing `.data`
        condition_is_multi = isinstance(condition, MultiCategoryPTMixin)
        cond_features = (
            condition.make_features(naming='{{choice}}')
            if condition_is_multi else condition.data
        )
        cpt_table = create_cpt(
            prob_data=self.make_features(naming='{{choice}}'),
            cond_data=cond_features,
            prob_name=self.name, cond_name=condition.name,
            prob_order=self.categories, cond_order=condition.categories
        )
        # keep a caller-supplied separator, otherwise use a pipe
        kwargs.setdefault('var_sep', '|')
        return plot_pt(pt=cpt_table, **kwargs)
Ejemplo n.º 8
0
    def plot_cpt(self,
                 condition: 'SingleCategoryPTMixin',
                 significance: Optional[str] = None,
                 sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
                 sig_values: Tuple[float, float] = (0.945, 0.055),
                 **kwargs) -> Axes:
        """
        Plot a conditional probability table of self given condition.

        :param condition: Another SingleCategory to condition on. Any other
                          object is assumed to be multi-category and is
                          passed through `make_features`.
        :param significance: One of ['prob', 'cond'], or None (the default)
                             to draw no significance markers.
                             'prob' gives p(X=x1|Y=y1) > p(X≠x1|Y=y1)
                             'cond' gives p(X=x1|Y=y1) > p(X=x1|Y≠y1)
        :param sig_colors: Tuple of (high, low) colors for highlighting
                           significance.
        :param sig_values: Tuple of (high, low) values for assessing
                           significance. A cell gets the high color when
                           p >= high and the low color when p < low;
                           other cells are left unmarked.
        :param kwargs: See utils.plots.plot_pt. `var_sep` defaults to '|'
                       and `transpose` to True when not supplied.
        :return: The Axes returned by plot_pt, with any significance
                 markers drawn onto it.
        :raises ValueError: If self and condition share the same name, or if
                            `significance` is not None, 'prob' or 'cond'.
        :raises NotImplementedError: If `significance` is requested and
                                     `condition` is not single-category.
        """
        if self.name == condition.name:
            raise ValueError('categoricals must have different names')

        condition_is_single = isinstance(condition, SingleCategoryPTMixin)

        # Validate the significance request before any plotting happens so
        # an invalid call does not leave a half-finished plot behind.
        if significance is not None:
            if not condition_is_single:
                raise NotImplementedError(
                    'significance not implemented for MultiCategories'
                )
            if significance not in ('prob', 'cond'):
                raise ValueError(
                    "significance must be one of ['prob', 'cond']"
                )

        if condition_is_single:
            condition_data = condition.data
        else:
            # assume multi category
            condition_data = condition.make_features(naming='{{choice}}')

        prob_cats = self.category_names
        cond_cats = condition.category_names
        cpt = create_cpt(
            prob_data=self.data, cond_data=condition_data,
            prob_name=self.name, cond_name=condition.name,
            prob_order=prob_cats, cond_order=cond_cats
        )
        # apply defaults without overriding caller-supplied options
        kwargs.setdefault('var_sep', '|')
        kwargs.setdefault('transpose', True)
        ax = plot_pt(pt=cpt, **kwargs)

        if significance is None:
            return ax

        # counts is indexed by condition category (rows) and probability
        # category (columns), as used by the .loc lookups below
        counts = count_coincidences(
            data_1=self.data, data_2=condition_data,
            column_1=self.name, column_2=condition.name,
            column_1_order=prob_cats,
            column_2_order=cond_cats
        )

        # Each entry is (prob_index, cond_index, p). Positions are stored
        # instead of name-keyed dicts so a categorical that happens to be
        # named 'p' cannot collide with the probability value.
        markers = []
        if significance == 'prob':
            # NOTE(review): with a single category in self this divides by
            # zero - confirm whether that case needs explicit handling.
            n_other_cats = len(prob_cats) - 1
            for cond_idx, cond_cat in enumerate(cond_cats):
                n_cond = counts.loc[cond_cat].sum()
                for prob_idx, prob_cat in enumerate(prob_cats):
                    m_prob_cond = counts.loc[cond_cat, prob_cat]
                    # average count of the other categories under this
                    # condition
                    m_any = (n_cond - m_prob_cond) / n_other_cats
                    p = (
                        BetaBinomialConjugate(
                            alpha=1, beta=1, n=n_cond, k=m_prob_cond
                        ).posterior() > BetaBinomialConjugate(
                            alpha=1, beta=1, n=n_cond, k=m_any
                        ).posterior()
                    )
                    markers.append((prob_idx, cond_idx, p))
        else:  # significance == 'cond'
            n = counts.sum().sum()
            for prob_idx, prob_cat in enumerate(prob_cats):
                n_prob = counts[prob_cat].sum()
                for cond_idx, cond_cat in enumerate(cond_cats):
                    n_cond = counts.loc[cond_cat].sum()
                    m_prob_cond = counts.loc[cond_cat, prob_cat]
                    # same probability category under every other condition
                    m_any = n_prob - m_prob_cond
                    n_any = n - n_cond
                    p = (
                        BetaBinomialConjugate(
                            n=n_cond, k=m_prob_cond, alpha=1, beta=1,
                        ).posterior() > BetaBinomialConjugate(
                            n=n_any, k=m_any, alpha=1, beta=1
                        ).posterior()
                    )
                    markers.append((prob_idx, cond_idx, p))

        min_add = 0.1
        max_add = 0.9
        line_width = 2
        high_value, low_value = sig_values[0], sig_values[1]
        high_color, low_color = sig_colors[0], sig_colors[1]

        transpose = bool(kwargs['transpose'])
        # 'prob' markers are vertical bars and 'cond' markers horizontal
        # bars on the untransposed plot; transposing swaps the two.
        vertical = (significance == 'prob') != transpose

        for prob_idx, cond_idx, p in markers:
            if p >= high_value:
                color = high_color
            elif p < low_value:
                color = low_color
            else:
                continue
            # probability categories run along x unless transposed
            x, y = (cond_idx, prob_idx) if transpose else (prob_idx, cond_idx)
            x_lo, x_hi = x + min_add, x + max_add
            y_lo, y_hi = y + min_add, y + max_add
            if vertical:
                ax.plot([x_lo, x_lo], [y_lo, y_hi],
                        color, linewidth=line_width)
                ax.plot([x_hi, x_hi], [y_lo, y_hi],
                        color, linewidth=line_width)
            else:
                ax.plot([x_lo, x_hi], [y_lo, y_lo],
                        color, linewidth=line_width)
                ax.plot([x_lo, x_hi], [y_hi, y_hi],
                        color, linewidth=line_width)
        return ax