def plot_jpt(self, other: 'MultiCategoryPTMixin', **kwargs) -> Axes:
    """
    Draw the joint probability table of this categorical and another one.

    :param other: The categorical to plot against. A MultiCategoryPTMixin
                  contributes its `make_features` output; anything else is
                  assumed to be single-category and contributes its `data`.
    :param kwargs: See utils.plots.plot_pt. Unless supplied by the caller,
                   `transpose` is False, `var_sep` is ',' and `dividers`
                   is False.
    """
    other_data = (
        other.make_features(naming='{{choice}}')
        if isinstance(other, MultiCategoryPTMixin)
        else other.data  # assume single category
    )
    joint_table = create_jpt(
        data_1=self.make_features(naming='{{choice}}'),
        data_2=other_data,
        prob_1_name=self.name,
        prob_2_name=other.name,
        prob_1_order=self.categories,
        prob_2_order=other.categories
    )
    # only fill in the options the caller left out
    for option, default in (('transpose', False),
                            ('var_sep', ','),
                            ('dividers', False)):
        kwargs.setdefault(option, default)
    return plot_pt(pt=joint_table, **kwargs)
def plot_jpt(self, item_name: str = 'item', value_name: str = 'value',
             item_names: Optional[List[str]] = None,
             **kwargs) -> Axes:
    """
    Draw the joint probability table of answer values against the key of
    the question they belong to.

    :param item_name: Name for the collection of questions or attributes.
    :param value_name: Name for the collection of values.
    :param item_names: Optional list of names for the questions or
                       attributes, used instead of the keys.
    :param kwargs: See utils.plots.plot_pt. Unless supplied by the caller,
                   `transpose` is True, `var_sep` is ',' and `dividers`
                   is False.
    """
    joint_table = self.jpt(
        item_name=item_name,
        value_name=value_name,
        item_names=item_names
    )
    # only fill in the options the caller left out
    for option, default in (('transpose', True),
                            ('var_sep', ','),
                            ('dividers', False)):
        kwargs.setdefault(option, default)
    return plot_pt(pt=joint_table, **kwargs)
def plot_jct(self, other: 'SingleCategoryPTMixin',
             significance: bool = False,
             sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
             sig_values: Tuple[float, float] = (0.945, 0.055),
             **kwargs) -> Axes:
    """
    Draw the joint count table of this categorical and another one.

    :param other: The categorical to plot against. A SingleCategoryPTMixin
                  contributes its `data`; anything else is assumed to be
                  multi-category and contributes its `make_features` output.
    :param significance: Whether to add significance markers to the plot.
                         Equal to p(X=x1,Y=y1) > p(X≠x1, Y≠y1). Markers are
                         only drawn when `other` is a SingleCategoryPTMixin.
    :param sig_colors: Tuple of (high, low) colors for highlighting
                       significance.
    :param sig_values: Tuple of (high, low) values for assessing
                       significance.
    :param kwargs: See utils.plots.plot_pt. `as_percent` and `precision`
                   are always overridden with False and 0; `transpose`
                   (False), `var_sep` ('&'), `dividers` (False) and `p_max`
                   (None) are only filled in when missing.
    :raises ValueError: If self and other have the same name.
    """
    if self.name == other.name:
        raise ValueError('categoricals must have different names')

    other_is_single = isinstance(other, SingleCategoryPTMixin)
    other_data = (
        other.data if other_is_single
        else other.make_features(naming='{{choice}}')  # assume multi category
    )

    # calculate jct
    count_table = create_jct(
        data_1=self.data, data_2=other_data,
        count_1_name=self.name, count_2_name=other.name,
        count_1_order=self.category_names,
        count_2_order=other.category_names
    )

    kwargs.setdefault('transpose', False)
    kwargs.setdefault('var_sep', '&')
    kwargs.setdefault('dividers', False)
    # these two are forced regardless of what the caller passed
    kwargs.update(as_percent=False, precision=0)
    kwargs.setdefault('p_max', None)

    ax = plot_pt(pt=count_table, **kwargs)

    # draw significance values
    if significance and other_is_single:
        self._draw_significance_values(
            other=other,
            sig_colors=sig_colors,
            sig_values=sig_values,
            transpose=kwargs['transpose'],
            ax=ax
        )
    return ax
def plot_distribution(self, **kwargs) -> Axes:
    """
    Plot the Conditional Probability Table held in `self._data`.

    :param kwargs: See utils.plots.plot_pt. Unless supplied by the caller,
                   `transpose` is True and `var_sep` is '|'.
    """
    # only fill in the options the caller left out
    kwargs.setdefault('transpose', True)
    kwargs.setdefault('var_sep', '|')
    return plot_pt(pt=self._data, **kwargs)
def plot_jpt(self, prob_1: Union[str, Categorical],
             prob_2: Union[str, Categorical],
             **kwargs) -> Axes:
    """
    Plot the joint probability table of two questions or attributes.

    :param prob_1: The first question or attribute, passed to `self.jpt`.
    :param prob_2: The second question or attribute, passed to `self.jpt`.
    :param kwargs: See survey.utils.plots.plot_pt. `var_sep` is ',' unless
                   supplied by the caller.
    """
    joint_table = self.jpt(prob_1, prob_2)
    kwargs.setdefault('var_sep', ',')
    return plot_pt(pt=joint_table, **kwargs)
def plot_cpt(self, probability: Union[str, Categorical],
             condition: Union[str, Categorical],
             **kwargs) -> Axes:
    """
    Plot the conditional probability table of one question or attribute
    given another.

    :param probability: The question or attribute to find probability of.
    :param condition: The question or attribute to condition on.
    :param kwargs: Passed to plot_pt. Unless supplied by the caller,
                   `transpose` is False and `var_sep` is '|'.
    """
    conditional_table = self.cpt(probability, condition)
    # only fill in the options the caller left out
    kwargs.setdefault('transpose', False)
    kwargs.setdefault('var_sep', '|')
    return plot_pt(pt=conditional_table, **kwargs)
def plot_cpt(self, condition: 'MultiCategoryPTMixin', **kwargs) -> Axes:
    """
    Draw the conditional probability table of this categorical given
    another one.

    :param condition: The categorical to condition on. A
                      MultiCategoryPTMixin contributes its `make_features`
                      output; anything else is assumed to be single-category
                      and contributes its `data`.
    :param kwargs: See utils.plots.plot_pt. `var_sep` is '|' unless
                   supplied by the caller.
    """
    cond_data = (
        condition.make_features(naming='{{choice}}')
        if isinstance(condition, MultiCategoryPTMixin)
        else condition.data  # assume single category
    )
    cpt = create_cpt(
        prob_data=self.make_features(naming='{{choice}}'),
        cond_data=cond_data,
        prob_name=self.name,
        cond_name=condition.name,
        prob_order=self.categories,
        cond_order=condition.categories
    )
    kwargs.setdefault('var_sep', '|')
    return plot_pt(pt=cpt, **kwargs)
def plot_cpt(self, condition: 'SingleCategoryPTMixin',
             significance: Optional[str] = None,
             sig_colors: Tuple[str, str] = ('#00ff00', '#ff0000'),
             sig_values: Tuple[float, float] = (0.945, 0.055),
             **kwargs) -> Axes:
    """
    Plot a conditional probability table of self given condition.

    :param condition: Another SingleCategory to condition on. Anything that
                      is not a SingleCategoryPTMixin is assumed to be
                      multi-category and contributes its `make_features`
                      output; significance markers are not available for it.
    :param significance: One of ['prob', 'cond'], or None for no markers.
                         'prob' gives p(X=x1|Y=y1) > p(X≠x1|Y=y1)
                         'cond' gives p(X=x1|Y=y1) > p(X=x1|Y≠y1)
    :param sig_colors: Tuple of (high, low) colors for highlighting
                       significance.
    :param sig_values: Tuple of (high, low) values for assessing
                       significance. A cell gets the high color when
                       p >= high and the low color when p < low.
    :param kwargs: See utils.plots.plot_pt. Unless supplied by the caller,
                   `var_sep` is '|' and `transpose` is True.
    :raises ValueError: If self and condition have the same name, or if
                        significance is not None, 'prob' or 'cond'.
    :raises NotImplementedError: If significance is requested and condition
                                 is not a SingleCategoryPTMixin.
    """
    if self.name == condition.name:
        raise ValueError('categoricals must have different names')

    condition_is_single = isinstance(condition, SingleCategoryPTMixin)

    # Reject unusable significance requests before anything is drawn, so a
    # bad argument does not leave a finished plot behind the exception.
    if significance is not None:
        if not condition_is_single:
            raise NotImplementedError(
                'significance not implemented for MultiCategories'
            )
        if significance not in ('prob', 'cond'):
            raise ValueError(
                "significance must be one of ['prob', 'cond']"
            )

    if condition_is_single:
        condition_data = condition.data
    else:  # assume multi category
        condition_data = condition.make_features(naming='{{choice}}')

    cpt = create_cpt(
        prob_data=self.data, cond_data=condition_data,
        prob_name=self.name, cond_name=condition.name,
        prob_order=self.category_names,
        cond_order=condition.category_names
    )
    kwargs.setdefault('var_sep', '|')
    kwargs.setdefault('transpose', True)
    ax = plot_pt(pt=cpt, **kwargs)

    if significance is None:
        return ax

    # ---- calculate significance values ----
    prob_cats = self.category_names
    cond_cats = condition.category_names
    counts = count_coincidences(
        data_1=self.data, data_2=condition_data,
        column_1=self.name, column_2=condition.name,
        column_1_order=prob_cats,
        column_2_order=cond_cats
    )
    # (prob_cat, cond_cat, p) per cell. Tuples rather than dicts keyed by
    # the categorical names, so a categorical called 'p' cannot clobber
    # the probability.
    results = []
    if significance == 'prob':
        n_other_cats = len(prob_cats) - 1
        for cond_cat in cond_cats:
            n_cond = counts.loc[cond_cat].sum()
            for prob_cat in prob_cats:
                m_prob_cond = counts.loc[cond_cat, prob_cat]
                # average count of the remaining categories of self under
                # the same condition
                m_any = (n_cond - m_prob_cond) / n_other_cats
                p = (
                    BetaBinomialConjugate(
                        alpha=1, beta=1, n=n_cond, k=m_prob_cond
                    ).posterior() >
                    BetaBinomialConjugate(
                        alpha=1, beta=1, n=n_cond, k=m_any
                    ).posterior()
                )
                results.append((prob_cat, cond_cat, p))
    else:  # significance == 'cond'
        n = counts.sum().sum()
        for prob_cat in prob_cats:
            n_prob = counts[prob_cat].sum()
            for cond_cat in cond_cats:
                n_cond = counts.loc[cond_cat].sum()
                m_prob_cond = counts.loc[cond_cat, prob_cat]
                # occurrences of prob_cat, and total count, outside of
                # cond_cat
                m_any = n_prob - m_prob_cond
                n_any = n - n_cond
                p = (
                    BetaBinomialConjugate(
                        n=n_cond, k=m_prob_cond, alpha=1, beta=1,
                    ).posterior() >
                    BetaBinomialConjugate(
                        n=n_any, k=m_any, alpha=1, beta=1
                    ).posterior()
                )
                results.append((prob_cat, cond_cat, p))

    # ---- draw significance values ----
    min_add = 0.1
    max_add = 0.9
    line_width = 2
    transposed = bool(kwargs['transpose'])
    # 'prob' markers are a pair of vertical bars and 'cond' markers a pair
    # of horizontal bars on an untransposed table; transposing swaps them.
    vertical_bars = (significance == 'prob') != transposed
    for prob_cat, cond_cat, p in results:
        if p >= sig_values[0]:
            color = sig_colors[0]
        elif p < sig_values[1]:
            color = sig_colors[1]
        else:
            continue
        prob_index = prob_cats.index(prob_cat)
        cond_index = cond_cats.index(cond_cat)
        if transposed:
            x, y = cond_index, prob_index
        else:
            x, y = prob_index, cond_index
        x_lo, x_hi = x + min_add, x + max_add
        y_lo, y_hi = y + min_add, y + max_add
        if vertical_bars:
            ax.plot([x_lo, x_lo], [y_lo, y_hi],
                    color, linewidth=line_width)
            ax.plot([x_hi, x_hi], [y_lo, y_hi],
                    color, linewidth=line_width)
        else:
            ax.plot([x_lo, x_hi], [y_lo, y_lo],
                    color, linewidth=line_width)
            ax.plot([x_lo, x_hi], [y_hi, y_hi],
                    color, linewidth=line_width)

    return ax