Example #1
0
    def mutual_information_heatmap(
            self, attributes: List = None, *,
            output_path="plots/titanic_mutual_information_heatmap.jpg"):
        """Save side-by-side heatmaps of pairwise mutual information.

        Runs ``pairwise_attributes_mutual_information`` on ``self.private_df``
        and on ``self.synthetic_df``, draws each result with ``sns.heatmap``
        in its own subplot of a single figure, and writes that figure to
        ``output_path``.

        Args:
            attributes: Column selection applied to both data frames before
                the mutual information is computed. When falsy (``None`` or
                empty), both frames are used unrestricted.
            output_path: Destination handed to ``Figure.savefig``. Defaults
                to the path that used to be hard-coded here, so existing
                callers keep writing to the same file. The parent directory
                is not created by this method.

        Returns:
            None. The figure is written to ``output_path``.
        """
        if attributes:
            private_df = self.private_df[attributes]
            synthetic_df = self.synthetic_df[attributes]
        else:
            private_df = self.private_df
            synthetic_df = self.synthetic_df

        private_mi = pairwise_attributes_mutual_information(private_df)
        synthetic_mi = pairwise_attributes_mutual_information(synthetic_df)

        fig = plt.figure(figsize=(15, 6), dpi=120)
        fig.suptitle(
            'Pairwise Mutual Information Comparison (Private vs Synthetic)',
            fontsize=20)
        ax1 = fig.add_subplot(121)
        ax2 = fig.add_subplot(122)
        # NOTE(review): the titles below say "max=1", but no vmin/vmax is
        # passed, so each heatmap gets its own colour range - confirm the two
        # panels are meant to be compared on independent scales.
        sns.heatmap(private_mi, ax=ax1, cmap="YlGnBu")
        sns.heatmap(synthetic_mi, ax=ax2, cmap="YlGnBu")
        ax1.set_title('Private, max=1', fontsize=15)
        ax2.set_title('Synthetic, max=1', fontsize=15)
        fig.autofmt_xdate()
        fig.tight_layout()
        # Leave head-room for the suptitle after tight_layout has packed the
        # subplots. Adjust and save through `fig` itself rather than pyplot's
        # implicit "current figure" so the right figure is always written.
        fig.subplots_adjust(top=0.83)
        fig.savefig(output_path)
def get_heatmap_data(dataset_filename):
    """Return pairwise mutual information of a CSV as ``[x, y, value]`` rows.

    The file is loaded with ``pd.read_csv`` and passed to
    ``pairwise_attributes_mutual_information``. Every ordered pair of column
    labels of that result yields one entry ``[x, y, value]``, where ``x`` and
    ``y`` are the positions of the two labels and ``value`` is the looked-up
    cell rounded to three decimal places.

    Args:
        dataset_filename: Path (or anything ``pd.read_csv`` accepts) of the
            dataset to analyse.

    Returns:
        A list of ``[x, y, value]`` lists, ordered by ``x`` and then ``y``.
    """
    frame = pd.read_csv(dataset_filename)
    mi_table = pairwise_attributes_mutual_information(frame)
    labels = mi_table.columns
    # Scale, round to an integer, then scale back: keeps three decimals.
    return [
        [row_pos, col_pos,
         int(round(1000 * mi_table.loc[row_label, col_label])) / 1000]
        for row_pos, row_label in enumerate(labels)
        for col_pos, col_label in enumerate(labels)
    ]