def mutual_information_heatmap(
        self,
        attributes: List = None,
        save_path="plots/titanic_mutual_information_heatmap.jpg"):
    """Draw private-vs-synthetic mutual-information heatmaps and save them.

    Computes ``pairwise_attributes_mutual_information`` for
    ``self.private_df`` and ``self.synthetic_df``, renders the two results
    as side-by-side seaborn heatmaps on one figure, and writes that figure
    to ``save_path``.

    Args:
        attributes: Optional list of column labels. When given (and
            non-empty) both dataframes are restricted to these columns
            before the mutual information is computed; otherwise all
            columns are used.
        save_path: Destination of the image file (string or path-like).
            Defaults to the location that used to be hard-coded, so
            existing callers keep their behavior. Missing parent
            directories are created.

    Returns:
        None. The figure is written to disk and left open.
    """
    import os

    if attributes:
        private_df = self.private_df[attributes]
        synthetic_df = self.synthetic_df[attributes]
    else:
        private_df = self.private_df
        synthetic_df = self.synthetic_df

    private_mi = pairwise_attributes_mutual_information(private_df)
    synthetic_mi = pairwise_attributes_mutual_information(synthetic_df)

    fig = plt.figure(figsize=(15, 6), dpi=120)
    fig.suptitle(
        'Pairwise Mutual Information Comparison (Private vs Synthetic)',
        fontsize=20)
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    # NOTE(review): the titles below say "max=1", but no vmin/vmax is passed
    # here, so each panel gets its own data-driven color scale -- confirm
    # whether a shared 0..1 scale was intended.
    sns.heatmap(private_mi, ax=ax1, cmap="YlGnBu")
    sns.heatmap(synthetic_mi, ax=ax2, cmap="YlGnBu")
    ax1.set_title('Private, max=1', fontsize=15)
    ax2.set_title('Synthetic, max=1', fontsize=15)

    fig.autofmt_xdate()
    fig.tight_layout()
    # Applied after tight_layout so the suptitle keeps its headroom.
    fig.subplots_adjust(top=0.83)

    # savefig does not create directories; make sure the target one exists
    # instead of failing with FileNotFoundError on a fresh checkout.
    out_dir = os.path.dirname(save_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    fig.savefig(save_path)
def get_heatmap_data(dataset_filename):
    """Build ``[x, y, value]`` heatmap cells for a CSV dataset.

    The CSV at ``dataset_filename`` is loaded with ``pd.read_csv`` and passed
    to ``pairwise_attributes_mutual_information``. The result is looked up
    with ``.loc[column, column]``, so it is expected to be a table labelled
    by the same attribute names on both axes.

    Args:
        dataset_filename: Path (or anything ``pd.read_csv`` accepts) of the
            dataset to analyse.

    Returns:
        A list with one ``[x, y, value]`` entry per ordered pair of columns,
        where ``x`` and ``y`` are the positional indices of the two columns
        and ``value`` is the table entry rounded to the nearest thousandth.
        Entries are ordered by ``x`` first, then ``y``.
    """
    mi_table = pairwise_attributes_mutual_information(
        pd.read_csv(dataset_filename))
    column_labels = mi_table.columns

    # Scale, round to an integer, then scale back: keeps three decimals.
    return [
        [row_pos, col_pos,
         int(round(1000 * mi_table.loc[row_label, col_label])) / 1000]
        for row_pos, row_label in enumerate(column_labels)
        for col_pos, col_label in enumerate(column_labels)
    ]