def test_plot_figures_output_show():
    """
    Exercise the plotting helpers with ``otype='show'``.

    Draws two confusion matrices, five histograms (numeric and categorical
    bins) and one heatmap of pairwise mutual information. The test contains
    no assertions: it passes as long as none of the calls raises.
    """
    from pandas import DataFrame

    # Confusion matrices: a 2x2 case and a 4x4 case.
    confusion_frames = (
        DataFrame({'True': [2, 3], 'False': [5, 0]}),
        DataFrame({
            '7th-8th': [2, 3, 5, 0],
            'Masters': [0, 4, 1, 0],
            '11th': [0, 1, 5, 2],
            'Bachelors': [2, 0, 0, 6],
        }),
    )
    for frame in confusion_frames:
        plot_confusion_matrix(frame, otype='show')

    # Histograms: each case is (bins, counts) where counts holds two rows.
    histogram_cases = (
        (
            np.array([
                28.0, 29.25, 30.5, 31.75, 33.0, 34.25, 35.5, 36.75, 38.0,
                39.25, 40.5, 41.75, 43.0, 44.25, 45.5, 46.75, 48.0, 49.25,
                50.5,
            ]),
            np.array([
                [1, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0],
                [1, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1],
            ]),
        ),
        (
            np.array(['Private', 'Self-emp-not-inc', 'State-gov']),
            np.array([[6, 2, 1], [6, 2, 1]]),
        ),
        (
            np.array(['11th', '9th', 'Bachelors', 'HS-grad', 'Masters']),
            np.array([[3, 2, 2, 1, 1], [3, 2, 2, 1, 1]]),
        ),
        (
            np.array([
                5.0, 5.45, 5.9, 6.35, 6.8, 7.25, 7.7, 8.15, 8.6, 9.05, 9.5,
                9.95, 10.4, 10.85, 11.3, 11.75, 12.2, 12.65, 13.1,
            ]),
            np.array([
                [1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0],
                [1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0],
            ]),
        ),
        (
            np.array(['Female', 'Male']),
            np.array([[5, 4], [5, 4]]),
        ),
    )
    for edges, freqs in histogram_cases:
        plot_histogram(edges, freqs, otype='show')

    # Heatmap of pairwise mutual information over the bundled sample data.
    from .testdata import adults01
    from ds4ml.metrics import pairwise_mutual_information

    mutual_info = pairwise_mutual_information(DataFrame(adults01))
    plot_heatmap(mutual_info, otype='show')
def _get_dist(self):
    """
    Collect the distribution information of every column.

    Returns
    -------
    list of dict
        One entry per column in ``self.columns``, holding the column name
        (``'name'``), the bins and counts returned by ``self.dist``
        (``'columns'`` and ``'data'``) and the result of ``plot_histogram``
        for them (``'path'``).
    """
    from ds4ml.utils import plot_histogram

    def summarize(column):
        # Compute the distribution first, then plot it.
        edges, freqs = self.dist(column)
        return {
            'name': column,
            'columns': edges,
            'data': freqs,
            'path': plot_histogram(edges, freqs),
        }

    return [summarize(column) for column in self.columns]
def test_plot_figures_output_show_special_characters():
    """
    Plot a histogram whose bin labels contain non-ASCII characters.

    There are no assertions; the test passes if ``plot_histogram`` does not
    raise for these labels with ``otype='show'``.
    """
    labels = ['你好', 'Self-る', '¥¶ĎǨД']
    row = [6, 2, 1]
    # Two identical rows of counts.
    plot_histogram(np.array(labels), np.array([row, row]), otype='show')
def to_html(self, buf=None, title='Evaluation Report', info=True,
            distribute=True, correlate=True, classifier=None, labels=None,
            test=None):
    """
    Render the evaluation result of two data sets as an HTML file.

    Parameters
    ----------
    buf : str, optional
        Path of the HTML file to write (it is passed to ``open``). When
        it is None or empty, the sections are still computed but nothing
        is rendered or written.
    title : str
        Title of the evaluation report.
    info : bool, default True
        Show basic information of the two data sets, including relative
        error and Jensen-Shannon divergence (jsd).
    distribute : bool, default True
        Show the distribution of each attribute.
    correlate : bool, default True
        Show the correlation of pair-wise attributes.
    classifier : str
        Classifier used to train the data set on one or more columns
        (defined by parameter 'labels'). Optional classifier: SVM.
        NOTE: this method does not read the argument.
    labels : list of column names
        Columns used for the classification task. The value is iterated
        directly, so pass a list even for a single column (a bare string
        would be iterated character by character).
    test : pd.DataFrame
        Test data for classification, and other machine learning tasks.

    Notes
    -----
    A disabled section (``info``, ``distribute`` or ``correlate`` set to
    False) is passed to the template as an empty list, the same way the
    classification section is when ``labels`` is None.
    """
    from ds4ml.utils import (plot_histogram, plot_heatmap,
                             plot_confusion_matrix)
    from mako.template import Template
    import os

    # Resolve the template relative to this module instead of temporarily
    # changing the process-wide working directory.
    template_path = os.path.join(os.path.dirname(__file__), 'template',
                                 'report.html')
    template = Template(filename=template_path)

    # Every section defaults to an empty list so that rendering never
    # depends on which sections were requested.
    basics, dists, corrs, svms = [], [], [], []

    if info:
        basics.append(self.describe().to_dict('split'))

    if distribute:
        for col in self.columns:
            bins, counts = self.dist(col)
            svg = plot_histogram(bins, counts)
            dists.append({
                'name': col,
                'columns': bins,
                'data': counts,
                'path': svg
            })

    if correlate:
        # One entry per matrix returned by corr(), source first.
        for matrix in self.corr():
            corrs.append({
                'matrix': matrix.to_dict('split'),
                'path': plot_heatmap(matrix)
            })

    if labels is not None:
        for col in labels:
            # Without test data, or when the label column is part of the
            # test data, classify() is unpacked into two confusion
            # matrices (each compared with the actual values). Otherwise
            # it yields a single matrix comparing the two predictions.
            two_matrices = test is None or col in test
            try:
                if two_matrices:
                    source_cm, target_cm = self.classify(col, test=test)
                    # Shared value range handed to both plots
                    # (presumably so they use the same color scale).
                    vrange = (min(source_cm.values.min(),
                                  target_cm.values.min()),
                              max(source_cm.values.max(),
                                  target_cm.values.max()))
                    path = (plot_confusion_matrix(source_cm, vrange=vrange,
                                                  xlabel='raw',
                                                  ylabel='actual'),
                            plot_confusion_matrix(target_cm, vrange=vrange,
                                                  xlabel='synth',
                                                  ylabel='actual'))
                else:
                    cm = self.classify(col, test=test)
                    # make path's type: 1-tuple
                    path = (plot_confusion_matrix(cm, xlabel='synth',
                                                  ylabel='raw'), )
            except ValueError as e:
                # Best effort: report the failure and skip this column.
                print(e)
                continue
            svms.append({'column': col, 'path': path})

    if buf:
        # Render before opening the file so that a rendering failure does
        # not leave a truncated report behind.
        html = template.render(title=title,
                               basics=basics,
                               dists=dists,
                               corrs=corrs,
                               svms=svms)
        with open(buf, 'w+', encoding='utf-8') as file:
            file.write(html)