def test_plot_correlation_scatter_plot(self):
    """Build a three-column table, plot it, and add the figure to the report.

    The columns are a base integer sequence, a linear transform of that
    sequence (3 * x + 1), and a fixed list of literal values labelled
    'no_correlation'.
    """
    column_names = ['base', 'linear_trans', 'no_correlation']

    base = range(10)
    linear_trans = [3 * value + 1 for value in base]
    no_correlation = [1, 5, 8, 4, 1, 8, 5, 9, 0, 1]

    # Row-wise view of the three columns, handed to the converter as-is.
    rows = zip(base, linear_trans, no_correlation)
    table = utils.convert_to_sa(rows, col_names=column_names)

    figure = dsp.plot_correlation_scatter_plot(table, verbose=False)
    self.add_fig_to_report(figure, 'plot_correlation_scatter_plot')
labels = sklearn.datasets.load_iris().target M = cast_np_nd_to_sa(M) #M is multi class, we want to remove those rows. keep_index = np.where(labels != 2) labels = labels[keep_index] M = M[keep_index] if False: for x in describe_cols(M): print x if False: plot_correlation_scatter_plot(M) plot_correlation_matrix(M) plot_kernel_density(M['f0']) #no designation of col name plot_box_plot(M['f0']) #no designation of col name if False: from diogenes.generate import val_between, choose_rows_where, append_cols #val_btwn, where #generate a composite rule M = choose_rows_where(M, [{ 'func': val_between, 'col_name': 'f0', 'vals': (3.5, 5.0) }, { 'func': val_between, 'col_name': 'f1', 'vals': (2.7, 3.1)
plot_kernel_density, plot_box_plot) from diogenes.grid_search import Experiment from diogenes.grid_search import std_clfs as std_clfs from diogenes.utils import remove_cols data = open_csv_url( 'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv', delimiter=';') y = data['quality'] M = remove_cols(data, 'quality') y = y < np.average(y) if False: for x in describe_cols(M): print x if False: plot_correlation_scatter_plot(M) plot_correlation_matrix(M) plot_kernel_density(M['f0']) #no designation of col name plot_box_plot(M['f0']) #no designation of col name exp = Experiment(M, y, clfs=std_clfs) exp.make_csv()