def plot_feature_rank(dataset, show=False):
    """Plot the feature-importance bar chart for one dataset.

    Fetches the feature rankings for ``dataset`` (polynomial terms disabled),
    orders the features by group and, within a group, by descending mean
    weight, and passes the data to ``bar_plot`` with the figure name
    ``feature_rank_<dataset>.pdf``.

    Args:
        dataset: Dataset identifier, forwarded to ``get_feature_rankings``
            and embedded in the figure name.
        show: Forwarded to ``bar_plot`` as its ``show`` option.
    """
    print("Plotting plot_feature_rank, dataset: %s" % dataset)
    rankings = get_feature_rankings(dataset=dataset, polynomial_terms=False)

    # Mean weight per (feature, group) pair.
    mean_weights = (
        rankings[['feature', 'weight', 'group']]
        .groupby(['feature', 'group'])['weight']
        .mean()
        .reset_index()
    )
    # Groups ascending, weights descending inside each group.
    feature_order = mean_weights.sort_values(
        ['group', 'weight'], ascending=[True, False])['feature']

    bar_plot(
        rankings,
        'feature_rank_%s.pdf' % dataset,
        x_col='weight',
        y_col='feature',
        hue_col='group',
        x_label='Score',
        y_label='Feature',
        order=feature_order,
        dodge=False,
        labelsize=8,
        figsize=(8, 11),
        font_scale=0.8,
        fontsize=14,
        show=show,
        y_lim=None,
        capsize=.2,
        title="Feature Importance",
    )
def domain_adaptation_plot_helper(classifiers, metric='acc'):
    """Plot domain-adaptation results for every method/classifier pair.

    For each adaptation method and each classifier the per-fold results are
    fetched with ``get_da_results``, their confidence interval is printed via
    ``util.print_ci_from_df``, and all frames are concatenated and handed to
    ``bar_plot``.

    Args:
        classifiers: Sequence of classifier keys.
        metric: ``'acc'`` (Accuracy), ``'fms'`` (F-Measure); any other value
            is labelled "AUC".
    """
    print("Plotting domain_adaptation, classifiers: %s" % classifiers)

    adaptation_methods = (
        'target_only', 'source_only', 'relabeled', 'augment', 'coral')

    frames = []
    for method in adaptation_methods:
        for clf in classifiers:
            result = get_da_results(clf, method, metric)
            util.print_ci_from_df(result['folds'], method, clf)
            frames.append(result)
    combined = pd.concat(frames)

    y_label = ("Accuracy" if metric == 'acc'
               else "F-Measure" if metric == 'fms'
               else "AUC")

    # NOTE(review): the figure name is keyed on the *second* classifier, so a
    # one-element `classifiers` raises IndexError here -- confirm intended.
    figname = 'domain_adapt_plot_%s_%s.pdf' % (metric, classifiers[1])

    bar_plot(
        combined,
        figname,
        x_col='method',
        y_col='folds',
        hue_col='model',
        x_label='Model',
        figsize=(10, 8),
        font_scale=1.2,
        fontsize=20,
        y_label=y_label,
        y_lim=(0, 1),
    )
def vanilla_feature_set_plot(show=False):
    """Plot the baseline results of every classifier under every metric.

    Iterates ``models.CLASSIFIER_KEYS`` x ``models.METRICS``, prints the
    confidence interval of each result's ``folds`` column, and passes the
    concatenated results to ``bar_plot`` with the figure name
    ``vanilla_results.pdf``.

    Args:
        show: Forwarded to ``bar_plot`` as its ``show`` option.
    """
    print("Plotting vanilla_feature_set_plot")

    frames = []
    for clf in models.CLASSIFIER_KEYS:
        for metric in models.METRICS:
            result = get_vanilla_results(clf, metric)
            util.print_ci_from_df(result['folds'], clf, metric)
            frames.append(result)
    combined = pd.concat(frames)

    bar_plot(
        combined,
        'vanilla_results.pdf',
        x_col='model',
        y_col='folds',
        hue_col='metric',
        x_label='Model',
        y_label='Performance',
        figsize=(12, 10),
        font_scale=1.2,
        fontsize=20,
        y_lim=None,
        show=show,
        title="10-Fold Cross Validation Performance",
    )
def blog_plot():
    """Plot the blog results of every classifier under every metric.

    Iterates ``models.CLASSIFIER_KEYS`` x ``models.METRICS``, prints the
    confidence interval of each result's ``folds`` column, and passes the
    concatenated results to ``bar_plot`` with the figure name
    ``blog_plot.pdf``.
    """
    print("Plotting blog_plot")

    frames = []
    for clf in models.CLASSIFIER_KEYS:
        for metric in models.METRICS:
            result = get_blog_results(clf, metric)
            util.print_ci_from_df(result['folds'], clf, metric)
            frames.append(result)
    combined = pd.concat(frames)

    bar_plot(
        combined,
        'blog_plot.pdf',
        x_col='model',
        y_col='folds',
        hue_col='metric',
        x_label='Model',
        y_label='Performance',
        font_scale=1.2,
        fontsize=20,
        rotation=15,
    )
def plot_blog_feature_rank(show=False):
    """Plot the feature-importance bar chart for the blog data.

    Fetches the rankings with ``get_blog_feature_rankings``, orders the
    features by group and, within a group, by descending mean weight, and
    passes the data to ``bar_plot`` with the figure name
    ``blog_feature_rank.pdf``.

    Args:
        show: Forwarded to ``bar_plot`` as its ``show`` option.
    """
    print("Plotting plot_blog_feature_rank")
    rankings = get_blog_feature_rankings()

    # Mean weight per (feature, group) pair.
    mean_weights = (
        rankings[['feature', 'weight', 'group']]
        .groupby(['feature', 'group'])['weight']
        .mean()
        .reset_index()
    )
    # Groups ascending, weights descending inside each group.
    feature_order = mean_weights.sort_values(
        ['group', 'weight'], ascending=[True, False])['feature']

    bar_plot(
        rankings,
        'blog_feature_rank.pdf',
        x_col='weight',
        y_col='feature',
        hue_col='group',
        x_label='Feature Score',
        y_label='Feature Sets',
        order=feature_order,
        dodge=False,
        labelsize=8,
        figsize=(8, 11),
        show=show,
        y_lim=None,
        capsize=.2,
    )
def blog_ablation_plot(metric='acc'):
    """Plot the blog feature-ablation results for every classifier.

    For each classifier except ``'DummyClassifier'`` and each set in
    ``models.BLOG_FEATURE_SETS`` the ablation result is fetched with
    ``get_ablation_results``, the confidence interval of its ``folds`` column
    is printed, and the concatenated results are passed to ``bar_plot`` with
    the figure name ``blog_ablation_plot.pdf``.

    Args:
        metric: One of ``'acc'``, ``'fms'`` or ``'roc'``.

    Raises:
        KeyError: If ``metric`` has no human-readable name.
        ValueError: If ``'DummyClassifier'`` is not in
            ``models.CLASSIFIER_KEYS``.
    """
    print("Plotting blog_ablation_plot, metric: %s" % metric)

    # Work on a copy: removing from models.CLASSIFIER_KEYS itself would
    # change the shared list for every other plot and make a second call to
    # this function fail with ValueError.
    classifiers = list(models.CLASSIFIER_KEYS)
    classifiers.remove('DummyClassifier')
    ablation_sets = models.BLOG_FEATURE_SETS

    dfs = []
    for classifier in classifiers:
        for ab_set in ablation_sets:
            df = get_ablation_results(ab_set, classifier, metric,
                                      BLOG_ABLATION_PREFIX)
            util.print_ci_from_df(df['folds'], classifier, metric)
            dfs.append(df)
    dfs = pd.concat(dfs)

    human_readable = {"acc": "Accuracy", "fms": "F-Measure", "roc": "AUC"}
    plot_specs = {
        'x_col': 'ablation_set',
        'y_col': 'folds',
        'hue_col': 'model',
        'x_label': 'Feature Set',
        'y_label': "Change in %s " % human_readable[metric],
        'title': "Feature Ablation",
        'figsize': (10, 8),
        'fontsize': 20,
        'font_scale': 1.2,
        'y_lim': None,
        'errwidth': 0.75,
        'labelsize': 10,
        'rotation': 15,
    }
    figname = 'blog_ablation_plot.pdf'
    bar_plot(dfs, figname, **plot_specs)
def barplot():
    """Build a bar plot from the current request's query parameters.

    Reads ``labels``, ``values``, ``xlabel``, ``ylabel`` and ``title`` from
    ``request.args``. ``labels`` is split on commas and ``values`` is parsed
    with ``util.string_to_list``. If parsing fails (the error text is
    printed) or the two lists differ in length, the string
    ``'Nothing to do here.'`` is returned; otherwise the result of
    ``util.bar_plot`` is returned.
    """
    query = request.args
    raw_labels = query.get('labels')
    raw_values = query.get('values')
    xlabel = query.get('xlabel')
    ylabel = query.get('ylabel')
    title = query.get('title')

    try:
        labels = raw_labels.split(',')
        values = util.string_to_list(raw_values)
    except Exception as e:
        # Includes a missing parameter (None has no .split).
        print(str(e))
        return 'Nothing to do here.'

    if len(labels) == len(values):
        return util.bar_plot(labels, values, xlabel, ylabel, title)
    return 'Nothing to do here.'
def p_Xw_i_MISE(mock, ell=0, rebin=None, krange=None, method='choletsky', b=0.1):
    ''' Examine the pdf of X_w^i components that deviate significantly from
    N(0,1) based on MISE.

    The data from NG.dataX is mean-subtracted and whitened, NG.MISE is
    evaluated for every whitened component, and the histograms of the 10
    components with the largest MISE are plotted against N(0,1). The indices
    and MISE values of the 5 largest are printed. The figure is saved under
    UT.fig_dir() + 'tests/'.

    parameters
    ----------
    mock, ell, rebin, krange :
        forwarded to NG.dataX; mock, ell and rebin also go into the file name.
    method :
        whitening method forwarded to NG.whiten.
    b :
        forwarded to NG.MISE; the histograms over [-5, 5] use int(10/b) bins.
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, _ = NG.whiten(X, method=method)  # whitened data

    # calculate the MISE of each p(X_w^i)
    x = np.arange(-5., 5.1, 0.1)
    mise = np.zeros(X_w.shape[1])
    for i_bin in range(X_w.shape[1]):
        mise[i_bin] = NG.MISE(X_w[:, i_bin], b=b)

    # plot the most discrepant components.
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    i_sort = np.argsort(mise)
    print('outlier bins =  %s' % (i_sort[-5:],))
    print('mise =  %s' % (mise[i_sort[-5:]],))

    nbin = int(10. / b)
    for i_bin in i_sort[-10:]:
        # density=True replaces the removed `normed` keyword; the two agree
        # for the equal-width bins used here.
        hb_Xi, Xi_edges = np.histogram(X_w[:, i_bin], bins=nbin,
                                       range=[-5., 5.], density=True)
        p_X_w_arr = UT.bar_plot(Xi_edges, hb_Xi)
        sub.plot(p_X_w_arr[0], p_X_w_arr[1])
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X^{i}_{W}}$', fontsize=25)
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X^{i}_{W})}$', fontsize=25)
    sub.legend(loc='upper right')

    str_rebin = ''
    if rebin is not None:
        str_rebin = '.rebin' + str(rebin)

    f = ''.join([UT.fig_dir(),
                 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell),
                 str_rebin, '.b', str(b), '.png'])
    fig.savefig(f, bbox_inches='tight')
    return None
def div_K(div_func='kl'):
    ''' compare the KL or Renyi divergence for the following with their using
    different K values

    - D( mock X || gauss(C_X))
    - D( mock X || p(X) KDE)
    - D( mock X || p(X) GMM)
    - D( mock X || PI p(X^i_ICA) KDE)
    - D( mock X || PI p(X^i_ICA) GMM)

    The divergence samples are read from text files under UT.dat_dir(); the
    path of any file that cannot be read is printed and its panel is skipped.
    The figure is saved under UT.fig_dir() + 'tests/'.

    parameters
    ----------
    div_func :
        divergence name used in the data/figure file names. 'kl' and
        'renyi0.5' also select the x-axis label.
    '''
    lbls = [r'$D( P(k) \parallel \mathcal{N}({\bf C}))$',
            r'$D( P(k) \parallel p_\mathrm{KDE}(P(k)))$',
            r'$D( P(k) \parallel p_\mathrm{GMM}(P(k)))$',
            r'$D( P(k) \parallel \prod_i p_\mathrm{KDE}(P(k)_i^\mathrm{ICA}))$',
            r'$D( P(k) \parallel \prod_i p_\mathrm{GMM}(P(k)_i^\mathrm{ICA}))$']

    nbins = 50
    fig = plt.figure(figsize=(20,4))
    for i_obv, obv in enumerate(['pk.ngc', 'gmf']):
        if obv == 'pk.ngc':
            Nref = 2000
            if div_func == 'kl':
                hranges = [[-0.5, 0.5], [-0.5, 7.], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]]
            else:
                hranges = [[-0.5, 0.5] for _ in range(5)]
            Ks = [5, 10, 15]
        elif obv == 'gmf':
            Nref = 10000
            hranges = [[-0.1, 0.4], [-0.1, 0.4], [-0.1, 0.4], [-0.1, 0.4], [-0.1, 0.4]]
            Ks = [10]

        # NOTE(review): the subplot slots below do not depend on K, so for
        # 'pk.ngc' the three K values target the same panels -- confirm that
        # overlaying them is intended.
        for K in Ks:
            fs = ['pX_gauss.K'+str(K), 'pX_scottKDE.K'+str(K), 'pX_GMM.K'+str(K)+'.ncomp30',
                  'pXi_ICA_scottKDE.K'+str(K), 'pXi_ICA_GMM.K'+str(K)+'.ncomp30']

            # Keep each loaded divergence paired with its panel index so that
            # a missing file cannot shift the later ones onto the wrong
            # label / histogram range.
            loaded = []
            for i_div, f in enumerate(fs):
                f_div = ''.join([UT.dat_dir(), 'diverg.', obv, '.', f, '.Nref', str(Nref), '.',
                                 div_func, '.dat'])
                try:
                    loaded.append((i_div, np.loadtxt(f_div)))
                except IOError:
                    print(f_div)

            bkgd = fig.add_subplot(2,1,i_obv+1, frameon=False)
            for i_div, div in loaded:
                sub = fig.add_subplot(2,5,len(fs)*i_obv+i_div+1)
                # density=True replaces the removed `normed` keyword.
                hh = np.histogram(div, density=True, range=hranges[i_div], bins=nbins)
                bp = UT.bar_plot(*hh)
                sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1], edgecolor='none')
                y_max = max(0., bp[1].max())
                sub.set_xlim(hranges[i_div])
                sub.set_ylim([0., y_max*1.4])
                if i_obv == 0:
                    sub.set_title(lbls[i_div])

            # the frameless background axes only carries the shared x label
            if div_func == 'kl':
                bkgd.set_xlabel(r'KL divergence', fontsize=20, labelpad=20)
            elif div_func == 'renyi0.5':
                bkgd.set_xlabel(r'R\'enyi-$\alpha$ divergence', fontsize=20, labelpad=20)
            bkgd.set_xticklabels([])
            bkgd.set_yticklabels([])
            # booleans rather than the 'off' strings, which newer matplotlib
            # releases no longer interpret as False
            bkgd.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)

    fig.subplots_adjust(wspace=.15, hspace=0.3)
    f_fig = ''.join([UT.fig_dir(), 'tests/Ktest_kNNdiverg.', div_func, '.png'])
    fig.savefig(f_fig, bbox_inches='tight')
    return None
def p_Xw_i(mock, ell=0, rebin=None, krange=None, ica=False, pca=False):
    ''' Test the probability distribution function of each X_w^i component
    -- p(X_w^i). First compare the histograms of p(X_w^i) with N(0,1).
    Then compare the gaussian KDE of p(X_w^i).

    parameters
    ----------
    mock, ell, rebin, krange :
        forwarded to NG.dataX; mock, ell and rebin also go into the file name.
    ica :
        if True, plot the ICA components of the whitened data.
    pca :
        if True, whiten with method='pca' and plot those components.

    raises
    ------
    ValueError if both ica and pca are set.
    '''
    if ica and pca:
        raise ValueError("ica and pca cannot both be True")

    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)

    if ica:  # ICA components
        # ICA components do not need to be Gaussian.
        # in fact the whole point of the ICA transform
        # is to capture the non-Gaussianity...
        X_white, _ = NG.whiten(X)  # whitened data
        X_w, _ = NG.Ica(X_white)
        str_w = 'ICA'
    elif pca:  # PCA components
        X_w, _ = NG.whiten(X, method='pca')  # whitened data
        str_w = 'PCA'
    else:  # just whitened
        X_w, _ = NG.whiten(X)  # whitened data
        str_w = 'W'

    x = np.arange(-5., 5.1, 0.1)
    n_comp = X_w.shape[1]

    # p(X_w^i) histograms
    fig = plt.figure(figsize=(15,7))
    sub = fig.add_subplot(121)
    for i_bin in range(n_comp):
        # density=True replaces the removed `normed` keyword.
        p_X_w, edges = np.histogram(X_w[:, i_bin], density=True)
        p_X_w_arr = UT.bar_plot(edges, p_X_w)
        sub.plot(p_X_w_arr[0], p_X_w_arr[1])
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel(r'$\mathtt{X_{' + str_w + '}}$', fontsize=25)
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X_{' + str_w + '})}$', fontsize=25)
    sub.legend(loc='upper right')

    # p(X_w^i) gaussian KDE fits
    pdfs = NG.p_Xw_i(X_w, range(n_comp), x=x)

    sub = fig.add_subplot(122)
    for i_bin in range(n_comp):
        sub.plot(x, pdfs[i_bin])
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label=r'$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    # label with the transform that was actually applied; this panel used to
    # say "W" even for the ICA/PCA components
    sub.set_xlabel(r'$\mathtt{X_{' + str_w + '}}$', fontsize=25)
    sub.set_ylim([0., 0.6])
    sub.set_ylabel(r'$\mathtt{P(X_{' + str_w + '})}$', fontsize=25)
    sub.legend(loc='upper right')

    str_ica = '.ICA' if ica else ''
    str_pca = '.PCA' if pca else ''
    str_rebin = '' if rebin is None else '.rebin' + str(rebin)

    f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i', str_pca, str_ica, '.', mock, '.ell', str(ell),
                 str_rebin, '.png'])
    fig.savefig(f, bbox_inches='tight')
    return None
def GMF_p_Xw_i(ica=False, pca=False):
    ''' Test the probability distribution function of each transformed X
    component -- p(X^i). First compare the histograms of p(X_w^i) with N(0,1).
    Then compare the gaussian KDE of p(X_w^i).

    One panel is drawn per GMF component, showing its histogram, N(0,1) and
    the pdf estimates from NG.p_Xw_i with methods 'gkde', 'sk_kde', 'sm_kde'
    and 'gmm'. The wall-clock time of each KDE estimate is printed.

    parameters
    ----------
    ica :
        if True, plot the ICA components of the whitened data.
    pca :
        if True, whiten with method='pca' and plot those components.

    raises
    ------
    ValueError if both ica and pca are set.
    '''
    if ica and pca:
        raise ValueError("ica and pca cannot both be True")

    gmf = NG.X_gmf_all()  # import all the GMF mocks
    X, _ = NG.meansub(gmf)

    if ica:  # ICA components
        # ICA components do not need to be Gaussian.
        # in fact the whole point of the ICA transform
        # is to capture the non-Gaussianity...
        X_white, _ = NG.whiten(X)  # whitened data
        X_w, _ = NG.Ica(X_white)
        str_w = 'ICA'
    elif pca:  # PCA components
        X_w, _ = NG.whiten(X, method='pca')  # whitened data
        str_w = 'PCA'
    else:  # just whitened
        X_w, _ = NG.whiten(X)  # whitened data
        str_w = 'W'

    # (NG.p_Xw_i method, legend label, timing message) for the timed KDE fits
    kde_fits = [('gkde', 'Gaussian KDE', 'scipy Gaussian KDE '),
                ('sk_kde', 'SKlearn KDE', 'SKlearn CV best-fit KDE '),
                ('sm_kde', 'StatsModels KDE', 'Stats Models KDE ')]

    n_comp = gmf.shape[1]
    x = np.linspace(-5., 5., 100)  # same grid for every panel

    fig = plt.figure(figsize=(5*n_comp,4))
    for icomp in range(n_comp):
        sub = fig.add_subplot(1, n_comp, icomp+1)
        # histogram of X_w^i s (density=True replaces the removed `normed`)
        hh = np.histogram(X_w[:, icomp], density=True, bins=50, range=[-5., 5.])
        p_X_w_arr = UT.bar_plot(*hh)
        sub.fill_between(p_X_w_arr[0], np.zeros(len(p_X_w_arr[1])), p_X_w_arr[1],
                         color='k', alpha=0.25)
        sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=2, ls=':', label=r'$\mathcal{N}(0,1)$')

        # p(X_w^i) KDE fits, each one timed
        for kde_method, kde_label, timing_msg in kde_fits:
            t_start = time.time()
            pdf = NG.p_Xw_i(X_w, icomp, x=x, method=kde_method)
            sub.plot(x, pdf, lw=2.5, label=kde_label)
            print('%s %s' % (timing_msg, time.time() - t_start))

        # p(X_w^i) GMM fits
        pdf = NG.p_Xw_i(X_w, icomp, x=x, method='gmm', n_comp_max=20)
        sub.plot(x, pdf, lw=2.5, ls='--', label='GMM')

        sub.set_xlim([-3., 3.])
        sub.set_xlabel('$X_{' + str_w + '}^{(' + str(icomp) + ')}$', fontsize=25)
        sub.set_ylim([0., 0.6])
        if icomp == 0:
            sub.set_ylabel('$P(X_{' + str_w + '})$', fontsize=25)
            # NOTE(review): legend placed on the first panel only; the
            # original nesting was not recoverable -- confirm.
            sub.legend(loc='upper left', prop={'size': 15})

    str_ica = '.ICA' if ica else ''
    str_pca = '.PCA' if pca else ''

    f = ''.join([UT.fig_dir(), 'tests/test.GMF_p_Xw_i', str_pca, str_ica, '.png'])
    fig.savefig(f, bbox_inches='tight')
    return None
def _best_gmm_by_bic(data, n_comp_max):
    ''' Fit GMMs with 1..n_comp_max components to data; return the lowest-BIC one. '''
    gmms, bics = [], []
    for n_comp in range(1, n_comp_max+1):
        gmm = GMix(n_components=n_comp)
        gmm.fit(data)
        gmms.append(gmm)
        bics.append(gmm.bic(data))
    return gmms[np.array(bics).argmin()]


def _best_kde_by_cv(data):
    ''' Return the KDE whose bandwidth wins a 10-fold cross-validated grid search. '''
    grid = GridSearchCV(skKDE(),
                        {'bandwidth': np.linspace(0.1, 1.0, 30)},
                        cv=10)  # 10-fold cross-validation
    grid.fit(data)
    return grid.best_estimator_


def divGMF(div_func='kl', Nref=1000, K=5, n_mc=10, n_comp_max=10, n_mocks=2000):
    ''' compare the divergence estimates between
    D( gauss(C_gmf) || gauss(C_gmf) ),
    D( gmfs || gauss(C_gmf) ),
    D( gmfs || p(gmfs) KDE),
    D( gmfs || p(gmfs) GMM),
    D( gmfs || PI p(gmfs^i_ICA) KDE),
    and D( gmfs || PI p(gmfs^i_ICA) GMM)

    parameters
    ----------
    div_func :
        'kl', or a string containing 'renyi' whose part after the last ':'
        parses as the float alpha.
    Nref :
        forwarded to the NG.kNNdiv_* estimators (floats are cast to int).
    K :
        forwarded to the NG.kNNdiv_* estimators as Knn.
    n_mc :
        number of Monte Carlo repetitions of every divergence estimate.
    n_comp_max :
        largest number of GMM components tried in the BIC selection.
    n_mocks :
        number of mocks taken from the start of NG.X_gmf_all().

    raises
    ------
    ValueError if div_func is not supported.
    '''
    # Resolve the axis label / file tag before the expensive part, so that an
    # unsupported div_func fails here instead of after all the fits.
    if 'renyi' in div_func:
        alpha = float(div_func.split(':')[-1])
        xlabel = r'Renyi-$\alpha='+str(alpha)+'$ divergence'
        str_div = 'renyi'+str(alpha)
    elif div_func == 'kl':
        xlabel = r'KL divergence'
        str_div = 'kl'
    else:
        raise ValueError("unsupported div_func: %s" % div_func)

    if isinstance(Nref, float):
        Nref = int(Nref)

    # read in mock GMFs from all HOD realizations (20,000 mocks)
    gmfs_mock = NG.X_gmf_all()[:n_mocks]
    n_mock = gmfs_mock.shape[0]  # number of mocks
    print("%i mocks" % n_mock)

    gmfs_mock_meansub, _ = NG.meansub(gmfs_mock)  # mean subtract
    X_w, _ = NG.whiten(gmfs_mock_meansub)
    X_ica, _ = NG.Ica(X_w)  # ICA transformation

    C_gmf = np.cov(X_w.T)  # covariance matrix

    # p(gmfs) GMM
    kern_gmm = _best_gmm_by_bic(X_w, n_comp_max)

    # p(gmfs) KDE
    t0 = time.time()
    kern_kde = _best_kde_by_cv(X_w)
    print('%f sec' % (time.time() - t0))

    # PI p(gmfs^i_ICA) GMM
    kern_gmm_ica = []
    for ibin in range(X_ica.shape[1]):
        kern_gmm_ica.append(_best_gmm_by_bic(X_ica[:, ibin][:, None], n_comp_max))

    # PI p(gmfs^i_ICA) KDE
    kern_kde_ica = []
    for ibin in range(X_ica.shape[1]):
        t0 = time.time()
        kern_kde_ica.append(_best_kde_by_cv(X_ica[:, ibin][:, None]))
        print('%f sec' % (time.time() - t0))

    # caluclate the divergences now
    div_gauss_ref, div_gauss = [], []
    div_gmm, div_gmm_ica = [], []
    div_kde, div_kde_ica = [], []
    for i in range(n_mc):
        print('%i montecarlo' % i)
        t_start = time.time()
        # reference divergence in order to showcase the estimator's scatter
        # Gaussian distribution described by C_gmf with same n_mock mocks
        gauss = mvn(np.zeros(gmfs_mock.shape[1]), C_gmf, size=n_mock)
        div_gauss_ref.append(
            NG.kNNdiv_gauss(gauss, C_gmf, Knn=K, div_func=div_func, Nref=Nref))
        # estimate divergence between gmfs_white and a
        # Gaussian distribution described by C_gmf
        div_gauss.append(
            NG.kNNdiv_gauss(X_w, C_gmf, Knn=K, div_func=div_func, Nref=Nref))
        # D( gmfs || p(gmfs) GMM)
        div_gmm.append(
            NG.kNNdiv_Kernel(X_w, kern_gmm, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=False))
        # D( gmfs || p(gmfs) KDE)
        div_kde.append(
            NG.kNNdiv_Kernel(X_w, kern_kde, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=False))
        # D( gmfs || PI p(gmfs^i_ICA) GMM),
        div_gmm_ica.append(
            NG.kNNdiv_Kernel(X_ica, kern_gmm_ica, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=True))
        # D( gmfs || PI p(gmfs^i_ICA) KDE),
        div_kde_ica.append(
            NG.kNNdiv_Kernel(X_ica, kern_kde_ica, Knn=K, div_func=div_func,
                             Nref=Nref, compwise=True))
        print('t= %f sec' % round(time.time()-t_start,2))

    fig = plt.figure(figsize=(10,5))
    sub = fig.add_subplot(111)
    hrange = [-0.15, 0.6]
    nbins = 50

    divs = [div_gauss_ref, div_gauss, div_gmm, div_kde, div_gmm_ica, div_kde_ica]
    labels = ['Ref.',
              r'$D(\{\zeta_i^{(m)}\}\parallel \mathcal{N}({\bf C}^{(m)}))$',
              r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{GMM}(\{\zeta^{m}\}))$',
              r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{KDE}(\{\zeta^{m}\}))$',
              r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{GMM}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$',
              r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{KDE}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$']

    y_max = 0.
    for div, lbl in zip(divs, labels):
        # density=True replaces the removed `normed` keyword.
        hh = np.histogram(np.array(div), density=True, range=hrange, bins=nbins)
        bp = UT.bar_plot(*hh)
        sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1], edgecolor='none', alpha=0.5, label=lbl)
        y_max = max(y_max, bp[1].max())
        if (np.average(div) < hrange[0]) or (np.average(div) > hrange[1]):
            print('divergence of %s (%f) is outside range' % (lbl, np.average(div)))
    sub.set_xlim(hrange)
    sub.set_ylim([0., y_max*1.2])
    sub.legend(loc='upper left', prop={'size': 15})
    sub.set_xlabel(xlabel, fontsize=20)

    f_fig = ''.join([UT.fig_dir(), 'tests/kNN_divergence.gmf.K', str(K), '.', str(n_mocks),
                     '.', str_div, '.png'])
    fig.savefig(f_fig, bbox_inches='tight')
    return None
def div_ICA(obv='pk.ngc', K=10, div_func='kl'):
    ''' compare the KL or Renyi divergence for different ICA decomposition
    algorithms FastICA deflation, FastICA parallel, Infomax ICA

    - D( mock X || PI p(X^i_ICA) KDE)
    - D( mock X || PI p(X^i_ICA) GMM)

    The divergence samples are read from text files under UT.dat_dir(); the
    path of any file that cannot be read is printed and that entry is
    skipped. The figure is saved under UT.fig_dir() + 'tests/'.

    parameters
    ----------
    obv :
        'pk.ngc' or 'gmf'.
    K :
        K value embedded in the data/figure file names.
    div_func :
        divergence name used in the file names. 'kl' and 'renyi0.5' also
        select the x-axis label.

    raises
    ------
    ValueError if obv is not one of the supported observables.
    '''
    if obv == 'pk.ngc':
        str_obv = 'P(k)'
        Nref = 2000
        hrange = [-0.5, 0.5]
    elif obv == 'gmf':
        str_obv = r'\zeta(N)'
        Nref = 10000
        # this used to be bound to `hranges`, leaving `hrange` undefined
        hrange = [-0.1, 0.4]
    else:
        raise ValueError("unsupported obv: %s" % obv)

    lbls = [r'$D( '+str_obv+r' \parallel \prod_i p_\mathrm{KDE}(P(k)_i^\mathrm{ICA}))$',
            r'$D( '+str_obv+r' \parallel \prod_i p_\mathrm{GMM}(P(k)_i^\mathrm{ICA}))$']

    icas = ['ICA', 'parICA']
    nbins = 50

    fig = plt.figure(figsize=(10,4))
    bkgd = fig.add_subplot(111, frameon=False)
    for i_div, str_div in enumerate(['scottKDE.K'+str(K), 'GMM.K'+str(K)+'.ncomp30']):
        # Keep every loaded divergence paired with its ICA name so that a
        # missing file cannot shift the legend labels.
        loaded = []
        for ica in icas:
            f_div = ''.join([UT.dat_dir(), 'diverg.', obv, '.pXi_', ica, '_', str_div,
                             '.Nref', str(Nref), '.', div_func, '.dat'])
            try:
                loaded.append((ica, np.loadtxt(f_div)))
            except IOError:
                print(f_div)

        sub = fig.add_subplot(1,2,i_div+1)
        y_max = 0.
        for ica, div in loaded:
            print(np.mean(div))
            # density=True replaces the removed `normed` keyword.
            hh = np.histogram(div, density=True, range=hrange, bins=nbins)
            bp = UT.bar_plot(*hh)
            sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1], edgecolor='none', label=ica)
            y_max = max(y_max, bp[1].max())
        if i_div == 0:
            sub.legend(loc='upper left', prop={'size': 20})
        sub.set_xlim(hrange)
        sub.set_ylim([0., y_max*1.4])
        sub.set_title(lbls[i_div])

    # the frameless background axes only carries the shared x label
    if div_func == 'kl':
        bkgd.set_xlabel(r'KL divergence', fontsize=20, labelpad=20)
    elif div_func == 'renyi0.5':
        bkgd.set_xlabel(r'R\'enyi-$\alpha$ divergence', fontsize=20, labelpad=20)
    bkgd.set_xticklabels([])
    bkgd.set_yticklabels([])
    # booleans rather than the 'off' strings, which newer matplotlib releases
    # no longer interpret as False
    bkgd.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)

    fig.subplots_adjust(wspace=.15, hspace=0.3)
    f_fig = ''.join([UT.fig_dir(), 'tests/', 'ICA_kNNdiverg.', obv, '.K', str(K), '.', div_func, '.png'])
    fig.savefig(f_fig, bbox_inches='tight')
    return None
# Bar chart of the throughput of the read functions under different
# transaction durations. Plotted read functions include:
#   - readConsent
#   - readArtwork
#   - getHistoryForArtwork
#   - readLog
#   - readModel
#   - readBalance
#   - invokeModel
import util
from matplotlib import pyplot as plt

if __name__ == "__main__":
    # Function names handed to util.concat_result as its filter list.
    # NOTE(review): presumably these write functions are excluded so that only
    # the read functions listed above are plotted -- confirm against
    # util.concat_result.
    # NOTE(review): 'transferBlalance' looks like a misspelling of
    # 'transferBalance'; left untouched because it has to match whatever name
    # the result data uses -- verify.
    filterFunc = [
        'initialConsent',
        'grantRevokeConsent',
        'uploadArtwork',
        'transferArtwork',
        'addLog',
        'addModel',
        'addWallet',
        'transferBlalance',
    ]
    data = util.concat_result(filterFunc, 7)  # 7 for tps

    fig, ax = plt.subplots(figsize=(10, 6))
    util.bar_plot(ax, data, total_width=.75, single_width=.9)

    # One tick per transaction duration.
    plt.xticks(range(7), ["10", "20", "30", "40", "50", "60", "70"])
    plt.xlabel('txDuration (sec)')
    plt.ylabel('Throughput (tps)')
    plt.title(
        'Read performance of different functions under different transaction duration'
    )
    plt.show()
def new_feature_set_plot(metric='acc', absolute=True, poly=True, show=False):
    """Plot classifier performance with each new feature set.

    For every feature set (preceded by the ``'none'`` baseline when
    ``absolute`` is true) and every classifier except ``'DummyClassifier'``,
    the result is fetched with ``get_new_feature_results``, the confidence
    interval of its ``folds`` column is printed, and the concatenated results
    are passed to ``bar_plot``.

    Args:
        metric: ``'acc'`` (Accuracy), ``'fms'`` (F-Measure); any other value
            is labelled "AUC" and uses a different y range.
        absolute: Plot absolute scores if true, otherwise the change in
            score (adds ``_relative`` to the figure name).
        poly: Forwarded to ``get_new_feature_results``. If true, ``'halves'``
            is relabelled ``'halves+quadratic'``; otherwise
            ``_without_quadratic`` is added to the figure name.
        show: Forwarded to ``bar_plot`` as its ``show`` option.
    """
    print("Plotting new_feature_set_plot, metric: %s" % metric)

    feature_sets = (['none'] if absolute else []) + list(models.NEW_FEATURE_SETS)
    clf_keys = list(models.CLASSIFIER_KEYS)
    clf_keys.remove('DummyClassifier')

    frames = []
    for feature_set in feature_sets:
        for clf in clf_keys:
            result = get_new_feature_results(
                feature_set, clf, metric, absolute=absolute, poly=poly)
            util.print_ci_from_df(result['folds'], feature_set, clf)
            frames.append(result)
    combined = pd.concat(frames).replace('none', 'baseline')

    # Axis label and range per metric.
    if metric == 'acc':
        y_label, y_lim = "Accuracy", (.68, .90)
    elif metric == 'fms':
        y_label, y_lim = "F-Measure", (.68, .90)
    else:
        y_label, y_lim = "AUC", (.70, .95)

    name_parts = ['new_feature_plot_%s' % metric]
    title = 'Performance w/ New Feature Sets'
    if not absolute:
        y_label = "Change in %s" % y_label
        y_lim = (-.10, .10)
        name_parts.append('_relative')
        title = 'Change in Performance w/ New Feature Sets'

    # We use polynomial terms as well for halves
    if poly:
        combined = combined.replace('halves', 'halves+quadratic')
    else:
        name_parts.append('_without_quadratic')
    name_parts.append('.pdf')

    bar_plot(
        combined,
        ''.join(name_parts),
        x_col='new_feature_set',
        y_col='folds',
        hue_col='model',
        x_label='Feature Set',
        y_label=y_label,
        y_lim=y_lim,
        figsize=(10, 8),
        fontsize=20,
        font_scale=1.2,
        labelsize=15,
        show=show,
        title=title,
    )
def groupcatSFMS(mrange=[10.6,10.8]):
    '''Figure of the z~0 group catalog.

    Panel a) SFR-M* relation
    Panel b) P(SSFR) with SFMS fitting

    parameters
    ----------
    mrange :
        [low, high] log M* bin. It is shaded in panel a, used as the fit
        range of the second SFMS fit and selects the galaxies of panel b.
        The default list is only read, never modified.

    The figure is saved as UT.tex_dir() + 'figs/groupcat.pdf'.
    '''
    # Read in Jeremy's group catalog with Mr_cut = -18
    gc = Cat.Observations('group_catalog', Mrcut=18, position='central')
    gc_cat = gc.Read()

    fig = plt.figure(figsize=(10,5))

    # fit the SFMS using lettalkaboutquench sfms fitting
    # first without a fit range; its power law is drawn in panel a
    _fSFMS = fstarforms()
    _fit_logm, _fit_logsfr = _fSFMS.fit(gc_cat['mass'], gc_cat['sfr'], method='gaussmix', fit_range=None)
    logsfr_ms = _fSFMS.powerlaw(logMfid=10.5)
    print(_fSFMS._powerlaw_m)
    print(_fSFMS._powerlaw_c)

    # then restricted to mrange; its GMM components are used in panel b
    fSFMS = fstarforms()
    fit_logm, _ = fSFMS.fit(gc_cat['mass'], gc_cat['sfr'], method='gaussmix', fit_range=mrange)
    _, fit_fsfms = fSFMS.frac_SFMS()
    # fitted mass bin closest to the centre of mrange
    i_fit = np.abs(fit_logm - np.mean(mrange)).argmin()

    # log SFR - log M* highlighting where the SFMS lies
    sub1 = fig.add_subplot(1,2,1)
    DFM.hist2d(gc_cat['mass'], gc_cat['sfr'], color='#ee6a50',
               levels=[0.68, 0.95], range=[[9., 12.], [-3.5, 1.5]],
               plot_datapoints=True, fill_contours=False, plot_density=True, ax=sub1)
    sub1.fill_between(mrange, [2.,2.], [-5.,-5], color='k', linewidth=0, alpha=0.25)
    print('%s %s' % (_fit_logm, _fit_logsfr))
    sub1.plot(np.linspace(9.8, 11., 10), logsfr_ms(np.linspace(9.8, 11., 10)), c='k', linestyle='--')
    sub1.set_xticks([9., 10., 11., 12.])
    sub1.set_xlabel(r'log$(\; M_*\; [M_\odot]\;)$', fontsize=20)
    sub1.set_yticks([-3., -2., -1., 0., 1.])
    sub1.set_ylabel(r'log$(\; \mathrm{SFR}\; [M_\odot/\mathrm{yr}]\;)$', fontsize=20)
    sub1.text(0.95, 0.1, 'SDSS central galaxies',
              ha='right', va='center', transform=sub1.transAxes, fontsize=20)

    # P(log SSFR)
    sub2 = fig.add_subplot(1,2,2)
    inmbin = np.where((gc_cat['mass'] > mrange[0]) & (gc_cat['mass'] < mrange[1]))
    # np.histogram returns (hist, bin_edges); density=True replaces the
    # removed `normed` keyword.
    p_ssfr, ssfr_edges = np.histogram(gc_cat['ssfr'][inmbin], range=[-14., -9.], bins=32, density=True)
    pssfr = UT.bar_plot(ssfr_edges, p_ssfr)
    sub2.plot(pssfr[0], pssfr[1], c='k', lw=2)

    # overplot GMM component for SFMS
    gmm_weights = fSFMS._gmix_weights[i_fit]
    gmm_means = fSFMS._gmix_means[i_fit]
    gmm_vars = fSFMS._gmix_covariances[i_fit]
    icomp = gmm_means.argmax()  # component with the largest mean
    xx = np.linspace(-14., -9, 100)
    sub2.fill_between(xx, np.zeros(len(xx)), gmm_weights[icomp]*MNorm.pdf(xx, gmm_means[icomp], gmm_vars[icomp]),
                      color='#1F77B4', linewidth=0)

    sub2.set_xlim([-13.25, -9.5])
    sub2.set_xticks([-10., -11., -12., -13.][::-1])
    sub2.set_xlabel(r'log$(\; \mathrm{SSFR}\; [\mathrm{yr}^{-1}]\;)$', fontsize=20)
    sub2.set_ylim([0., 1.5])
    sub2.set_yticks([0., 0.5, 1., 1.5])
    sub2.set_ylabel(r'$p\,(\;\mathrm{log}\; \mathrm{SSFR}\;)$', fontsize=20)
    # mass bin
    sub2.text(0.5, 0.9, '$'+str(mrange[0])+r'< \mathrm{log}\, M_* <'+str(mrange[1])+'$',
              ha='center', va='center', transform=sub2.transAxes, fontsize=20)
    sub2.text(0.9, 0.33, r'$f_\mathrm{SFMS}='+str(round(fit_fsfms[i_fit],2))+'$',
              ha='right', va='center', transform=sub2.transAxes, fontsize=20)
    fig.subplots_adjust(wspace=.3)
    fig.savefig(''.join([UT.tex_dir(), 'figs/groupcat.pdf']), bbox_inches='tight', dpi=150)
    plt.close()
    return None