Code example #1
def plot_feature_rank(dataset, show=False):
    print "Plotting plot_feature_rank, dataset: %s" % dataset
    dfs = get_feature_rankings(dataset=dataset, polynomial_terms=False)
    order = dfs[['feature', 'weight', 'group']].groupby([
        'feature', 'group'
    ])['weight'].mean().reset_index().sort_values(['group', 'weight'],
                                                  ascending=[True,
                                                             False])['feature']
    plot_specs = {
        'x_col': 'weight',
        'y_col': 'feature',
        'hue_col': 'group',
        'x_label': 'Score',
        'y_label': 'Feature',
        'order': order,
        'dodge': False,
        'labelsize': 8,
        'figsize': (8, 11),
        'font_scale': 0.8,
        'fontsize': 14,
        'show': show,
        'y_lim': None,
        'capsize': .2,
        'title': "Feature Importance",
    }

    figname = 'feature_rank_%s.pdf' % dataset
    bar_plot(dfs, figname, **plot_specs)
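This example (and the other dementia_classifier examples below) delegates all drawing to a shared bar_plot(dfs, figname, **plot_specs) helper that is not reproduced on this page. The sketch below is a minimal reconstruction assuming a seaborn backend; the keyword names mirror the plot_specs dicts used in these examples, while the defaults and styling are assumptions.

# A hedged sketch of the bar_plot helper these examples call; the real
# dementia_classifier implementation is not shown here, so the defaults
# and seaborn-based body below are assumptions.
import matplotlib.pyplot as plt
import seaborn as sns

def bar_plot(df, figname, x_col=None, y_col=None, hue_col=None, order=None,
             x_label='', y_label='', title='', figsize=(10, 8), font_scale=1.0,
             fontsize=14, labelsize=None, rotation=0, y_lim=None, dodge=True,
             capsize=None, errwidth=None, show=False):
    sns.set(style='whitegrid', font_scale=font_scale)
    fig, ax = plt.subplots(figsize=figsize)
    sns.barplot(data=df, x=x_col, y=y_col, hue=hue_col, order=order,
                dodge=dodge, capsize=capsize, errwidth=errwidth, ax=ax)
    ax.set_xlabel(x_label, fontsize=fontsize)
    ax.set_ylabel(y_label, fontsize=fontsize)
    if title:
        ax.set_title(title, fontsize=fontsize)
    if y_lim is not None:
        ax.set_ylim(y_lim)
    if labelsize is not None:
        ax.tick_params(labelsize=labelsize)
    plt.xticks(rotation=rotation)
    fig.savefig(figname, bbox_inches='tight')
    if show:
        plt.show()
    plt.close(fig)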
Code example #2
def domain_adaptation_plot_helper(classifiers, metric='acc'):
    print "Plotting domain_adaptation, classifiers: %s" % classifiers
    METHODS = ['target_only', 'source_only', 'relabeled', 'augment', 'coral']
    dfs = []
    for method in METHODS:
        for classifier in classifiers:
            df = get_da_results(classifier, method, metric)
            util.print_ci_from_df(df['folds'], method, classifier)
            dfs.append(df)

    dfs = pd.concat(dfs)

    if metric == 'acc':
        y_label = "Accuracy"
    elif metric == 'fms':
        y_label = "F-Measure"
    else:
        y_label = "AUC"

    plot_specs = {
        'x_col': 'method',
        'y_col': 'folds',
        'hue_col': 'model',
        'x_label': 'Model',
        'figsize': (10, 8),
        'font_scale': 1.2,
        'fontsize': 20,
        'y_label': y_label,
        'y_lim': (0, 1)
    }

    figname = 'domain_adapt_plot_%s_%s.pdf' % (metric, classifiers[1])
    bar_plot(dfs, figname, **plot_specs)
Code example #3
def vanilla_feature_set_plot(show=False):
    print "Plotting vanilla_feature_set_plot"
    dfs = []
    classifiers = models.CLASSIFIER_KEYS
    for classifier in classifiers:
        for metric in models.METRICS:
            df = get_vanilla_results(classifier, metric)
            util.print_ci_from_df(df['folds'], classifier, metric)
            dfs.append(df)

    dfs = pd.concat(dfs)

    plot_specs = {
        'x_col': 'model',
        'y_col': 'folds',
        'hue_col': 'metric',
        'x_label': 'Model',
        'y_label': 'Performance',
        'figsize': (12, 10),
        'font_scale': 1.2,
        'fontsize': 20,
        'y_lim': None,
        'show': show,
        'title': "10-Fold Cross Validation Performance"
    }

    figname = 'vanilla_results.pdf'
    bar_plot(dfs, figname, **plot_specs)
Code example #4
File: blog.py Project: arankong/dementia_classifier
def blog_plot():
    print "Plotting blog_plot"
    metrics = models.METRICS
    dfs = []
    for classifier in models.CLASSIFIER_KEYS:
        for metric in metrics:
            df = get_blog_results(classifier, metric)
            util.print_ci_from_df(df['folds'], classifier, metric)
            dfs.append(df)

    dfs = pd.concat(dfs)

    plot_specs = {
        'x_col': 'model',
        'y_col': 'folds',
        'hue_col': 'metric',
        'x_label': 'Model',
        'y_label': 'Performance',
        'font_scale': 1.2,
        'fontsize': 20,
        'rotation': 15
    }

    figname = 'blog_plot.pdf'

    bar_plot(dfs, figname, **plot_specs)
Code example #5
def plot_blog_feature_rank(show=False):
    print "Plotting plot_blog_feature_rank"
    dfs = get_blog_feature_rankings()
    order = dfs[['feature', 'weight', 'group']].groupby([
        'feature', 'group'
    ])['weight'].mean().reset_index().sort_values(['group', 'weight'],
                                                  ascending=[True,
                                                             False])['feature']
    plot_specs = {
        'x_col': 'weight',
        'y_col': 'feature',
        'hue_col': 'group',
        'x_label': 'Feature Score',
        'y_label': 'Feature Sets',
        'order': order,
        'dodge': False,
        'labelsize': 8,
        'figsize': (8, 11),
        'show': show,
        'y_lim': None,
        'capsize': .2,
    }

    figname = 'blog_feature_rank.pdf'
    bar_plot(dfs, figname, **plot_specs)
Code example #6
def blog_ablation_plot(metric='acc'):
    print "Plotting blog_ablation_plot, metric: %s" % metric
    classifiers = list(models.CLASSIFIER_KEYS)  # copy so removing 'DummyClassifier' below does not mutate the shared list
    ablation_sets = models.BLOG_FEATURE_SETS

    classifiers.remove('DummyClassifier')

    dfs = []

    for classifier in classifiers:
        for ab_set in ablation_sets:
            df = get_ablation_results(ab_set, classifier, metric,
                                      BLOG_ABLATION_PREFIX)
            util.print_ci_from_df(df['folds'], classifier, metric)
            dfs.append(df)

    dfs = pd.concat(dfs)

    human_readable = {"acc": "Accuracy", "fms": "F-Measure", "roc": "AUC"}

    plot_specs = {
        'x_col': 'ablation_set',
        'y_col': 'folds',
        'hue_col': 'model',
        'x_label': 'Feature Set',
        'y_label': "Change in %s " % human_readable[metric],
        'title': "Feature Ablation",
        'figsize': (10, 8),
        'fontsize': 20,
        'font_scale': 1.2,
        'y_lim': None,
        'errwidth': 0.75,
        'labelsize': 10,
        'rotation': 15
    }

    figname = 'blog_ablation_plot.pdf'

    bar_plot(dfs, figname, **plot_specs)
Code example #7
def barplot():
    labels_string = request.args.get('labels')
    values_string = request.args.get('values')
    xlabel = request.args.get('xlabel')
    ylabel = request.args.get('ylabel')
    title = request.args.get('title')
    try:
        labels, values = labels_string.split(','), util.string_to_list(
            values_string)
    except Exception as e:
        print(str(e))
        return 'Nothing to do here.'
    if len(labels) != len(values):
        return 'Nothing to do here.'
    response = util.bar_plot(labels, values, xlabel, ylabel, title)
    return response
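Here util.bar_plot(labels, values, xlabel, ylabel, title) is expected to return something Flask can send back directly. Its implementation is not shown, so the sketch below is only an assumption: it renders the bar chart to an in-memory PNG and wraps it in a Flask Response.

# A hedged sketch of what util.bar_plot might look like for this route; the
# project's actual helper is not shown, so rendering to an in-memory PNG and
# returning a Flask response is an assumption.
import io
import matplotlib
matplotlib.use('Agg')  # render off-screen; no display in a web worker
import matplotlib.pyplot as plt
from flask import Response

def bar_plot(labels, values, xlabel=None, ylabel=None, title=None):
    fig, ax = plt.subplots()
    ax.bar(range(len(values)), values, tick_label=labels)
    if xlabel:
        ax.set_xlabel(xlabel)
    if ylabel:
        ax.set_ylabel(ylabel)
    if title:
        ax.set_title(title)
    buf = io.BytesIO()
    fig.savefig(buf, format='png', bbox_inches='tight')
    plt.close(fig)
    return Response(buf.getvalue(), mimetype='image/png')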
Code example #8
def p_Xw_i_MISE(mock, ell=0, rebin=None, krange=None, method='choletsky', b=0.1):
    ''' Examine the pdf of X_w^i components that deviate significantly from  
    N(0,1) based on MISE 
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    X_w, W = NG.whiten(X, method=method) # whitened data
    
    # calculate the chi-squared values of each p(X_w^i)  
    x = np.arange(-5., 5.1, 0.1)
    mise = np.zeros(X_w.shape[1])
    for i_bin in range(X_w.shape[1]): 
        mise[i_bin] = NG.MISE(X_w[:,i_bin], b=b) 

    # plot the most discrepant components. 
    prettyplot()
    fig = plt.figure()
    sub = fig.add_subplot(111)
    i_sort = np.argsort(mise)
    print('outlier bins = ', i_sort[-5:])
    print('mise = ', mise[i_sort[-5:]])

    nbin = int(10./b)
    for i_bin in i_sort[-10:]: 
        hb_Xi, Xi_edges = np.histogram(X_w[:,i_bin], bins=nbin, range=[-5., 5.], normed=True) 
        p_X_w_arr = UT.bar_plot(Xi_edges, hb_Xi)
        sub.plot(p_X_w_arr[0], p_X_w_arr[1])

    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label='$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel('$\mathtt{X^{i}_{W}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel('$\mathtt{P(X^{i}_{W})}$', fontsize=25) 
    sub.legend(loc='upper right') 
    
    str_rebin = ''
    if rebin is not None: 
        str_rebin = '.rebin'+str(rebin)

    f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i_outlier.', method, '.', mock, '.ell', str(ell), 
        str_rebin, '.b', str(b), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None
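This and the remaining astrophysics examples rely on a UT.bar_plot utility that is never shown: it takes np.histogram output and returns x/y arrays tracing the histogram outline, suitable for plot or fill_between. The call sites are not consistent about argument order (here it is (bin_edges, values), while later examples unpack np.histogram directly, which yields (values, bin_edges)), so the sketch below, which is purely an inference, guesses which array holds the edges by its length.

# A hedged sketch of the UT.bar_plot utility assumed by these examples;
# the actual implementation is not shown here.
import numpy as np

def bar_plot(a, b):
    # Identify the bin-edge array by its length (one more entry than the
    # per-bin values), since the call sites pass the arguments in both orders.
    edges, values = (a, b) if len(a) == len(b) + 1 else (b, a)
    xx, yy = [], []
    for i, val in enumerate(values):
        xx.extend([edges[i], edges[i + 1]])  # flat segment across each bin
        yy.extend([val, val])
    return np.array([xx, yy])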
Code example #9
def div_K(div_func='kl'):
    ''' compare the KL or Renyi divergence, evaluated with different K values, for the following:
    - D( gauss(C_X) || gauss(C_X) ) 
    - D( mock X || gauss(C_X))
    - D( mock X || p(X) KDE)
    - D( mock X || p(X) GMM) 
    - D( mock X || PI p(X^i_ICA) KDE)
    - D( mock X || PI p(X^i_ICA) GMM)
    '''
    lbls = [r'$D( P(k) \parallel \mathcal{N}({\bf C}))$',
            r'$D( P(k) \parallel p_\mathrm{KDE}(P(k)))$',
            r'$D( P(k) \parallel p_\mathrm{GMM}(P(k)))$',
            r'$D( P(k) \parallel \prod_i p_\mathrm{KDE}(P(k)_i^\mathrm{ICA}))$', 
            r'$D( P(k) \parallel \prod_i p_\mathrm{GMM}(P(k)_i^\mathrm{ICA}))$']

    fig = plt.figure(figsize=(20,4))
    for i_obv, obv in enumerate(['pk.ngc', 'gmf']):
        if obv == 'pk.ngc': 
            Nref = 2000
            if div_func == 'kl': hranges = [[-0.5, 0.5], [-0.5, 7.], [-0.5, 0.5], [-0.5, 0.5], [-0.5, 0.5]]##7.]
            else: hranges = [[-0.5, 0.5] for i in range(5)]
            Ks = [5, 10, 15] 
        elif obv == 'gmf': 
            Nref = 10000
            hranges = [[-0.1, 0.4], [-0.1, 0.4], [-0.1, 0.4], [-0.1, 0.4], [-0.1, 0.4]]##7.]
            Ks = [10] 

        for K in Ks: 
            fs = ['pX_gauss.K'+str(K), 'pX_scottKDE.K'+str(K), 'pX_GMM.K'+str(K)+'.ncomp30', 
                'pXi_ICA_scottKDE.K'+str(K), 'pXi_ICA_GMM.K'+str(K)+'.ncomp30'] 
            divs, divs_ref = [], [] 
            for f in fs: 
                f_div = ''.join([UT.dat_dir(), 'diverg.', obv, '.', f, '.Nref', str(Nref), '.', 
                    div_func, '.dat']) 
                try: 
                    div = np.loadtxt(f_div)
                except IOError: 
                    print(f_div)
                    continue 
                divs.append(div) 
         
            nbins = 50
            bkgd = fig.add_subplot(2,1,i_obv+1, frameon=False)
            for i_div, div, lbl in zip(range(len(fs)), divs, lbls): 
                sub = fig.add_subplot(2,5,len(fs)*i_obv+i_div+1)
                y_max = 0. 
                hh = np.histogram(div, normed=True, range=hranges[i_div], bins=nbins)
                bp = UT.bar_plot(*hh) 
                sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1], edgecolor='none') 
                y_max = max(y_max, bp[1].max()) 
                sub.set_xlim(hranges[i_div])  
                sub.set_ylim([0., y_max*1.4]) 
                if i_obv == 0: 
                    sub.set_title(lbl) 
    
        if div_func == 'kl': 
            bkgd.set_xlabel(r'KL divergence', fontsize=20, labelpad=20)
        elif div_func == 'renyi0.5': 
            bkgd.set_xlabel(r'R\'enyi-$\alpha$ divergence', fontsize=20, labelpad=20)
        bkgd.set_xticklabels([])
        bkgd.set_yticklabels([])
        bkgd.tick_params(labelcolor='none', top='off', bottom='off', left='off', right='off')

    fig.subplots_adjust(wspace=.15, hspace=0.3)
    f_fig = ''.join([UT.fig_dir(), 'tests/Ktest_kNNdiverg.', div_func, '.png'])
    fig.savefig(f_fig, bbox_inches='tight') 
    return None
Code example #10
def p_Xw_i(mock, ell=0, rebin=None, krange=None, ica=False, pca=False): 
    ''' Test the probability distribution function of each X_w^i
    component -- p(X_w^i). First compare the histograms of p(X_w^i) 
    with N(0,1). Then compare the gaussian KDE of p(X_w^i).
    '''
    Pk = NG.dataX(mock, ell=ell, rebin=rebin, krange=krange)
    X, _ = NG.meansub(Pk)
    str_w = 'W'
    if ica and pca: 
        raise ValueError
    if ica: # ICA components
        # ICA components do not need to be Gaussian.
        # in fact the whole point of the ICA transform
        # is to capture the non-Gaussianity...
        X_white, _ = NG.whiten(X) # whitened data
        X_w, _ = NG.Ica(X_white) 
        str_w = 'ICA'
    if pca: # PCA components
        X_w, _ = NG.whiten(X, method='pca') # whitened data
        str_w = 'PCA'
    if not ica and not pca: # just whitened 
        X_w, W = NG.whiten(X) # whitened data
    
    # p(X_w^i) histograms
    fig = plt.figure(figsize=(15,7))
    sub = fig.add_subplot(121)
    for i_bin in range(X_w.shape[1]): 
        p_X_w, edges = np.histogram(X_w[:,i_bin], normed=True)
        p_X_w_arr = UT.bar_plot(edges, p_X_w)
        sub.plot(p_X_w_arr[0], p_X_w_arr[1])
    x = np.arange(-5., 5.1, 0.1)
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label='$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel('$\mathtt{X_{'+str_w+'}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel('$\mathtt{P(X_{'+str_w+'})}$', fontsize=25) 
    sub.legend(loc='upper right') 

    # p(X_w^i) gaussian KDE fits  
    pdfs = NG.p_Xw_i(X_w, range(X_w.shape[1]), x=x)

    sub = fig.add_subplot(122)
    for i_bin in range(X_w.shape[1]): 
        sub.plot(x, pdfs[i_bin])
    sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=3, label='$\mathcal{N}(0,1)$')
    sub.set_xlim([-2.5, 2.5])
    sub.set_xlabel('$\mathtt{X_{W}}$', fontsize=25) 
    sub.set_ylim([0., 0.6])
    sub.set_ylabel('$\mathtt{P(X_{W})}$', fontsize=25) 
    sub.legend(loc='upper right') 

    str_ica, str_pca = '', ''
    if ica: 
        str_ica = '.ICA'
    if pca: 
        str_pca = '.PCA'

    if rebin is None: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i', str_pca, str_ica, '.', mock, '.ell', str(ell), '.png'])
    else: 
        f = ''.join([UT.fig_dir(), 'tests/test.p_Xw_i', str_pca, str_ica, '.', mock, '.ell', str(ell), '.rebin', str(rebin), '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
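UT.gauss(x, 1., 0.) is plotted above as the N(0,1) reference curve, so its arguments are presumably (x, sigma, mu) and its return value the normal density; the one-liner below is a sketch under that assumption, not the project's own code.

# Assumed signature: gauss(x, sig, mu) returning the normal pdf.
import numpy as np

def gauss(x, sig, mu):
    # standard normal density when sig=1, mu=0
    return np.exp(-0.5 * ((x - mu) / sig) ** 2) / (sig * np.sqrt(2. * np.pi))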
Code example #11
def GMF_p_Xw_i(ica=False, pca=False): 
    ''' Test the probability distribution function of each transformed X
    component -- p(X^i). First compare the histograms of p(X_w^i) 
    with N(0,1). Then compare the gaussian KDE of p(X_w^i).
    '''
    gmf = NG.X_gmf_all() # import all the GMF mocks 
    X, _ = NG.meansub(gmf)
    str_w = 'W'
    if ica and pca: raise ValueError
    if ica: # ICA components
        # ICA components do not need to be Gaussian.
        # in fact the whole point of the ICA transform
        # is to capture the non-Gaussianity...
        X_white, _ = NG.whiten(X) # whitened data
        X_w, _ = NG.Ica(X_white) 
        str_w = 'ICA'
    if pca: # PCA components
        X_w, _ = NG.whiten(X, method='pca') # whitened data
        str_w = 'PCA'
    if not ica and not pca: # just whitened 
        X_w, W = NG.whiten(X) # whitened data
    
    # p(X_w^i) histograms
    fig = plt.figure(figsize=(5*gmf.shape[1],4))
    for icomp in range(gmf.shape[1]): 
        sub = fig.add_subplot(1, gmf.shape[1], icomp+1)
        # histogram of X_w^i s 
        hh = np.histogram(X_w[:,icomp], normed=True, bins=50, range=[-5., 5.])
        p_X_w_arr = UT.bar_plot(*hh)
        sub.fill_between(p_X_w_arr[0], np.zeros(len(p_X_w_arr[1])), p_X_w_arr[1], 
                color='k', alpha=0.25)
        x = np.linspace(-5., 5., 100)
        sub.plot(x, UT.gauss(x, 1., 0.), c='k', lw=2, ls=':', label='$\mathcal{N}(0,1)$')
        # p(X_w^i) gaussian KDE fits  
        t_start = time.time() 
        pdf = NG.p_Xw_i(X_w, icomp, x=x, method='gkde')
        sub.plot(x, pdf, lw=2.5, label='Gaussian KDE')
        print('scipy Gaussian KDE ', time.time()-t_start)
        # p(X_w^i) SKlearn KDE fits  
        t_start = time.time() 
        pdf = NG.p_Xw_i(X_w, icomp, x=x, method='sk_kde')
        sub.plot(x, pdf, lw=2.5, label='SKlearn KDE')
        print('SKlearn CV best-fit KDE ', time.time()-t_start)
        # p(X_w^i) statsmodels KDE fits  
        t_start = time.time() 
        pdf = NG.p_Xw_i(X_w, icomp, x=x, method='sm_kde')
        sub.plot(x, pdf, lw=2.5, label='StatsModels KDE')
        print('Stats Models KDE ', time.time()-t_start)
        # p(X_w^i) GMM fits  
        pdf = NG.p_Xw_i(X_w, icomp, x=x, method='gmm', n_comp_max=20)
        sub.plot(x, pdf, lw=2.5, ls='--', label='GMM')
        sub.set_xlim([-3., 3.])
        sub.set_xlabel('$X_{'+str_w+'}^{('+str(icomp)+')}$', fontsize=25) 
        sub.set_ylim([0., 0.6])
        if icomp == 0: 
            sub.set_ylabel('$P(X_{'+str_w+'})$', fontsize=25) 
            sub.legend(loc='upper left', prop={'size': 15}) 

    str_ica, str_pca = '', ''
    if ica: str_ica = '.ICA'
    if pca: str_pca = '.PCA'

    f = ''.join([UT.fig_dir(), 'tests/test.GMF_p_Xw_i', str_pca, str_ica, '.png'])
    fig.savefig(f, bbox_inches='tight') 
    return None 
Code example #12
def divGMF(div_func='kl', Nref=1000, K=5, n_mc=10, n_comp_max=10, n_mocks=2000):
    ''' compare the divergence estimates between 
    D( gauss(C_gmf) || gauss(C_gmf) ),  D( gmfs || gauss(C_gmf) ), 
    D( gmfs || p(gmfs) KDE), D( gmfs || p(gmfs) GMM), 
    D( gmfs || PI p(gmfs^i_ICA) KDE), and D( gmfs || PI p(gmfs^i_ICA) GMM)
    '''
    if isinstance(Nref, float): 
        Nref = int(Nref)
    # read in mock GMFs from all HOD realizations (20,000 mocks)
    gmfs_mock = NG.X_gmf_all()[:n_mocks]
    n_mock = gmfs_mock.shape[0] # number of mocks 
    print("%i mocks" % n_mock) 

    gmfs_mock_meansub, _ = NG.meansub(gmfs_mock) # mean subtract
    X_w, W = NG.whiten(gmfs_mock_meansub)
    X_ica, _ = NG.Ica(X_w)  # ICA transformation 

    C_gmf = np.cov(X_w.T) # covariance matrix

    # p(gmfs) GMM
    gmms, bics = [], [] 
    for i_comp in range(1,n_comp_max+1):
        gmm = GMix(n_components=i_comp)
        gmm.fit(X_w) 
        gmms.append(gmm)
        bics.append(gmm.bic(X_w))
    ibest = np.array(bics).argmin() 
    kern_gmm = gmms[ibest]

    # p(gmfs) KDE 
    t0 = time.time() 
    grid = GridSearchCV(skKDE(),
            {'bandwidth': np.linspace(0.1, 1.0, 30)},
            cv=10) # 10-fold cross-validation
    grid.fit(X_w)
    kern_kde = grid.best_estimator_
    dt = time.time() - t0 
    print('%f sec' % dt) 
    
    # PI p(gmfs^i_ICA) GMM
    kern_gmm_ica = [] 
    for ibin in range(X_ica.shape[1]): 
        gmms, bics = [], [] 
        for i_comp in range(1,n_comp_max+1):
            gmm = GMix(n_components=i_comp)
            gmm.fit(X_ica[:,ibin][:,None]) 
            gmms.append(gmm)
            bics.append(gmm.bic(X_ica[:,ibin][:,None]))
        ibest = np.array(bics).argmin() 
        kern_gmm_ica.append(gmms[ibest])
    
    # PI p(gmfs^i_ICA) KDE  
    kern_kde_ica = [] 
    for ibin in range(X_ica.shape[1]): 
        t0 = time.time() 
        grid = GridSearchCV(skKDE(),
                {'bandwidth': np.linspace(0.1, 1.0, 30)},
                cv=10) # 10-fold cross-validation
        grid.fit(X_ica[:,ibin][:,None]) 
        kern_kde_ica.append(grid.best_estimator_) 
        dt = time.time() - t0 
        print('%f sec' % dt) 

    # calculate the divergences now 
    div_gauss_ref, div_gauss = [], []
    div_gmm, div_gmm_ica = [], [] 
    div_kde, div_kde_ica = [], [] 
    for i in range(n_mc): 
        print('%i montecarlo' % i)
        t_start = time.time() 
        # reference divergence in order to showcase the estimator's scatter
        # Gaussian distribution described by C_gmf with same n_mock mocks 
        gauss = mvn(np.zeros(gmfs_mock.shape[1]), C_gmf, size=n_mock)
        div_gauss_ref_i = NG.kNNdiv_gauss(gauss, C_gmf, Knn=K, div_func=div_func, Nref=Nref)
        div_gauss_ref.append(div_gauss_ref_i)
        # estimate divergence between gmfs_white and a 
        # Gaussian distribution described by C_gmf
        div_gauss_i = NG.kNNdiv_gauss(X_w, C_gmf, Knn=K, div_func=div_func, Nref=Nref)
        div_gauss.append(div_gauss_i)
        # D( gmfs || p(gmfs) GMM)
        div_gmm_i = NG.kNNdiv_Kernel(X_w, kern_gmm, Knn=K, div_func=div_func, 
                Nref=Nref, compwise=False) 
        div_gmm.append(div_gmm_i)
        # D( gmfs || p(gmfs) KDE)
        div_kde_i = NG.kNNdiv_Kernel(X_w, kern_kde, Knn=K, div_func=div_func, 
                Nref=Nref, compwise=False) 
        div_kde.append(div_kde_i)
        # D( gmfs || PI p(gmfs^i_ICA) GMM), 
        div_gmm_ica_i = NG.kNNdiv_Kernel(X_ica, kern_gmm_ica, Knn=K, div_func=div_func, 
                Nref=Nref, compwise=True)
        div_gmm_ica.append(div_gmm_ica_i)
        # D( gmfs || PI p(gmfs^i_ICA) KDE), 
        div_kde_ica_i = NG.kNNdiv_Kernel(X_ica, kern_kde_ica, Knn=K, div_func=div_func, 
                Nref=Nref, compwise=True)
        div_kde_ica.append(div_kde_ica_i)
        print('t= %f sec' % round(time.time()-t_start,2))

    fig = plt.figure(figsize=(10,5))
    sub = fig.add_subplot(111)
    hrange = [-0.15, 0.6]
    nbins = 50
    
    divs = [div_gauss_ref, div_gauss, div_gmm, div_kde, div_gmm_ica, div_kde_ica]
    labels = ['Ref.', r'$D(\{\zeta_i^{(m)}\}\parallel \mathcal{N}({\bf C}^{(m)}))$', 
            r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{GMM}(\{\zeta^{m}\}))$',
            r'$D(\{\zeta^{(m)}\}\parallel p_\mathrm{KDE}(\{\zeta^{m}\}))$',
            r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{GMM}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$', 
            r'$D(\{\zeta_\mathrm{ICA}^{(m)}\}\parallel \prod_{i} p^\mathrm{KDE}(\{\zeta_{i, \mathrm{ICA}}^{m}\}))$']
    y_max = 0.
    for div, lbl in zip(divs, labels): 
        hh = np.histogram(np.array(div), normed=True, range=hrange, bins=nbins)
        bp = UT.bar_plot(*hh) 
        sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1], edgecolor='none', 
                alpha=0.5, label=lbl) 
        y_max = max(y_max, bp[1].max()) 
        if (np.average(div) < hrange[0]) or (np.average(div) > hrange[1]): 
            print('divergence of %s (%f) is outside range' % (lbl, np.average(div)))
    sub.set_xlim(hrange) 
    sub.set_ylim([0., y_max*1.2]) 
    sub.legend(loc='upper left', prop={'size': 15})
    # xlabels
    if 'renyi' in div_func: 
        alpha = float(div_func.split(':')[-1])
        sub.set_xlabel(r'Renyi-$\alpha='+str(alpha)+'$ divergence', fontsize=20)
    elif 'kl' in div_func: 
        sub.set_xlabel(r'KL divergence', fontsize=20)
    if 'renyi' in div_func: str_div = 'renyi'+str(alpha) 
    elif div_func == 'kl': str_div = 'kl'
    f_fig = ''.join([UT.fig_dir(), 'tests/kNN_divergence.gmf.K', str(K), '.', str(n_mocks), 
        '.', str_div, '.png'])
    fig.savefig(f_fig, bbox_inches='tight') 
    return None
Code example #13
def div_ICA(obv='pk.ngc', K=10, div_func='kl'):
    ''' compare the KL or Renyi divergence for different ICA decomposition algorithms: 
    FastICA deflation, FastICA parallel, Infomax ICA 
    - D( mock X || PI p(X^i_ICA) KDE)
    - D( mock X || PI p(X^i_ICA) GMM)
    '''
    if obv == 'pk.ngc':  str_obv = 'P(k)'
    elif obv == 'gmf': str_obv = '\zeta(N)'
    lbls = [r'$D( '+str_obv+' \parallel \prod_i p_\mathrm{KDE}(P(k)_i^\mathrm{ICA}))$', 
            r'$D( '+str_obv+' \parallel \prod_i p_\mathrm{GMM}(P(k)_i^\mathrm{ICA}))$']

    icas = ['ICA', 'parICA'] 

    if obv == 'pk.ngc': 
        Nref = 2000
        hrange = [-0.5, 0.5]
    elif obv == 'gmf': 
        Nref = 10000
        hrange = [-0.1, 0.4]

    fig = plt.figure(figsize=(10,4))
    bkgd = fig.add_subplot(111, frameon=False)
    for i_div, str_div in enumerate(['scottKDE.K'+str(K), 'GMM.K'+str(K)+'.ncomp30']): 

        divs = []
        for ica in icas: 
            f_div = ''.join([UT.dat_dir(), 'diverg.', obv, 
                '.pXi_', ica, '_', str_div, '.Nref', str(Nref), '.', div_func, '.dat']) 
            try: 
                div = np.loadtxt(f_div)
            except IOError: 
                print(f_div)
                continue 
            divs.append(div) 

        nbins = 50
        sub = fig.add_subplot(1,2,i_div+1)
        y_max = 0. 
        for div, ica in zip(divs, icas): 
            print(np.mean(div))
            hh = np.histogram(div, normed=True, range=hrange, bins=nbins)
            bp = UT.bar_plot(*hh) 
            sub.fill_between(bp[0], np.zeros(len(bp[0])), bp[1], edgecolor='none', label=ica) 
            y_max = max(y_max, bp[1].max()) 
        if i_div == 0: sub.legend(loc='upper left', prop={'size': 20}) 
        sub.set_xlim(hrange)  
        sub.set_ylim([0., y_max*1.4]) 
        sub.set_title(lbls[i_div]) 

    if div_func == 'kl': 
        bkgd.set_xlabel(r'KL divergence', fontsize=20, labelpad=20)
    elif div_func == 'renyi0.5': 
        bkgd.set_xlabel(r'R\'enyi-$\alpha$ divergence', fontsize=20, labelpad=20)
    bkgd.set_xticklabels([])
    bkgd.set_yticklabels([])
    bkgd.tick_params(labelcolor='none', top='off', bottom='off', left='off', right='off')

    fig.subplots_adjust(wspace=.15, hspace=0.3)
    f_fig = ''.join([UT.fig_dir(), 'tests/',
        'ICA_kNNdiverg.', obv, '.K', str(K), '.', div_func, '.png'])
    fig.savefig(f_fig, bbox_inches='tight') 
    return None
Code example #14
File: Read_performance.py Project: weishancc/SPChain
# Plotted read functions include:
#    - readConsent
#    - readArtwork
#    - getHistoryForArtwork
#    - readLog
#    - readModel
#    - readBalance
#    - invokeModel

import util
from matplotlib import pyplot as plt

if __name__ == "__main__":
    filterFunc = [
        'initialConsent', 'grantRevokeConsent', 'uploadArtwork',
        'transferArtwork', 'addLog', 'addModel', 'addWallet',
        'transferBlalance'
    ]
    data = util.concat_result(filterFunc, 7)  # 7 for tps

    fig, ax = plt.subplots(figsize=(10, 6))
    util.bar_plot(ax, data, total_width=.75, single_width=.9)

    plt.xticks(range(7), ["10", "20", "30", "40", "50", "60", "70"])
    plt.xlabel('txDuration (sec)')
    plt.ylabel('Throughput (tps)')
    plt.title(
        'Read performance of different functions under different transaction durations'
    )
    plt.show()
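The util.bar_plot(ax, data, total_width, single_width) call above follows the familiar grouped-bar-chart helper pattern, where data maps each series name to its per-bucket values. The SPChain helper itself (and util.concat_result) is not shown, so the version below is a sketch of that pattern, not the project's code.

# A hedged sketch of a grouped-bar helper with this signature; assumes
# data is a dict of {series name: list of per-bucket values}.
import matplotlib.pyplot as plt

def bar_plot(ax, data, total_width=0.8, single_width=1.0):
    n_series = len(data)
    bar_width = total_width / n_series  # width allotted to each series in a group
    for i, (name, values) in enumerate(data.items()):
        # shift each series so the group of bars is centred on the x tick
        offset = (i - n_series / 2) * bar_width + bar_width / 2
        ax.bar([x + offset for x in range(len(values))],
               values, width=bar_width * single_width, label=name)
    ax.legend()

With the xticks set afterwards (as in the script), each dictionary key becomes one legend entry and each list index one group on the x axis.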
Code example #15
def new_feature_set_plot(metric='acc', absolute=True, poly=True, show=False):
    print "Plotting new_feature_set_plot, metric: %s" % metric
    classifiers = list(models.CLASSIFIER_KEYS)
    new_features = []
    if absolute:
        new_features += ['none']
    new_features += models.NEW_FEATURE_SETS
    classifiers.remove('DummyClassifier')
    dfs = []

    for fs in new_features:
        for classifier in classifiers:
            df = get_new_feature_results(fs,
                                         classifier,
                                         metric,
                                         absolute=absolute,
                                         poly=poly)
            util.print_ci_from_df(df['folds'], fs, classifier)
            dfs.append(df)

    dfs = pd.concat(dfs)
    dfs = dfs.replace('none', 'baseline')

    y_lim = (.68, .90)

    if metric == 'acc':
        y_label = "Accuracy"
    elif metric == 'fms':
        y_label = "F-Measure"
    else:
        y_label = "AUC"
        y_lim = (.70, .95)

    figname = 'new_feature_plot_%s' % metric
    title = 'Performance w/ New Feature Sets'
    if not absolute:
        y_label = "Change in %s" % y_label
        y_lim = (-.10, .10)
        figname = figname + '_relative'
        title = 'Change in Performance w/ New Feature Sets'

    plot_specs = {
        'x_col': 'new_feature_set',
        'y_col': 'folds',
        'hue_col': 'model',
        'x_label': 'Feature Set',
        'y_label': y_label,
        'y_lim': y_lim,
        'figsize': (10, 8),
        'fontsize': 20,
        'font_scale': 1.2,
        'labelsize': 15,
        'show': show,
        'title': title,
    }

    # We use polynomial terms as well for halves
    if poly:
        dfs = dfs.replace('halves', 'halves+quadratic')
    else:
        figname = figname + '_without_quadratic'

    figname = figname + '.pdf'
    bar_plot(dfs, figname, **plot_specs)
Code example #16
File: paper.py Project: changhoonhahn/centralMS
def groupcatSFMS(mrange=[10.6,10.8]): 
    '''Figure of the z~0 group catalog. 
    Panel a) SFR-M* relation 
    Panel b) P(SSFR) with SFMS fitting 
    '''
    # Read in Jeremy's group catalog  with Mr_cut = -18
    gc = Cat.Observations('group_catalog', Mrcut=18, position='central')
    gc_cat = gc.Read() 
    fig = plt.figure(figsize=(10,5)) 

    # fit the SFMS using lettalkaboutquench sfms fitting
    _fSFMS = fstarforms() 
    _fit_logm, _fit_logsfr = _fSFMS.fit(gc_cat['mass'], gc_cat['sfr'], method='gaussmix', fit_range=None)
    logsfr_ms = _fSFMS.powerlaw(logMfid=10.5) 
    print(_fSFMS._powerlaw_m)
    print(_fSFMS._powerlaw_c)

    fSFMS = fstarforms() 
    fit_logm, _ = fSFMS.fit(gc_cat['mass'], gc_cat['sfr'], method='gaussmix', fit_range=mrange)
    _, fit_fsfms = fSFMS.frac_SFMS()
    i_fit = np.abs(fit_logm - np.mean(mrange)).argmin()

    # log SFR - log M* highlighting where the SFMS lies 
    sub1 = fig.add_subplot(1,2,1)
    DFM.hist2d(gc_cat['mass'], gc_cat['sfr'], color='#ee6a50',
            levels=[0.68, 0.95], range=[[9., 12.], [-3.5, 1.5]], 
            plot_datapoints=True, fill_contours=False, plot_density=True, ax=sub1) 
    gc = Cat.Observations('group_catalog', Mrcut=18, position='central')
    gc_cat = gc.Read() 
    #sub1.vlines(mrange[0], -5., 2., color='k', linewidth=2, linestyle='--')
    #sub1.vlines(mrange[1], -5., 2., color='k', linewidth=2, linestyle='--')
    #sub1.fill_between(mrange, [2.,2.], [-5.,-5], color='#1F77B4', alpha=0.25)
    sub1.fill_between(mrange, [2.,2.], [-5.,-5], color='k', linewidth=0, alpha=0.25)
    print(_fit_logm, _fit_logsfr)
    sub1.plot(np.linspace(9.8, 11., 10), logsfr_ms(np.linspace(9.8, 11., 10)), c='k', linestyle='--') 
    sub1.set_xticks([9., 10., 11., 12.])
    sub1.set_xlabel('log$(\; M_*\; [M_\odot]\;)$', fontsize=20)
    sub1.set_yticks([-3., -2., -1., 0., 1.])
    sub1.set_ylabel('log$(\; \mathrm{SFR}\; [M_\odot/\mathrm{yr}]\;)$', fontsize=20)
    sub1.text(0.95, 0.1, 'SDSS central galaxies',
            ha='right', va='center', transform=sub1.transAxes, fontsize=20)

    # P(log SSFR) 
    sub2 = fig.add_subplot(1,2,2)
    inmbin = np.where((gc_cat['mass'] > mrange[0]) & (gc_cat['mass'] < mrange[1]))
    # np.histogram returns (counts, bin_edges); name them accordingly
    pp, bedge = np.histogram(gc_cat['ssfr'][inmbin], range=[-14., -9.], bins=32, normed=True)
    pssfr = UT.bar_plot(bedge, pp)
    sub2.plot(pssfr[0], pssfr[1], c='k', lw=2) 
    # overplot GMM component for SFMS
    gmm_weights = fSFMS._gmix_weights[i_fit]
    gmm_means = fSFMS._gmix_means[i_fit]
    gmm_vars = fSFMS._gmix_covariances[i_fit]
    icomp = gmm_means.argmax()
    xx = np.linspace(-14., -9, 100)
    sub2.fill_between(xx, np.zeros(len(xx)), gmm_weights[icomp]*MNorm.pdf(xx, gmm_means[icomp], gmm_vars[icomp]), 
            color='#1F77B4', linewidth=0)

    for i_comp in range(len(gmm_vars)): 
        if i_comp == 0: 
            gmm_tot = gmm_weights[i_comp]*MNorm.pdf(xx, gmm_means[i_comp], gmm_vars[i_comp])
        else: 
            gmm_tot += gmm_weights[i_comp]*MNorm.pdf(xx, gmm_means[i_comp], gmm_vars[i_comp])
    
    #sub2.plot(xx, gmm_tot, color='r', linewidth=2)

    sub2.set_xlim([-13.25, -9.5]) 
    sub2.set_xticks([-10., -11., -12., -13.][::-1])
    #sub2.set_xlim([-9.5, -13.25]) 
    #sub2.set_xticks([-10., -11., -12., -13.])
    sub2.set_xlabel('log$(\; \mathrm{SSFR}\; [\mathrm{yr}^{-1}]\;)$', fontsize=20)
    sub2.set_ylim([0., 1.5]) 
    sub2.set_yticks([0., 0.5, 1., 1.5])
    sub2.set_ylabel('$p\,(\;\mathrm{log}\; \mathrm{SSFR}\;)$', fontsize=20)
    # mass bin 
    sub2.text(0.5, 0.9, '$'+str(mrange[0])+'< \mathrm{log}\, M_* <'+str(mrange[1])+'$',
            ha='center', va='center', transform=sub2.transAxes, fontsize=20)
    sub2.text(0.9, 0.33, '$f_\mathrm{SFMS}='+str(round(fit_fsfms[i_fit],2))+'$',
            ha='right', va='center', transform=sub2.transAxes, fontsize=20)
    fig.subplots_adjust(wspace=.3)
    fig.savefig(''.join([UT.tex_dir(), 'figs/groupcat.pdf']), bbox_inches='tight', dpi=150) 
    plt.close()
    return None