Example #1
0
 def test_ndim_2_facet_wrap(self):
     """Wrap the diamonds price plot on two variables and check the grid.

     The facet layout is expected to be 7 rows by 6 columns with
     ``facets.ndim`` equal to 40.
     """
     base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
     plot = base + gg.facet_wrap('cut', 'clarity')
     layout = plot.facets
     shape = (layout.nrow, layout.ncol)
     self.assertEqual(shape[0], 7)
     self.assertEqual(shape[1], 6)
     self.assertEqual(layout.ndim, 40)
Example #2
0
 def test_ndim_2_facet_wrap_subplots(self):
     """Check the subplot grid built for a two-variable facet wrap.

     ``make_facets()`` must return subplots arranged as 7 rows by 6 columns.
     """
     base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
     plot = base + gg.facet_wrap('cut', 'clarity')
     _fig, axes = plot.make_facets()
     n_rows, n_cols = axes.shape
     self.assertEqual(n_rows, 7)
     self.assertEqual(n_cols, 6)
def plot_bin_dists(df, bin_def="distance_bin <= 500"):
    """Build a faceted histogram of ``R2`` for the rows selected by *bin_def*.

    The global matplotlib figure size is changed as a side effect. *df* is
    filtered with ``df.query(bin_def)``; the returned plot is faceted by
    ``distance_bin`` and titled with the query string itself.
    """
    base_size = np.array([16, 12])
    plt.rcParams['figure.figsize'] = base_size * 0.65

    subset = df.query(bin_def)
    plot = gp.ggplot(gp.aes(x='R2'), data=subset)
    plot = plot + gp.geom_histogram(fill='coral')
    plot = plot + gp.facet_wrap("distance_bin")
    plot = plot + gp.theme_seaborn(context='talk')
    plot = plot + gp.ggtitle(bin_def)
    return plot
Example #4
0
def _ggplot(df, out_file):
    """Plot faceted items with ggplot wrapper on top of matplotlib.

    The ``variant.type``, ``category`` and ``caller`` columns of *df* are
    relabelled in place, then a bar chart of ``value.floor`` per caller,
    faceted by variant type and category, is saved to *out_file*.

    XXX Not yet functional
    """
    import ggplot as gg

    # Unknown variant types / categories raise KeyError; unknown callers
    # are mapped to None.
    df["variant.type"] = [vtype_labels[vtype] for vtype in df["variant.type"]]
    df["category"] = [cat_labels[cat] for cat in df["category"]]
    df["caller"] = [caller_labels.get(name, None) for name in df["caller"]]

    plot = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    plot = plot + gg.geom_bar()
    plot = plot + gg.facet_wrap("variant.type", "category")
    plot = plot + gg.theme_seaborn()
    gg.ggsave(plot, out_file)
Example #5
0
def _ggplot(df, out_file):
    """Plot faceted items with ggplot wrapper on top of matplotlib.

    Rewrites three label columns of *df* in place and saves a bar chart,
    faceted by ``variant.type`` and ``category``, to *out_file*.

    XXX Not yet functional
    """
    import ggplot as gg

    # (column, relabelling function) pairs, applied in this order. The first
    # two raise KeyError for unknown values; callers fall back to None.
    relabel = (
        ("variant.type", lambda vtype: vtype_labels[vtype]),
        ("category", lambda cat: cat_labels[cat]),
        ("caller", lambda name: caller_labels.get(name, None)),
    )
    for column, label_of in relabel:
        df[column] = [label_of(value) for value in df[column]]

    chart = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    chart = chart + gg.geom_bar()
    chart = chart + gg.facet_wrap("variant.type", "category")
    chart = chart + gg.theme_seaborn()
    gg.ggsave(chart, out_file)
Example #6
0
def plotHistogramMeans(hist, fileName):
    """Save one line plot per cluster of the mean histograms in *hist*.

    Every row of *hist* is treated as one cluster. Its values are plotted
    against their column position and the plots are faceted by the row
    number (column ``ID``).

    Args:
        hist: 2-D array whose first axis indexes clusters
            (``hist.shape[0]`` rows, ``hist.shape[1]`` values per row).
        fileName: file name appended to ``'./IMG/'`` to form the output
            path.
    """
    num_clust = hist.shape[0]
    # Column vector of cluster ids, prepended to the histogram values.
    # A plain ndarray replaces the deprecated np.mat.
    IDS = np.arange(num_clust).reshape(num_clust, 1)

    histD = np.concatenate((IDS, hist), axis=1)

    # list(...) keeps this working on Python 3, where range() is not a list
    # and cannot be concatenated to one.
    Data = pd.DataFrame(histD, columns=['ID'] + list(range(hist.shape[1])))
    Melted = pd.melt(Data, id_vars=['ID'])
    pv = (ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted)
          + ggplot.geom_line()
          + ggplot.facet_wrap("ID"))
    # The function-call form prints the same text on Python 2 and Python 3.
    print("Saving mean histograms")
    ggplot.ggsave(pv, './IMG/' + fileName)
def plot_after_transmission_results(data, path_names):
    """Plot life months per scenario against each varied input value.

    Args:
        data: mapping from run name to a mapping whose ``'popstats'`` entry
            is a DataFrame with ``'LMs_'`` and ``'RUN_NAME_'`` columns. Run
            names are classified by substring, checked in this order:
            ``'HIV+'`` (Infected), ``'HIV-'`` (Uninfected), ``'Incidence'``
            (Incidence). Names matching none of these contribute no rows.
        path_names: mapping whose ``'transmission'`` entry is the directory
            that holds ``Input files``.

    The plot is saved under
    ``<transmission>/Input files/Plots for final runs``; nothing is returned.
    """
    # import input data for transmission analysis
    var_and_val = pd.DataFrame(columns=['x', 'Variable'], index=range(0, 12))
    plot_lm = pd.DataFrame(
        columns=['x', 'Life Months', 'Scenario', 'Variable'],
        index=range(0, 24))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    col = [
        'Yearly incidence in MSM',
        'Number of HIV uninfected individuals (HRG size)',
        'Number of HIV infected individuals in primary cohort at t=0'
    ]
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    base_val = [0.009, 2960000, 136400]

    for name, label, base in zip(col, col_adj, base_val):
        # First input row whose value differs from the base case.
        idx = data_in.loc[data_in.loc[:, name] != base, name].index.values[0]
        rows = slice(idx - 1, idx + 3 - 1)
        var_and_val.loc[rows, 'x'] = data_in.loc[idx:idx + 3, name].values
        var_and_val.loc[rows, 'Variable'] = label

    # Substring marker -> position in col_adj / var_idx. The order matters:
    # the first marker found in the run name wins.
    markers = (('HIV+', 2), ('HIV-', 1), ('Incidence', 0))

    # NOTE(review): row_idx starts at -2 and advances once per key, matched
    # or not, so a matching first key would target rows -2..-1. Presumably
    # the first key of `data` is a non-matching base case -- confirm.
    row_idx = -2
    var_idx = [-1, -1, -1]
    for var in data:
        slot = next((k for marker, k in markers if marker in var), None)
        if slot is not None:
            var_idx[slot] += 1
            rows = slice(row_idx, row_idx + 1)
            matches = var_and_val.loc[
                var_and_val['Variable'] == col_adj[slot]]
            plot_lm.loc[rows, 'x'] = matches['x'].values[var_idx[slot]]
            plot_lm.loc[rows, 'Variable'] = matches['Variable'].values[
                var_idx[slot]]
            popstats = data[var]['popstats']
            plot_lm.loc[rows, 'Life Months'] = popstats.loc[:, 'LMs_'].values
            plot_lm.loc[rows, 'Scenario'] = popstats.loc[
                :, 'RUN_NAME_'].values

        row_idx += 2

    # plot
    save_path = os.path.join(path_names['transmission'], r'Input files',
                             r'Plots for final runs')
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    (ggplot(aes(x='x', y='Life Months', color='Scenario'), plot_lm) +
     geom_line() + facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Comparison of '))
Example #8
0
import pandas as pd
import numpy as np
# from source import view_and_print_output
import ggplot as gg


# Collect one frame per (num_layers, num_nodes) configuration and concatenate
# once at the end. DataFrame.append copied the whole accumulated frame on each
# iteration and no longer exists in pandas 2.x.
frames = []
configurations = [(2, 50), (2, 100), (2, 150), (2, 200),
                  (4, 50), (4, 100), (4, 150), (4, 200)]
for num_layers, num_nodes in configurations:
    file_coarse = '../../data/coarse_lambda_dropout_' + str(num_layers) + '_' + str(num_nodes) + '.txt'
    newdata = pd.read_csv(file_coarse)
    newdata = newdata.sort_values(by='validation error', ascending=True)
    newdata['lambda'] = np.log10(newdata['lambda'])
    # Rank within the file scaled to [0, 1) and cubed; used as point colour.
    newdata['index'] = (np.arange(len(newdata), dtype='float')/len(newdata))**3
    newdata['config'] = str(num_layers * 100 + num_nodes) + ' ' + str(num_layers) + ' ' + str(num_nodes)
    frames.append(newdata)
df = pd.concat(frames)
print(df.sort_values(by='validation error', ascending=False).head(20))
p = gg.ggplot(gg.aes(x='lambda', y='dropout prob', color='index'), data=df) + \
        gg.geom_point() + \
        gg.xlab('lambda') + \
        gg.ylab('dropout prob') + \
        gg.scale_x_continuous(limits=(-5, 2)) + \
        gg.facet_wrap('config')
print(p)

# Conclusion: ignore dropout
"""Plot target variable as time series."""

import get_data
from ggplot import aes, geom_line, facet_wrap, ggplot


if __name__ == "__main__":
    # Load all data and draw one faint line per date in each facet,
    # with one facet per name.
    df = get_data.get_all_data()

    p = ggplot(df, aes('datetime', 'cap', group='date'))
    p = p + geom_line(alpha=0.2)
    p = p + facet_wrap('name')
    p.save('../output/time_series.pdf')
def plot_transmission_results(tx_results, percentage_decline, save_path,
                              path_names):
    """Plot percentage decline and monthly transmissions for every run.

    Args:
        tx_results: transmission results. Per the original notes this is a
            dictionary with one entry per folder holding CEPAC results; each
            entry has the keys ``'monthly'`` (primary transmissions data,
            per run) and ``'popstats'``. A deep copy is taken, so the
            argument itself is not modified.
        percentage_decline: dictionary of percentage-decline values, one per
            folder having CEPAC results.
        save_path: exact folder where the images are saved.
        path_names: paths to the transmission and sensitivity directories;
            only ``path_names['transmission']`` is read here.

    Keys of ``tx_results`` and ``percentage_decline`` are classified by
    substring, checked in this order: ``'HIV+'`` (Infected), ``'HIV-'``
    (Uninfected), ``'Incidence'`` (Incidence). Other keys are ignored.
    Nothing is returned; all output is written to *save_path*.
    """
    tx_data = deepcopy(tx_results)
    t = 120  # months of monthly output used per run
    total_var = 3
    total_val = 4
    run_col = ['RunA tx', 'RunB tx', 'RunC tx']
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    # Substring marker -> position in col / col_adj. Order is significant.
    markers = (('HIV+', 2), ('HIV-', 1), ('Incidence', 0))

    def slot_of(name):
        """Return the col_adj position for *name*, or None if unmatched."""
        for marker, position in markers:
            if marker in name:
                return position
        return None

    #%% plot percentage decline
    data_plot = pd.DataFrame(
        columns=['x', 'Percentage decline', 'Transmissions', 'Variable'],
        index=range(0, total_var * total_val))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    col = [
        'Incidence rate per 100 PY specific to high-risk group 1',
        'HIV uninfected individuals in high-risk group 1',
        'HIV infected individuals in high-risk group 1'
    ]
    data_in[col[0]] = data_in[col[0]].round(1)
    base_val = [np.float64(0.9), 2960000, 136400]
    y1_values = {col[0]: [], col[1]: [], col[2]: []}
    for var in percentage_decline:
        slot = slot_of(var)
        if slot is not None:
            y1_values[col[slot]].append(percentage_decline[var])

    for name, label, base in zip(col, col_adj, base_val):
        # First input row whose value differs from the base case.
        idx = data_in.loc[data_in.loc[:, name] != base, name].index.values[0]
        rows = slice(idx - 1, idx + 3 - 1)
        data_plot.loc[rows, 'x'] = data_in.loc[idx:idx + 3, name].values
        data_plot.loc[rows, 'Variable'] = label
        data_plot.loc[rows, 'Percentage decline'] = y1_values[name]

    # plot (values above 200 are left out)
    df_float = data_plot.loc[data_plot.loc[:, 'Percentage decline'] <= 200, :]
    (ggplot(aes(x='x', y='Percentage decline'), df_float) + geom_line() +
     facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Percentage decline'))
    del df_float

    #%% visualizing transmissions
    # index = range(time * number of values for each variable * number of
    # variables)
    data_plot_tx = pd.DataFrame(
        index=range(t * total_var * total_val),
        columns=['Variable', 'Value', 'RunA tx', 'RunB tx', 'RunC tx'])

    def set_abc(run, var, var_idx, var_name, var_value_idx):
        """Fill the t-row slot of *var* with its name, value and run data.

        *var* is passed explicitly instead of being read from the enclosing
        loop variable.
        """
        first = (var_idx - 1) * t
        rows = slice(first, first + t - 1)

        # set variable names
        data_plot_tx.loc[rows, 'Variable'] = var_name

        # set variable value
        data_plot_tx.loc[rows, 'Value'] = data_plot.loc[
            data_plot.loc[:, 'Variable'] == var_name,
            'x'].values[var_value_idx]

        # The first tag found in the run name selects the target column.
        for tag in ('RunA', 'RunB', 'RunC'):
            if tag in run:
                data_plot_tx.loc[rows, tag + ' tx'] = tx_data[var][
                    'monthly'][run]['transmissions'].iloc[0:t].values
                break

    # NOTE(review): var_idx counts every key, matched or not, and the slot
    # starts at (var_idx - 1) * t, so a matching first key would target
    # negative row labels. Presumably the first key of tx_results is a
    # non-matching base case -- confirm.
    var_idx = -1
    var_val_idx = [-1, -1, -1]
    for var in tx_data:
        var_idx += 1
        slot = slot_of(var)
        if slot is None:
            continue
        var_val_idx[slot] += 1
        for run in tx_data[var]['monthly']:
            set_abc(run, var, var_idx, col_adj[slot], var_val_idx[slot])

    # Month counter 0..t-1 repeated down the frame (vectorised; the frame
    # has a plain 0..N-1 index).
    data_plot_tx['t'] = np.arange(len(data_plot_tx.index)) % t

    #%% plots for individual runs
    per_variable = [
        ('Incidence', '_plots for individual values of incidence'),
        ('Infected', '_plots for individual values of infected population'),
        ('Uninfected',
         '_plots for individual values of uninfected population'),
    ]
    subsets = [
        (data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == name, :], suffix)
        for name, suffix in per_variable
    ]
    for i in run_col:
        (ggplot(aes(x='t', y=i, color='Value'), data_plot_tx) + geom_line() +
         facet_wrap('Variable', scales='free')).save(
             os.path.join(
                 save_path,
                 i + r'_transmissions for all variable all values'))
        for subset, suffix in subsets:
            (ggplot(aes(x='t', y=i), subset) + geom_line() +
             facet_wrap('Variable', 'Value', scales='free')).save(
                 os.path.join(save_path, i + suffix))

    #%% compare runs ABC
    data_plot_abc = {}
    for var in col_adj:
        float_df = pd.DataFrame(index=range(0, t * total_var * total_val),
                                columns=['t', 'Value', 'Transmissions', 'Run'])
        # Rows of this variable; independent of the value, so computed once.
        var_df = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == var, :]
        var_df = var_df.reset_index(drop=True)
        insert_idx = -1
        for val in data_plot.loc[data_plot.loc[:, 'Variable'] == var, 'x']:
            var_val_df = var_df.loc[var_df.loc[:, 'Value'] == val, :]
            var_val_df = var_val_df.reset_index(drop=True)
            for c in run_col:
                insert_idx += 1
                rows = slice(insert_idx * t, (insert_idx * t) + t - 1)
                float_df.loc[rows, 'Run'] = c
                float_df.loc[rows, 'Transmissions'] = var_val_df.loc[
                    :, c].values
                float_df.loc[rows, 'Value'] = val
                float_df.loc[rows, 't'] = np.arange(t)
        data_plot_abc[var] = float_df.dropna()
        # As in the original, the plot uses the frame before dropna().
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line() + facet_wrap('Value', scales='free') + ggtitle(var)).save(
             os.path.join(
                 save_path,
                 var + '_comparison of transmissions in runs ABC'))

    #%% compare runs BC
    for var in data_plot_abc:
        abc_df = data_plot_abc[var]
        float_df = abc_df.loc[abc_df.loc[:, 'Run'] != 'RunA tx', :]
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line(alpha=0.2) + facet_wrap('Value', scales='free') +
         stat_smooth(method='loess', se=False) + ggtitle(var)).save(
             os.path.join(save_path,
                          var + '_comparison of transmissions in runs BC'))
Example #11
0
import sys
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

species = 'no2'

# Read the melted background data with explicit column types, then parse the
# timestamp strings.
column_types = {
    'timestamp': 'str',
    'vidperiod': 'str',
    'type': 'str',
    'param': 'str',
    'value': 'float64',
}
df = pd.read_csv(r'.\charts\background_data_melted.csv',
                 index_col='idx',
                 dtype=column_types)
print(df[:10])
df['timestamp'] = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %H:%M:%S")

# plots: one line per type, one free-scaled facet per video period
plt1 = gg.ggplot(df, gg.aes(x='timestamp', y='value', color='type'))
plt1 = plt1 + gg.geom_line()
plt1 = plt1 + gg.xlab('Time')
plt1 = plt1 + gg.ylab('Concentration')
plt1 = plt1 + gg.theme_bw()
plt1 = plt1 + gg.ylim(0, 100)
plt1 = plt1 + gg.facet_wrap('vidperiod', scales='free')
plt1 = plt1 + gg.ggtitle('Regional background comparison {0}'.format(species))
#+gg.theme(axis_text_x=gg.element_text(angle=20))

# The output name carries the species and today's date (e.g. 2020Jan31).
date_tag = dt.datetime.today().strftime('%Y%b%d')
out_name = r'.\charts\background_{0}_ggtest_{1}.png'.format(species, date_tag)
plt1.save(filename=out_name, width=None, height=None, dpi=300)
      geom_point(color='steelblue') +
      xlab('Engine Displacement') +
      ylab('Average MPG') +
      ggtitle('Gasoline cars'))

#%% step 10
# Mean displacement and combined MPG per model year.
grouped_by_year = vehicles_non_hybrid.groupby(['year'])
# Columns are selected with a list. Indexing a groupby with a bare tuple of
# column names was deprecated and is rejected by current pandas.
avg_grouped_by_year = grouped_by_year[['displ', 'comb08']].agg([np.mean])

#%% step 11
# Expose the year index as a column so that melt can use it as the id.
avg_grouped_by_year['year'] = avg_grouped_by_year.index
melted_avg_grouped_by_year = pd.melt(avg_grouped_by_year, id_vars='year')

from ggplot import facet_wrap
p = ggplot(aes(x='year', y='value', color='variable_0'), data=melted_avg_grouped_by_year)
p + geom_point() + facet_wrap('variable_0')


#%%  Section Investigating the makes and models of automobiles with Python
# ------ step 1, 2 ------------------
pd.unique(vehicles_non_hybrid.cylinders)
vehicles_non_hybrid.cylinders = vehicles_non_hybrid.cylinders.astype('float')
pd.unique(vehicles_non_hybrid.cylinders)
vehicles_non_hybrid_4 = vehicles_non_hybrid[(vehicles_non_hybrid.cylinders == 4.0)]

#%% step 3
import matplotlib.pyplot as plt
%matplotlib inline

grouped_by_year_4_cylinder = vehicles_non_hybrid_4.groupby(['year']).make.nunique()
fig = grouped_by_year_4_cylinder.plot()
Example #13
0
 def test_facet_wrap_ncol(self):
     """Wrapping on ``cut`` with ``ncol=2`` must give a 3 x 2 layout."""
     base = gg.ggplot(gg.aes(x='price'), gg.diamonds)
     plot = base + gg.facet_wrap('cut', ncol=2)
     layout = plot.facets
     shape = (layout.nrow, layout.ncol)
     self.assertEqual(shape[0], 3)
     self.assertEqual(shape[1], 2)
Example #14
0
        tile(w_from_figure_wh_ratio, norm(data)),
        '%s-layer-acts-%s-%s-(i=%s)' % (img_desc, layer, show_tuple_tight(data.shape), batch_i),
    )

# Split the network blobs by activation rank: 4-D blobs are handled as conv
# layers, everything else as fc layers. List comprehensions replace
# `filter(lambda (layer, acts): ...)`, whose tuple-parameter lambda is a
# syntax error on Python 3; they also yield real lists on both versions.
conv_layers = [(layer, acts) for layer, acts in net.blobs.items()
               if len(acts.data.shape) == 4]
fc_layers = [(layer, acts) for layer, acts in net.blobs.items()
             if len(acts.data.shape) != 4]

# Plot conv acts
for layer, acts in conv_layers:
    plot_conv_acts(layer, acts)

# Plot fc acts: one row per activation of batch item `batch_i`, tagged with
# its layer name; reset_index() exposes the position as the `index` column.
df = pd.concat([
    pd.DataFrame({'act': acts.data[batch_i], 'layer': layer}).reset_index()
    for layer, acts in fc_layers
])
plot_gg(gg_layer(
    gg.ggplot(df, gg.aes(y='act', x='index')),
    gg.geom_point(alpha=.5),
    gg.facet_wrap(x='layer', scales='free'),
    gg.ggtitle('%s layer acts fc/prob points (i=%s)' % (img_desc, batch_i)),
))
plot_gg(gg_layer(
    gg.ggplot(df, gg.aes(x='act')),
    gg.geom_histogram(bins=25, size=0),
    gg.facet_wrap(x='layer', scales='free'),
    gg.scale_y_log(),
    gg.ylim(low=0.1),
    gg.ggtitle('%s layer acts fc/prob histo (i=%s)' % (img_desc, batch_i)),
))
Example #15
0
# Installing ggplot requires numpy and scipy; the installation is error-prone.
# Upgrade pip first so that installing .whl files does not fail. Note: do not
# rename .whl files, and do not download them with Xunlei (Thunder).
# pip install --upgrade setuptools

# Install numpy and scipy. On Windows they need to be compiled; prebuilt .whl
# packages can be downloaded from http://www.lfd.uci.edu/~gohlke/pythonlibs/
# and installed.
# pip install .whl

# On Windows, VC++ 14.0 is required: download Visual C++ Build Tools 2015 from
# http://landinghub.visualstudio.com/visual-cpp-build-tools

# Install ggplot
# pip install -i https://pypi.tuna.tsinghua.edu.cn/simple ggplot

# Scatter plot
import ggplot as gp

meat = gp.meat  # sample data bundled with ggplot
# x axis: date, y axis: beef, colour mapped to beef
beef_aes = gp.aes(x='date', y='beef', color='beef')
p = gp.ggplot(beef_aes, data=meat)  # data selects the data set

p + gp.geom_line()  # line chart
p + gp.geom_point()  # scatter plot

# Faceted plot
diamond_base = gp.ggplot(gp.aes(x='carat', y='price', color='color'),
                         data=gp.diamonds)
diamond_base + gp.geom_point() + gp.facet_wrap('cut')

# Histogram
price_base = gp.ggplot(gp.aes(x='price'), data=gp.diamonds)
price_base + gp.geom_histogram()
Example #16
0
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'cvAccuracy',
                                            'testAccuracy'])


# Long-format frame for plotting: the cross-validation and test accuracies
# are stacked, with a `type` column telling them apart.
ggdata = pandas.concat(
    [DataFrame({'log10(p)' : log10(knnResultsSimplified.p),
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'cv',
                'Accuracy' : knnResultsSimplified.cvAccuracy}),
     DataFrame({'log10(p)' : log10(knnResultsSimplified.p),
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'test',
                'Accuracy' : knnResultsSimplified.testAccuracy})],
    axis = 0
)

# Accuracy against log10(p), one facet per k, with cv and test told apart by
# colour and line type.
ggobj = ggplot.ggplot(
    data = ggdata,
    aesthetics = ggplot.aes(x='log10(p)', y='Accuracy',
                            color='type', group='type', linetype='type')
)
ggobj += ggplot.theme_bw()
# ggobj += ggplot.scale_x_log()
ggobj += ggplot.geom_point(alpha=0.6)
ggobj += ggplot.stat_smooth()
ggobj += ggplot.facet_wrap('k')
# The function-call form works on Python 2 and 3; the bare `print ggobj`
# statement is a syntax error on Python 3.
print(ggobj)
Example #17
0
 def test_ndim_2_facet_wrap(self):
     """Facet the diamonds price plot by cut and clarity; check the layout.

     Expected: 7 rows, 6 columns and ``facets.ndim`` equal to 40.
     """
     wrapped = (gg.ggplot(gg.aes(x='price'), gg.diamonds)
                + gg.facet_wrap('cut', 'clarity'))
     found = (wrapped.facets.nrow, wrapped.facets.ncol)
     for actual, expected in zip(found, (7, 6)):
         self.assertEqual(actual, expected)
     self.assertEqual(wrapped.facets.ndim, 40)
Example #18
0
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'cvAccuracy',
                                            'testAccuracy'])


# Long-format frame for plotting: one block of rows per accuracy type, each
# carrying the same log10(p) and integer k columns.
log_p = log10(knnResultsSimplified.p)
k_as_int = knnResultsSimplified.k.apply(int)
accuracy_columns = (('cv', 'cvAccuracy'), ('test', 'testAccuracy'))
ggdata = pandas.concat(
    [DataFrame({'log10(p)': log_p,
                'k': k_as_int,
                'type': kind,
                'Accuracy': knnResultsSimplified[column]})
     for kind, column in accuracy_columns],
    axis=0,
)

# Accuracy against log10(p), one facet per k; cv and test are told apart by
# colour, group and line type.
type_aes = ggplot.aes(x='log10(p)', y='Accuracy',
                      color='type', group='type', linetype='type')
ggobj = ggplot.ggplot(data=ggdata, aesthetics=type_aes)
ggobj += ggplot.theme_bw()
# ggobj += ggplot.scale_x_log()
ggobj += ggplot.geom_point(alpha=0.6)
ggobj += ggplot.stat_smooth()
ggobj += ggplot.facet_wrap('k')
print(ggobj)
ax.legend(["Entries", "Exits"])
ax.set_ylabel("Entries/exits per hour (1e6 is a million)")
ax.set_xlabel("Hour (0 is midnight, 12 is noon, 23 is 11pm)")
ax.set_xlim(0, 23)

# Summary statistics of entries/exits split by rain. The explicit .copy()
# makes the selection an independent frame, so adding columns to it neither
# touches turnstile_weather nor triggers SettingWithCopyWarning.
turnstile_rain = turnstile_weather[["rain", "ENTRIESn_hourly", "EXITSn_hourly"]].copy()
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining")
turnstile_rain.groupby("rain2").describe()

# Density of log10(entries + 1), one facet per rain condition.
turnstile_rain = turnstile_weather[["rain", "ENTRIESn_hourly", "EXITSn_hourly"]].copy()
# +1 keeps hours with zero entries finite under log10.
turnstile_rain["ENTRIESn_hourly_log10"] = np.log10(turnstile_rain["ENTRIESn_hourly"] + 1)
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining", "not raining")
set1 = brewer2mpl.get_map('Set1', 'qualitative', 3).mpl_colors
plot = gg.ggplot(turnstile_rain, gg.aes(x="ENTRIESn_hourly_log10", color="rain2")) + \
       gg.geom_density() + \
       gg.facet_wrap("rain2", scales="fixed") + \
       gg.scale_colour_manual(values=set1) + \
       gg.xlab("log10(entries per hour)") + \
       gg.ylab("Number of turnstiles") + \
       gg.ggtitle("Entries per hour whilst raining and not raining")
plot

# Reference histogram of 600 normal samples (mean 180, sd 40), fixed seed.
np.random.seed(42)
data = pd.Series(np.random.normal(loc=180, scale=40, size=600))
data.hist()

p = turnstile_weather["ENTRIESn_hourly"].hist()
pylab.suptitle("Entries per hour across all stations")
pylab.xlabel("Entries per hour")
pylab.ylabel("Number of occurrences")
Example #20
0
        "insignificant": coefficients[feature]["unsignificant"]
    }
    df = pd.DataFrame.from_dict(values_dict, orient='index')
    df = df.transpose()
    df = pd.melt(df)
    df['feature'] = feature
    dfs_to_concat.append(df)

# Stack the per-feature frames into one long frame.
master_df = pd.concat(dfs_to_concat)

# histogram: coefficient values per feature, overlaid by variable
p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df)
histogram_layers = [
    geom_histogram(bins=25, alpha=0.5),
    scale_x_continuous(limits=(-25, 25)),
    ggtitle("sarimax coefficient magnitude distribution"),
    facet_wrap("feature", ncol=3, scales="free"),
    labs(x=" ", y=" "),
]
for layer in histogram_layers:
    p += layer

# visuals: gray theme with a small monospace font
t = theme_gray()
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'

p += t
p.save("arima_1/" + "histogram.png")

# boxplot: value distribution per variable
p = ggplot(aes(x='variable', y='value'), data=master_df)
boxplot_layers = [
    geom_boxplot(),
    scale_y_continuous(limits=(-25, 25)),
    ggtitle("sarimax coefficient magnitudes"),
]
for layer in boxplot_layers:
    p += layer
Example #21
0
]]
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining",
                                   "not raining")
turnstile_rain.groupby("rain2").describe()

# Density of log10(entries + 1), one facet per rain condition. The explicit
# .copy() makes the selection an independent frame, so adding columns to it
# neither touches turnstile_weather nor triggers SettingWithCopyWarning.
turnstile_rain = turnstile_weather[[
    "rain", "ENTRIESn_hourly", "EXITSn_hourly"
]].copy()
# +1 keeps hours with zero entries finite under log10.
turnstile_rain["ENTRIESn_hourly_log10"] = np.log10(
    turnstile_rain["ENTRIESn_hourly"] + 1)
turnstile_rain["rain2"] = np.where(turnstile_rain["rain"] == 1, "raining",
                                   "not raining")
set1 = brewer2mpl.get_map('Set1', 'qualitative', 3).mpl_colors
plot = gg.ggplot(turnstile_rain, gg.aes(x="ENTRIESn_hourly_log10", color="rain2")) + \
       gg.geom_density() + \
       gg.facet_wrap("rain2", scales="fixed") + \
       gg.scale_colour_manual(values=set1) + \
       gg.xlab("log10(entries per hour)") + \
       gg.ylab("Number of turnstiles") + \
       gg.ggtitle("Entries per hour whilst raining and not raining")
plot

# Reference histogram of 600 normal samples (mean 180, sd 40), fixed seed.
np.random.seed(42)
data = pd.Series(np.random.normal(loc=180, scale=40, size=600))
data.hist()

p = turnstile_weather["ENTRIESn_hourly"].hist()
pylab.suptitle("Entries per hour across all stations")
pylab.xlabel("Entries per hour")
pylab.ylabel("Number of occurrences")
Example #22
0
 def test_ndim_2_facet_wrap_subplots(self):
     """The subplot array for a cut x clarity wrap must be 7 by 6."""
     wrapped = (gg.ggplot(gg.aes(x='price'), gg.diamonds)
                + gg.facet_wrap('cut', 'clarity'))
     made = wrapped.make_facets()
     fig, subplots = made
     rows, cols = subplots.shape
     self.assertEqual(rows, 7)
     self.assertEqual(cols, 6)
Example #23
0
    #split percentiles into different charts, all sites
    #plt1 = gg.ggplot(df_along, gg.aes(x='n_passes',y='value',color='site_str'))+gg.geom_point()+gg.xlab('N drives')+gg.ylab('Bias (%)')+gg.theme_bw()+gg.xlim(0,100)+gg.facet_wrap('yparam',scales='free_y')
    #plt1.save(filename = r'..\charts\bias_{0}.png'.format(c['name']), width=None, height=None, dpi=200)
    #n_segments
    plt2 = gg.ggplot(
        df_a, gg.aes(x='n_passes', y='n_segments', color='site_str')
    ) + gg.geom_line() + gg.xlab('n, number drive periods') + gg.ylab(
        'Sample size (number of drive patterns)') + gg.theme_bw() + gg.xlim(
            0, 35) + gg.ylim(0, 2000)
    plt2.save(filename=r'..\charts\n_segments_{0}_{1}.png'.format(
        c['name'], dtstamp),
              width=None,
              height=None,
              dpi=200)
    #combine percentiles, split sites
    plt3 = gg.ggplot(
        df_along, gg.aes(x='n_passes', y='value', color='yparam')
    ) + gg.geom_line() + gg.xlab('n, number of drive periods') + gg.ylab(
        'Sample error (%)') + gg.theme_bw() + gg.xlim(0, 35) + gg.ylim(
            -100, 100) + gg.geom_hline(
                y=25, linetype="dashed", color="gray") + gg.geom_hline(
                    y=-25, linetype="dashed", color="gray") + gg.geom_vline(
                        x=[10, 15], linetype="dashed",
                        color="gray") + gg.scale_color_manual(
                            values=colors) + gg.facet_wrap('site_str')
    plt3.save(filename=r'..\charts\percentiles_{0}_{1}.png'.format(
        c['name'], dtstamp),
              width=None,
              height=None,
              dpi=200)
import pandas as pd

# Stack the three bias definitions into one long frame. Each entry pairs the
# method label with the percentile columns of `df` that belong to it:
# total-based, enhancement-based, and enhancements + full-sample background.
method_columns = [
    ('(Total-Expected Total)/Expected Total', brks[:5]),
    ('(Enhanc-Expected Enhanc)/Expected Enhanc', brks[5:10]),
    ('(Enhanc+Expected Backgr-Expected Total)/Expected Total', brks[10:]),
]
stacked_parts = []
for method_label, stat_columns in method_columns:
    dftmp = df[['n_sub'] + stat_columns].melt(
        id_vars=['n_sub'], value_vars=stat_columns,
        var_name='stat', value_name='value')
    dftmp['method'] = method_label
    stacked_parts.append(dftmp)
# One concat replaces the chained DataFrame.append calls; append was removed
# in pandas 2.0. Row labels are kept as they were, not renumbered.
df_stacked = pd.concat(stacked_parts)
# Characters 1-2 of the stat name are used as the percentile label.
df_stacked['percentile'] = ['{0}th%'.format(a[1:3]) for a in df_stacked['stat']]

#plots
#compare all 3
plt1 = (gg.ggplot(df_stacked, gg.aes(x='n_sub', y='value', color='percentile'))
        + gg.geom_line()
        + gg.xlab('N drives')
        + gg.ylab('Bias (%)')
        + gg.theme_bw()
        + gg.scale_color_manual(values=colors)
        + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
        + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
        + gg.facet_wrap('method')
        + gg.ggtitle('Bias comparison {0}'.format(title)))
plt1.save(filename=r'..\charts\drivebias_laqn_{0}.png'.format(species), width=None, height=None, dpi=300)

#plot total alone for presentation
plt2 = (gg.ggplot(df_stacked[df_stacked['method'] == '(Total-Expected Total)/Expected Total'],
                  gg.aes(x='n_sub', y='value', color='percentile'))
        + gg.geom_line()
        + gg.xlab('N drives')
        + gg.ylab('Bias (%)')
        + gg.ylim(-100, 100)
        + gg.scale_color_manual(values=colors)
        + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
        + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
        + gg.ggtitle('Bias comparison {0}'.format(title)))
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt2 = plt2 + t
plt2.save(filename=r'..\charts\drivebias_laqn_{0}_total.png'.format(species), width=None, height=None, dpi=300)

#plot enhancement alone for presentation
plt3 = (gg.ggplot(df_stacked[df_stacked['method'] == '(Enhanc+Expected Backgr-Expected Total)/Expected Total'],
                  gg.aes(x='n_sub', y='value', color='percentile'))
        + gg.geom_line()
        + gg.xlab('N drives')
        + gg.ylab('Bias (%)')
        + gg.ylim(-100, 100)
        + gg.scale_color_manual(values=colors)
        + gg.geom_hline(y=[-25, 25], linetype="dashed", color="gray")
        + gg.geom_vline(x=[10, 15], linetype="dashed", color="gray")
        + gg.ggtitle('Bias comparison {0}'.format(title)))
t = gg.theme_bw()
t._rcParams['font.size'] = 16
plt3 = plt3 + t
plt3.save(filename=r'..\charts\drivebias_laqn_{0}_enhanc.png'.format(species), width=None, height=None, dpi=300)
Example #25
0
 def test_facet_wrap_ncol(self):
     """A ``cut`` wrap limited to two columns must use three rows."""
     wrapped = (gg.ggplot(gg.aes(x='price'), gg.diamonds)
                + gg.facet_wrap('cut', ncol=2))
     found = (wrapped.facets.nrow, wrapped.facets.ncol)
     for actual, expected in zip(found, (3, 2)):
         self.assertEqual(actual, expected)
Example #26
0
def plotHistogramMeans(hist,fileName):
  """Save one line plot per cluster of the mean histograms in *hist*.

  Every row of *hist* is treated as one cluster. Its values are plotted
  against their column position and the plots are faceted by the row number
  (column ``ID``).

  Args:
    hist: 2-D array whose first axis indexes clusters.
    fileName: file name appended to ``'./IMG/'`` to form the output path.
  """
  num_clust = hist.shape[0]
  # Column vector of cluster ids, prepended to the histogram values.
  # A plain ndarray replaces the deprecated np.mat.
  IDS = np.arange(num_clust).reshape(num_clust, 1)

  histD = np.concatenate((IDS, hist), axis=1)

  # list(...) keeps this working on Python 3, where range() is not a list
  # and cannot be concatenated to one.
  Data = pd.DataFrame(histD, columns=['ID'] + list(range(hist.shape[1])))
  Melted = pd.melt(Data, id_vars=['ID'])
  pv = (ggplot.ggplot(ggplot.aes(x='variable', y='value'), data=Melted)
        + ggplot.geom_line()
        + ggplot.facet_wrap("ID"))
  # The function-call form prints the same text on Python 2 and Python 3.
  print("Saving mean histograms")
  ggplot.ggsave(pv, './IMG/' + fileName)
Example #27
0
import pandas as pd
meat = gp.meat


def _beef_by_date():
    """Return a fresh ggplot of beef against date for the meat data."""
    return gp.ggplot(gp.aes(x='date', y='beef'), data=meat)


# Scatter plot, line chart, then both layers together.
p = _beef_by_date() + gp.geom_point(color='red') + gp.ggtitle(u'散点图')
print(p)
p = _beef_by_date() + gp.geom_line(color='blue') + gp.ggtitle(u'折线图')
print(p)
p = (_beef_by_date()
     + gp.geom_point(color='red')
     + gp.geom_line(color='blue')
     + gp.ggtitle(u'散点图+折线图'))
print(p)

# Gather the variables of interest into a single column.
meat_lng = pd.melt(meat[['date', 'beef', 'pork', 'broilers']], id_vars='date')
# meat_lng contains date, value (the column of variable values) and
# variable (the column of variable names).
long_aes = gp.aes(x='date', y='value', colour='variable')
p = gp.ggplot(long_aes, data=meat_lng) + gp.geom_point() + gp.geom_line()
print(p)

# Same long-format data, one facet per variable.
meat_lng = pd.melt(meat[['date', 'beef', 'pork', 'broilers']], id_vars='date')
faceted_aes = gp.aes(x='date', y='value', colour='variable')
p = (gp.ggplot(faceted_aes, data=meat_lng)
     + gp.geom_point()
     + gp.facet_wrap('variable'))
print(p)

# Histogram of beef.
p = gp.ggplot(gp.aes(x='beef'), data=meat) + gp.geom_histogram()
print(p)

# Histograms of beef and pork, one facet per variable.
meat_lng = pd.melt(meat[['date', 'beef', 'pork']], id_vars='date')
p = (gp.ggplot(gp.aes(x='value'), data=meat_lng)
     + gp.facet_wrap('variable')
     + gp.geom_histogram())
print(p)