Exemple #1
0
def plot_distance_trip_time(df):
    """Print a scatter plot of trip duration against distance travelled.

    The plot maps ``s.TRIP_DURATION_COL`` to the x axis and
    ``s.DISTANCE_TRAVELED_COL_NAME`` to the y axis, overlays a red smoothed
    trend line on steel-blue points, and is titled via ``_make_title`` using
    the number of rows in ``df``.

    Args:
        df: pandas DataFrame holding the two columns named above.

    Returns:
        The same ``df`` that was passed in, unchanged, so calls can be chained.
    """
    num_rows = df.shape[0]
    title = 'trip duration v distance travelled'

    chart = (
        ggplot(df, aes(s.TRIP_DURATION_COL, s.DISTANCE_TRAVELED_COL_NAME))
        + ggtitle(_make_title(title, num_rows))
        + stat_smooth(colour="red")
        + geom_point(colour='steelblue')
        # Default continuous x scale; no custom breaks or labels are applied.
        + scale_x_continuous()
    )
    # Call form of print: valid on Python 3 and prints the same single value
    # on Python 2, where the old statement form only worked.
    print(chart)

    return df
Exemple #2
0
    def plot(self, inputs):
        """Build a scatter plot of two columns of ``self.dat`` for one year.

        ``inputs`` supplies ``year``, ``xvar``, ``yvar`` and the boolean
        switches ``shownames`` and ``linear``. Returns ``None`` when the year
        does not occur in ``self.dat.Year`` or when either axis name is not in
        ``self.dat``; otherwise returns the assembled plot object.
        """
        year_is_known = inputs.year in self.dat.Year.values
        if not year_is_known:
            return

        if not (inputs.xvar in self.dat and inputs.yvar in self.dat):
            return

        rows_for_year = self.dat[self.dat.Year == inputs.year]
        axes = aes(x=inputs.xvar, y=inputs.yvar)
        chart = ggplot(rows_for_year, axes) + geom_point()

        # Optional layers, added in the same order as the switches are listed.
        if inputs.shownames:
            name_labels = geom_text(aes(label=self.ID_col), vjust=1, hjust=1)
            chart = chart + name_labels
        if inputs.linear:
            chart = chart + stat_smooth(color="red", method="lm")
        return chart
        if (reward == 1):
            wins_for_player_1[i] += 1.0
        elif (reward == 0.5):
            draw_for_players[i] += 1.0

    print(i, wins_for_player_1[i], draw_for_players[i])
    data.append({
        'Type': 0,
        'Wins': wins_for_player_1[i],
        'Training': training_steps * (i - 1)
    })
    data.append({
        'Type': 1,
        'Wins': draw_for_players[i],
        'Training': training_steps * (i - 1)
    })
    learnitMC(training_steps, epsilon, alpha, n)
#   learnit(training_steps, epsilon, alpha) # the original learning code.

# Turn the collected per-round records into a DataFrame for plotting.
learningdf = pd.DataFrame(data)
# The figure is drawn with the Python port of ggplot. With recent pandas
# releases that port needed the two patches described here:
# https://stackoverflow.com/questions/50591982/importerror-cannot-import-name-timestamp/52378663
# https://github.com/yhat/ggpy/issues/612
p = (
    gg.ggplot(gg.aes(x='Training', y='Wins', group='Type'), data=learningdf)
    + gg.xlab('Learning games')
    + gg.ylab('Wins for player 1')
    + gg.ggtitle("n=" + str(n))
    + gg.geom_point()
    + gg.stat_smooth(method='loess')
)
# Render the figure, then write it to a PDF named after n.
p.make()
filename = "experiment_" + str(n) + ".pdf"
p.save(filename)
# Synthetic data: x is drawn around 150 (scale 50), y follows x * slope plus
# noise of scale 5.
slope = 0.3
x = randn(num) * 50. + 150.0
y = randn(num) * 5 + x * slope
plt.scatter(x, y, c='b')


# In[72]:

# plt.scatter(x[(y < 1) & (y > -1)], y[(y < 1) & (y > -1)], c='r')
# np.argsort, np.sort, complicated index slicing
dframe = pd.DataFrame({'x': x, 'y': y})
# x and y are passed by keyword: seaborn deprecated the positional form in
# 0.11 and accepts only `data` positionally from 0.12 onwards. The keyword
# form works on older releases as well.
g = sns.jointplot(x='x', y='y', data=dframe, kind="reg")


# ## Grab Python version of ggplot http://ggplot.yhathq.com/

# In[73]:

from ggplot import ggplot, aes, geom_line, stat_smooth, geom_dotplot, geom_point


# In[74]:

# Scatter the x/y points of dframe and overlay a blue smoothed trend line
# (span 0.2). The expression value is what the notebook cell displays.
(
    ggplot(aes(x='x', y='y'), data=dframe)
    + geom_point()
    + stat_smooth(colour='blue', span=0.2)
)


# In[ ]:



def plot_transmission_results(tx_results, percentage_decline, save_path,
                              path_names):
    """Plot percentage-decline and transmission results and save the figures.

    The function builds several long-format DataFrames from the inputs and
    saves one ggplot figure per view into ``save_path``:

    * percentage decline against the varied input value, faceted by variable;
    * transmissions over time for each of the runs A, B and C;
    * runs A/B/C compared per variable, and runs B/C compared with a loess
      smoother.

    Args:
        tx_results: mapping that is deep-copied before use. For every key
            ``var`` the code reads
            ``tx_results[var]['monthly'][run]['transmissions'].iloc[0:t]``.
            Only keys containing 'HIV+', 'HIV-' or 'Incidence' are used.
        percentage_decline: mapping whose keys contain 'HIV+', 'HIV-' or
            'Incidence'; the values are collected per variable in iteration
            order.
        save_path: directory that every figure file name is joined onto.
        path_names: mapping; ``path_names['transmission']`` must be the
            directory that contains
            'Input files/transmission_rate_multiplier_required_inputs.xlsx'.

    Returns:
        None. The figures are written to disk as a side effect.
    """

    #%% what are inputs?

    # transmission results
    # There'll be a folder called 'Runs prepared for ...'
    # all the folders inside that folder will have a CEPAC results folder.
    # tx_data is a dictionary and will have two keys, 'monthly' and 'popstats'
    # 'monthly' key will only have primary transmissions data
    tx_data = deepcopy(tx_results)
    # t is the number of rows taken from each run's transmissions series
    # (see the `.iloc[0:t]` reads in set_abc) -- presumably months; TODO confirm.
    t = 120
    # total_var * total_val sizes the result frames below
    # (3 variables, 4 values per variable).
    total_var = 3
    total_val = 4
    # percentage decline
    # this is also dictionary of percentage decline values for each folder
    # having cepac results

    # save_path: exact folder where you want to save your images

    # path_names will have paths to transmissions and sensitivity directories

    #%% plot percentage decline

    # generate an environment object first
    # lets go for line plot
    # One row per (variable, value) combination; filled in by the loop below.
    data_plot = pd.DataFrame(
        columns=['x', 'Percentage decline', 'Transmissions', 'Variable'],
        index=range(0, total_var * total_val))
    data_in = pd.read_excel(
        os.path.join(path_names['transmission'], 'Input files',
                     'transmission_rate_multiplier_required_inputs.xlsx'))
    # Input-sheet column names (col) and the short labels used in plots
    # (col_adj); both lists are in the same order.
    col = [
        'Incidence rate per 100 PY specific to high-risk group 1',
        'HIV uninfected individuals in high-risk group 1',
        'HIV infected individuals in high-risk group 1'
    ]
    col_adj = ['Incidence', 'Uninfected', 'Infected']
    # Round incidence to one decimal so it can be compared with base_val[0].
    data_in[col[0]] = data_in[col[0]].round(1)
    # Base value per column, in the order of `col`; rows that differ from the
    # base are treated as the varied values.
    base_val = [np.float64(0.9), 2960000, 136400]
    # Collect percentage-decline values per input column, routed by key name.
    y1_values = {col[0]: [], col[1]: [], col[2]: []}
    for var in percentage_decline:
        if 'HIV+' in var:
            y1_values[col[2]].append(percentage_decline[var])
        elif 'HIV-' in var:
            y1_values[col[1]].append(percentage_decline[var])
        elif 'Incidence' in var:
            y1_values[col[0]].append(percentage_decline[var])

    for i in range(len(col)):
        # idx is the first row of data_in whose value differs from the base.
        idx = data_in.loc[data_in.loc[:, col[i]] != base_val[i],
                          col[i]].index.values[0]
        # .loc slices are inclusive: data_plot rows idx-1..idx+2 (4 rows)
        # receive data_in rows idx..idx+3 (4 rows).
        data_plot.loc[idx - 1:idx + 3 - 1, 'x'] = data_in.loc[idx:idx + 3,
                                                              col[i]].values
        data_plot.loc[idx - 1:idx + 3 - 1, 'Variable'] = col_adj[i]
        # NOTE(review): assumes y1_values[col[i]] holds exactly as many
        # entries as rows selected here -- TODO confirm against the caller.
        data_plot.loc[idx - 1:idx + 3 - 1,
                      'Percentage decline'] = y1_values[col[i]]

    # plot
    # Only rows with a percentage decline of at most 200 are plotted.
    df_float = data_plot.loc[data_plot.loc[:, 'Percentage decline'] <= 200, :]
    (ggplot(aes(x='x', y='Percentage decline'), df_float) + geom_line() +
     facet_wrap('Variable', scales='free')).save(
         os.path.join(save_path, 'Percentage decline'))
    del df_float

    #%% visualizing transmissions
    # index = range(time * number of values for each variable * number of variables)
    def set_abc(run, var_idx, var_name, var_value_idx):
        """Fill one t-row block of data_plot_tx for a single run.

        The block covers labels (var_idx - 1) * t through
        (var_idx - 1) * t + t - 1. `data_plot_tx`, `data_plot`, `tx_data`,
        `t` and `var` are read from the enclosing function's scope.
        NOTE(review): `var` is not a parameter; it is whatever the
        `for var in tx_data` loop below currently holds when this is called.
        """

        # set variable names
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                         'Variable'] = var_name

        # set variable value
        data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                         'Value'] = data_plot.loc[
                             data_plot.loc[:, 'Variable'] == var_name,
                             'x'].values[var_value_idx]

        # Route the first t transmission values to the column matching the
        # run name; run names matching none of the three are ignored.
        if 'RunA' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunA tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values
        elif 'RunB' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunB tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values
        elif 'RunC' in run:
            data_plot_tx.loc[(var_idx - 1) * t:((var_idx - 1) * t) + t - 1,
                             'RunC tx'] = tx_data[var]['monthly'][run][
                                 'transmissions'].iloc[0:t].values

    # One block of t rows per (variable, value) combination.
    data_plot_tx = pd.DataFrame(
        index=range(t * total_var * total_val),
        columns=['Variable', 'Value', 'RunA tx', 'RunB tx', 'RunC tx'])
    # var_idx counts every key of tx_data, including keys skipped by the
    # final `else`. var_val_idx counts values seen so far per variable, in
    # the order of col_adj.
    # NOTE(review): set_abc writes to block (var_idx - 1), so a key processed
    # with var_idx == 0 targets labels -t..-1, which are not in the index.
    # Presumably the first key of tx_data is one that gets skipped -- TODO
    # confirm.
    var_idx = -1
    var_val_idx = [-1, -1, -1]
    for var in tx_data:
        var_idx += 1
        if 'HIV+' in var:
            var_val_idx[2] += 1
            var_name = col_adj[2]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[2])
        elif 'HIV-' in var:
            var_val_idx[1] += 1
            var_name = col_adj[1]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[1])
        elif 'Incidence' in var:
            var_val_idx[0] += 1
            var_name = col_adj[0]
            for run in tx_data[var]['monthly']:
                set_abc(run, var_idx, var_name, var_val_idx[0])
        else:
            continue

    # Add a time column that counts 0..t-1 and restarts for every block.
    data_plot_tx['t'] = 0
    t_float = -1
    for row in data_plot_tx.index:
        if t_float == t - 1:
            t_float = -1
        t_float += 1
        data_plot_tx.loc[row, 't'] = t_float

    #%% plots for individual runs
    run_col = ['RunA tx', 'RunB tx', 'RunC tx']
    # Per-variable subsets used for the per-value plots below.
    inci = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Incidence', :]
    inf = data_plot_tx.loc[data_plot_tx.loc[:, 'Variable'] == 'Infected', :]
    uninf = data_plot_tx.loc[data_plot_tx.loc[:,
                                              'Variable'] == 'Uninfected', :]
    # For every run column: one overview figure across all variables, then
    # one figure per variable subset.
    for i in run_col:
        (ggplot(aes(x='t', y=i, color='Value'), data_plot_tx) + geom_line() +
         facet_wrap('Variable', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_transmissions for all variable all values')))
        (ggplot(aes(x='t', y=i), inci) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i + r'_plots for individual values of incidence')))
        (ggplot(aes(x='t', y=i), inf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i +
                     r'_plots for individual values of infected population')))
        (ggplot(aes(x='t', y=i), uninf) + geom_line() +
         facet_wrap('Variable', 'Value', scales='free')).save(
             os.path.join(
                 save_path,
                 str(i +
                     '_plots for individual values of uninfected population')))

    #%% compare runs ABC
    # Reshape to long format per variable: one t-row block per (value, run)
    # pair, so the runs can be drawn as separate coloured lines.
    data_plot_abc = {}
    for var in col_adj:
        float_df = pd.DataFrame(index=range(0, t * total_var * total_val),
                                columns=['t', 'Value', 'Transmissions', 'Run'])
        insert_idx = -1
        for val in data_plot.loc[data_plot.loc[:, 'Variable'] == var, 'x']:
            var_df = data_plot_tx.loc[data_plot_tx.loc[:,
                                                       'Variable'] == var, :]
            var_df = var_df.reset_index(drop=True)
            var_val_df = var_df.loc[var_df.loc[:, 'Value'] == val, :]
            var_val_df = var_val_df.reset_index(drop=True)
            for c in ['RunA tx', 'RunB tx', 'RunC tx']:
                insert_idx += 1
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Transmissions'] = var_val_df.loc[:, c].values
                # NOTE(review): 'Run' is assigned a second time here with the
                # same value; the repeat looks redundant.
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Run'] = c
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             'Value'] = val
                float_df.loc[insert_idx * t:(insert_idx * t) + t - 1,
                             't'] = np.arange(t)
        # The NaN-free copy is kept for the B/C comparison below.
        # NOTE(review): the figure saved here is drawn from float_df itself,
        # not from the dropna() copy -- confirm this is intended.
        data_plot_abc[var] = float_df.dropna()
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line() + facet_wrap('Value', scales='free') + ggtitle(var)).save(
             os.path.join(
                 save_path,
                 str(var + '_comparison of transmissions in runs ABC')))

    #%% compare runs BC
    # Same comparison without run A: faint raw lines plus a loess smoother
    # drawn without a confidence band (se=False).
    for var in data_plot_abc:
        float_df = data_plot_abc[var].loc[
            data_plot_abc[var].loc[:, 'Run'] != 'RunA tx', :]
        (ggplot(aes(x='t', y='Transmissions', color='Run'), float_df) +
         geom_line(alpha=0.2) + facet_wrap('Value', scales='free') +
         stat_smooth(method='loess', se=False) + ggtitle(var)).save(
             os.path.join(save_path,
                          str(var +
                              '_comparison of transmissions in runs BC')))

    return
 def _plot_scat_w_line(self, gp_aes):
     """Return ``gp_aes`` with coral points, a blue smoother and a seaborn theme.

     The smoother uses ``span=.2`` with no confidence band (``se=False``);
     the theme is ``theme_seaborn`` in the 'talk' context.
     """
     points = gp.geom_point(color='coral')
     trend = gp.stat_smooth(span=.2, color='blue', se=False)
     theme = gp.theme_seaborn(context='talk')
     return gp_aes + points + trend + theme
Exemple #7
0
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'cvAccuracy',
                                            'testAccuracy'])


# Stack the cross-validation and test accuracies into one long-format frame;
# the 'type' column records which of the two each row came from.
ggdata = pandas.concat(
    [DataFrame({'log10(p)': log10(knnResultsSimplified.p),
                'k': knnResultsSimplified.k.apply(int),
                'type': kind,
                'Accuracy': knnResultsSimplified[accuracy_col]})
     for kind, accuracy_col in (('cv', 'cvAccuracy'),
                                ('test', 'testAccuracy'))],
    axis=0
)

# Accuracy against log10(p), coloured and grouped by type, one facet per k.
plot_aes = ggplot.aes(x='log10(p)', y='Accuracy',
                      color='type', group='type', linetype='type')
ggobj = ggplot.ggplot(data=ggdata, aesthetics=plot_aes)
# scale_x_log() is not applied: the x column already holds log10(p).
for layer in (ggplot.theme_bw(),
              ggplot.geom_point(alpha=0.6),
              ggplot.stat_smooth(),
              ggplot.facet_wrap('k')):
    ggobj += layer
print(ggobj)
Exemple #8
0
import ggplot
from ggplot import aes, meat, geom_line, stat_smooth

# `import ggplot` binds the name `ggplot` to the module, so the plot class
# has to be reached as `ggplot.ggplot`; calling the module itself raises
# "TypeError: 'module' object is not callable".
ggplot.ggplot(aes(x='date', y='beef'), data=meat) +\
    geom_line() +\
    stat_smooth(colour='blue', span=0.2)
# The string below holds further example plots that are currently disabled.
''' ggplot(diamonds, aes(x='carat', y='price', color='cut')) +\
    geom_point() +\
    scale_color_brewer(type='diverging', palette=4) +\
    xlab("Carats") + ylab("Price") + ggtitle("Diamonds")
    
ggplot(diamonds, aes(x='price', fill='cut')) +\
    geom_density(alpha=0.25) +\
    facet_wrap("clarity") '''
Exemple #9
0
import ggplot as gp
import pandas as pd
import numpy as np

crime = pd.read_csv('crimeRatesByState2005.csv')
# Drop two data points: the U.S. national average and Washington, D.C.
# Both exclusions use one mask built from `crime` itself. The earlier
# two-step filter indexed the already-filtered frame with a mask computed on
# the unfiltered one, which forces pandas to reindex the mask and emits a
# "Boolean Series key will be reindexed" warning.
crime2 = crime[~crime.state.isin(['United States', 'District of Columbia'])]

# Scatter burglary against murder with a red loess trend line.
print(
    gp.ggplot(gp.aes(x='murder', y='burglary'), data=crime2) +
    gp.geom_point() + gp.stat_smooth(method='loess', color='red'))