Python geom_bar Exemples, ggplot.geom_bar Python Exemples

Exemple #1

0

Afficher le fichier

	def bar_chart(self, conn, column1, column2, table_chosen, title):
		"""Print a bar chart with one bar per ``column1`` value.

		Bars are weighted by ``column2``. Both columns are fetched from
		``table_chosen`` over ``conn`` via ``dfile.double_selector``
		(presumably returning a DataFrame -- confirm against ``dfile``).
		``title`` is used as the plot title.
		"""
		# A bar graph needs exactly two columns: the category and its weight.
		frame = dfile.double_selector(
			conn=conn, table=table_chosen, col1=column1, col2=column2)

		mapping = aes(x=column1, weight=column2)
		chart = ggplot(mapping, data=frame)
		chart = chart + geom_bar()
		chart = chart + labs(title=title)
		print(chart)

Exemple #2

0

Afficher le fichier

    def two_var_intr_effects(self, target, vars, nval=100, plot=True):
        """Compute two-variable interaction effects for ``target``.

        Calls the R function ``twovarint`` through rpy2. When the R session
        has no ``null.models`` object yet, null models are generated first
        (n=10).

        Args:
          target - Variable identifier (column name or number) specifying the
                   target variable
          vars - List of variable identifiers (column names or numbers) specifying
                 other selected variables. Must not contain target
          nval - Number of evaluation points used for calculation.
          plot - Determines whether or not to plot results.
        Returns: Pandas dataframe of interaction effects, indexed by the
                 (possibly shifted, see below) entries of ``vars``.
        """
        # Check if null.models have already been generated
        check_str = """
                function(){
                  if(exists("null.models")){
                    return(T)
                  } else {
                    return(F)
                  }
                }
                """
        if not robjects.r(check_str)()[0]:
            self.logger.info(
                'Null models not generated, generating null models '
                '(n=10)')
            self._generate_interaction_null_models(10, quiet=False)

        int_str = """
              function(target, vars, nval){
                interactions <- twovarint(tvar=target, vars=vars, null.models, 
                                          nval=nval, plot=F)
              }
              """
        # Names are passed through untouched; numeric identifiers are shifted
        # by one (presumably from 0-based Python positions to R's 1-based
        # indexing -- confirm). isinstance() is used instead of an exact type
        # check so that str subclasses (e.g. numpy.str_) are treated as names
        # rather than failing on ``+ 1``.
        target = target if isinstance(target, str) else target + 1
        vars = [var if isinstance(var, str) else var + 1 for var in vars]
        r_interact = robjects.r(int_str)(target,
                                         robjects.Vector(np.array(vars)), nval)
        interact = pd.DataFrame(
            {
                'interact_str': list(r_interact[0]),
                'exp_null_int': list(r_interact[1]),
                'std_null_int': list(r_interact[2])
            },
            index=vars)

        if plot:
            # Long format: one row per (variable, measure) so the two
            # measures can be distinguished by fill colour.
            int_effects = interact.reset_index().rename(
                columns={'index': 'vars'})
            int_effects_m = pd.melt(
                int_effects,
                id_vars='vars',
                value_vars=['interact_str', 'exp_null_int'])
            p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                          data=int_effects_m) \
                + gg.geom_bar() \
                + gg.labs(
                    title='Two-var interaction effects - {}'.format(target))
            print(p)
        return interact

Exemple #3

0

Afficher le fichier

Fichier : vcf_to_sfs_v2.py Projet : paubolivar/python_homework

 def plot_sfs(self, pat_out):
     """Plot the site frequency spectrum as a bar chart and save it.

     Entry 0 of ``self.sfs`` is skipped; entries 1..len-1 are drawn as bar
     heights against their index. The table is printed before plotting and
     the figure is written to ``pat_out``.
     """
     df = pd.DataFrame({
         "freq": list(range(1, len(self.sfs))),
         "sfs": np.array(self.sfs[1:])
     })
     # print() with a single argument behaves the same on Python 2 and 3;
     # the former print statement is a SyntaxError on Python 3.
     print(df)
     pl = ggplot(df, aes(x="freq", weight="sfs")) + geom_bar()
     pl.save(pat_out)

Exemple #4

0

Afficher le fichier

Fichier : visualizer.py Projet : joshtch/IOT-Simulation

 def plotAverageLatency(self):
     """Show a bar chart of the average latency of every entry in self.data.

     Devices are numbered from 1 in the order they appear in ``self.data``;
     each bar's height is that entry's ``averageLatency()``.
     """
     latencies = [device.averageLatency() for device in self.data]
     frame = pandas.DataFrame({
         "device": range(1, len(latencies) + 1),
         "average": latencies,
     })
     plot = (
         ggplot.ggplot(ggplot.aes(x="device", weight="average"), frame)
         + ggplot.labs(title="Average Latency Per Device")
         + ggplot.ylab("Average Latency (ms)")
         + ggplot.xlab("Device Number")
         + ggplot.geom_bar(stat="identity")
     )
     plot.show()

Exemple #5

0

Afficher le fichier

Fichier : visualizer.py Projet : gabe-terrell/IOT-Simulation

	def plotAverageLatency(self):
		"""Display per-device average latency as a bar chart.

		One bar is drawn per element of ``self.data`` (numbered from 1),
		weighted by the value its ``averageLatency()`` returns.
		"""
		per_device = [entry.averageLatency() for entry in self.data]
		device_numbers = range(1, len(per_device) + 1)
		table = pandas.DataFrame({"device": device_numbers, "average": per_device})

		# Layers are added in the same order as before: title, y label,
		# x label, then the bars themselves.
		figure = ggplot.ggplot(ggplot.aes(x="device", weight="average"), table)
		figure = figure + ggplot.labs(title="Average Latency Per Device")
		figure = figure + ggplot.ylab("Average Latency (ms)")
		figure = figure + ggplot.xlab("Device Number")
		figure = figure + ggplot.geom_bar(stat="identity")
		figure.show()

Exemple #6

0

Afficher le fichier

Fichier : validateplot.py Projet : tenxcloud/bcbio-nextgen

def _ggplot(df, out_file):
    """Draw a faceted bar plot of ``df`` with ggplot and save it.

    XXX Not yet functional

    The ``variant.type``, ``category`` and ``caller`` columns of ``df`` are
    replaced in place by their display labels before plotting; the figure is
    written to ``out_file``.
    """
    import ggplot as gg

    # Unknown variant types / categories raise KeyError; unknown callers
    # become None.
    df["variant.type"] = [vtype_labels[code] for code in df["variant.type"]]
    df["category"] = [cat_labels[code] for code in df["category"]]
    df["caller"] = [caller_labels.get(code, None) for code in df["caller"]]

    plot = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    plot = plot + gg.geom_bar()
    plot = plot + gg.facet_wrap("variant.type", "category")
    plot = plot + gg.theme_seaborn()
    gg.ggsave(plot, out_file)

Exemple #7

0

Afficher le fichier

Fichier : validateplot.py Projet : chapmanb/bcbio-nextgen

def _ggplot(df, out_file):
    """Save a ggplot bar chart of ``df`` faceted by variant type and category.

    XXX Not yet functional

    Label columns (``variant.type``, ``category``, ``caller``) are rewritten
    in ``df`` itself with human-readable names, then the chart is saved to
    ``out_file``.
    """
    import ggplot as gg

    # Strict lookups for variant type and category (KeyError on unknown
    # values); callers fall back to None when no label is registered.
    df["variant.type"] = [vtype_labels[raw] for raw in df["variant.type"]]
    df["category"] = [cat_labels[raw] for raw in df["category"]]
    df["caller"] = [caller_labels.get(raw, None) for raw in df["caller"]]

    base = gg.ggplot(df, gg.aes(x="caller", y="value.floor"))
    bars = base + gg.geom_bar()
    faceted = bars + gg.facet_wrap("variant.type", "category")
    themed = faceted + gg.theme_seaborn()
    gg.ggsave(themed, out_file)

Exemple #8

0

Afficher le fichier

    def generate_intr_effects(self, nval=10, n=10, quiet=False, plot=True):
        """ Loads R variable interaction effect objects

        Generates ``n`` null models, then calls the R function ``interact``
        over all predictor columns of ``self._data['x']``. The result is
        stored on ``self._interaction_effects`` and returned.

        Args:
          nval - Number of evaluation points used for calculation
          n - Number of null models to generate for interaction calibration
          quiet - Determines whether to print intermediate data.
          plot - Determines whether or not to plot results.
        Returns: Pandas dataframe of interaction effects
        """

        self._generate_interaction_null_models(n, quiet)

        int_str = """
              function(ncols, nval){
                if(exists("null.models")){
                  interactions <- interact(c(1:ncols), null.models, 
                                           nval=nval, 
                                           plot=F)
                  } else {
                    interactions <- interact(c(1:ncols), nval=nval, plot=F)
                }
              }
              """
        ncols = len(self._data['x'].columns.values)
        r_interact = robjects.r(int_str)(ncols, nval)

        interact = pd.DataFrame(
            {
                'interact_str': list(r_interact[0]),
                'exp_null_int': list(r_interact[1]),
                'std_null_int': list(r_interact[2])
            },
            index=self._data['x'].columns)

        self._interaction_effects = interact

        if plot:
            # Long format: one row per (variable, measure) so the two
            # measures can be distinguished by fill colour.
            int_effects = interact.reset_index().rename(
                columns={'index': 'vars'})
            int_effects_m = pd.melt(
                int_effects,
                id_vars='vars',
                value_vars=['interact_str', 'exp_null_int'])
            # The title is attached with gg.labs(); calling the ``gg`` module
            # itself (as the previous code did) raises TypeError.
            p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                          data=int_effects_m) \
                + gg.geom_bar() \
                + gg.labs(title='Interaction Effects')
            print(p)

        # Return the frame as the docstring promises (previously None).
        return interact

Exemple #9

0

Afficher le fichier

Fichier : tasks.py Projet : jshousephd/temposeqcount

def plotAlignmentStat(input, output):
    """Plot an alignment summary with ggplot and save it to ``output``.

    Reads the CSV at ``input`` (values may use "," as thousands separator),
    keeps the rows at positions 2, 4 and 5, melts every column except
    ``category`` into (sampleName, Value) pairs and draws one horizontal bar
    per sample, filled by category. Intermediate frames and the split output
    path are printed for debugging.

    Args:
        input: path of the CSV summary; it must contain a ``category`` column.
        output: destination file handed to the plot's ``save`` method.
    """
    from ggplot import ggplot, geom_bar, aes, theme_bw, ggtitle, coord_flip

    df = pd.read_csv(input, thousands=",")
    print(df.dtypes)
    print(df)

    # Positional row selection. NOTE(review): presumably the uniquely-mapped,
    # multi-mapped and too-short percentage rows -- confirm against the
    # layout of the input file.
    df = df.iloc[[2, 4, 5], :]
    dfm = pd.melt(df,
                  id_vars=['category'],
                  var_name='sampleName',
                  value_name='Value')
    print(dfm)

    p = (ggplot(dfm, aes(x='sampleName', weight='Value', fill='category'))
         + geom_bar()
         + theme_bw()
         + ggtitle("Alignment Summary stats")
         + coord_flip())

    dirname, filename = os.path.split(output)
    print(dirname)
    print(filename)
    p.save(output)

Exemple #10

0

Afficher le fichier

def graph2(score_data):
    """ Average scores for each question on most recent date;
        Creates and returns graph 2, a bar graph.

    Args:
        score_data: sequence of rows; row 0 holds the column headers and the
            remaining rows hold the data. Dates are expected in
            ``%m/%d/%Y`` format.

    Returns:
        The ggplot object (one bar per question, weighted by mean score).
    """
    # Drop any ".suffix" from the header names. This works on a copy so the
    # caller's header row is not modified as a side effect.
    columns_data = [header.split('.')[0] for header in score_data[0]]

    # Read the date column name from the normalised headers so it always
    # matches an actual DataFrame column.
    date_column = columns_data[find_time_stamp(score_data)]

    data = DataFrame(score_data[1:], columns=columns_data)

    # Get all columns that are numerical questions so we know what to graph
    num_questions = data.select_dtypes(include=['int64']).columns.values

    # Melt data so that each question is in a separate row
    new_data = pd.melt(data,
                       id_vars=date_column,
                       value_vars=num_questions,
                       var_name="Question",
                       value_name="Score")

    # Convert date string into actual date type
    new_data[date_column] = pd.to_datetime(new_data[date_column],
                                           format="%m/%d/%Y")

    # Latest date present in the data
    recent_date = new_data[date_column].max()

    # Keep only the rows from the most recent date. The column is addressed
    # through date_column rather than a hard-coded ``.Timestamp`` attribute,
    # which failed whenever the date column had another name.
    new_data = new_data[new_data[date_column] == recent_date]

    # Group all rows with question, and then take the average.
    new_data = new_data.groupby(['Question']).mean().reset_index()

    # Create bar graph of the per-question averages
    ret = ggplot.ggplot(ggplot.aes(x="Question", weight="Score"), new_data) +\
        ggplot.geom_bar() +\
        ggplot.ggtitle("Most Recent Average Scores")
    return ret

Exemple #11

0

Afficher le fichier

def plot1(data):
    """Build and return a ggplot bar chart from ``data``.

    NOTE(review): the two subscripts below do not filter by team; see the
    inline notes. The intended plot cannot be determined from this function
    alone, so the code is left unchanged.
    """
    # NOTE(review): 'teamID' == 'SFN' compares two string literals, so it is
    # always False and this evaluates data[False], not a row filter. The
    # intent was presumably data[data['teamID'] == 'SFN'] -- confirm.
    xvar = data['teamID' == 'SFN']
    # NOTE(review): same constant-False subscript as above (data[False]).
    yvar = data['teamID' == 'LAN']
    # NOTE(review): aes() receives the looked-up values positionally rather
    # than column-name strings -- verify this is what ggplot expects.
    gg = ggplot(data, aes(xvar, yvar)) + geom_bar()
    return gg

Exemple #12

0

Afficher le fichier

def plot_weather_data(turnstile_weather):
    '''
    Plot average daily subway entries per ISO weekday, split by rain status.

    ``turnstile_weather`` is a dataframe with (at least) the columns
    ``DATEn`` (date string, ``%Y-%m-%d``), ``rain`` and ``ENTRIESn_hourly``.
    The input is not modified; all work happens on a copy.

    Steps:
     * drop the hard-coded national holiday (2011-05-30, Memorial Day);
     * add a ``weekday`` column holding the ISO weekday (1 = Monday);
     * divide every ``ENTRIESn_hourly`` value by the number of distinct
       dates sharing its (weekday, rain) combination, so that the bar
       weights of one combination sum to its average daily entries;
     * draw one bar per weekday, faceted by rain and weekday.

    The plot is printed and returned.
    '''

    # May 30 is Memorial Day, the only national holiday in the data set.
    # It is hard-coded because the function receives nothing but the frame.
    national_holidays = ['2011-05-30']
    df = turnstile_weather.copy()
    df = df[~df.DATEn.isin(national_holidays)].copy()

    # ISO day of the week for each data point.
    df['weekday'] = df['DATEn'].apply(
        lambda day: datetime.datetime.strptime(day, '%Y-%m-%d').isoweekday())

    # Scale each row by 1 / (number of distinct dates in its weekday/rain
    # group). Example: with 2 rainy Fridays every rainy-Friday value is
    # halved, so summing them gives the average for one rainy Friday.
    # The previous loop instead overwrote every row with the group's
    # average total, which made the summed bar height rows x average.
    days_in_group = df.groupby(['weekday', 'rain'])['DATEn'].transform(
        'nunique')
    df['ENTRIESn_hourly'] = df['ENTRIESn_hourly'] / days_in_group

    p = ggplot.ggplot(ggplot.aes(x='factor(weekday)',
                                 weight='ENTRIESn_hourly',
                                 fill='weekday'),
                      data=df) +\
        ggplot.geom_bar() +\
        ggplot.facet_grid(x='rain', y='weekday') +\
        ggplot.ggtitle('Average Ridership on Sunny & Rainy ISO Weekdays')
    # print() with one argument works on both Python 2 and Python 3.
    print(p)
    return p

Exemple #13

0

Afficher le fichier

Fichier : scanWormFolders.py Projet : ails-institute/DeepLongevity

    pd.to_datetime(re.sub(r'\scrop_small.png','', x),
                   format='%Y-%m-%dt%H%M'))
# Running counter used to hand out one sequential ID per distinct path.
wormID = 0
# Pre-create the derived columns with placeholder values; they are filled
# per path in the loop below.
data['lifeSpanHours'] = 0
data['lifeSpanDays'] = 0
data['elapsedHours'] = 0
data['elapsedDays'] = 0
data['wormID'] = ''

# One pass per distinct 'path' value (presumably one folder per worm --
# confirm against the code that builds `data`).
for iWorm in data['path'].unique():
    # Whole days between the first and last 'date' of this path.
    data.loc[data['path'] == iWorm, 'lifeSpanDays'] =  (data.loc[data['path'] == iWorm, 'date'].max()-data.loc[data['path'] == iWorm, 'date'].min()).days
    # Remaining seconds of that span converted to hours, plus 24 h per whole day.
    # NOTE(review): on Python 2 `.seconds/3600` is integer division -- confirm which is intended.
    data.loc[data['path'] == iWorm, 'lifeSpanHours'] =  (data.loc[data['path'] == iWorm, 'date'].max()-data.loc[data['path'] == iWorm, 'date'].min()).seconds/3600 + data.loc[data['path'] == iWorm, 'lifeSpanDays'] * 24
    # Time since the first 'date' of this path, divided by 86400*1e9.
    # NOTE(review): the divisor looks like nanoseconds per day, i.e. it assumes the difference is
    # stored as integer nanoseconds -- confirm with the pandas version in use.
    data.loc[data['path'] == iWorm, 'elapsedDays'] =  (data.loc[data['path'] == iWorm, 'date'] - data.loc[data['path'] == iWorm, 'date'].min())/(86400*1e9)
    # Same difference divided by 3600*1e9 (presumably nanoseconds per hour -- same caveat).
    data.loc[data['path'] == iWorm, 'elapsedHours'] =  (data.loc[data['path'] == iWorm, 'date'] - data.loc[data['path'] == iWorm, 'date'].min())/(3600*1e9)
    # Sequential ID stored as a decimal string.
    data.loc[data['path'] == iWorm, 'wormID'] = '%d' % wormID
    wormID += 1

# Split lifeSpanDays into two equal-width bins labelled "short" and "long".
data['lifeSpanDuration'] = pd.cut(data.lifeSpanDays, 2, labels=["short", "long"])
data.to_csv(dfFile, sep=',', encoding='utf-8', header=True, index=False)

# The three results below are not stored; they are only visible when the
# lines are run in an interactive session.
data.lifeSpanDays.mean()
data.lifeSpanDays.max()
data.lifeSpanDays.min()

#dataMidPoint = data.loc[data['elapsedDays'] ==4 , :]
# Rows whose elapsedDays is exactly 1 are taken as the "mid point" sample.
dataMidPoint = data.loc[data['elapsedDays'] ==1 , :]
dataMidPoint.to_csv(dfMidPointFile, sep=',', encoding='utf-8', header=True, index=False)

# Bar chart of how many mid-point rows fall into each life-span class.
p = gg.ggplot(gg.aes(x='lifeSpanDuration'), data=dataMidPoint)
# NOTE(review): the result of this expression is not assigned, so the plot is
# only shown when run interactively.
p + gg.geom_bar()

Exemple #14

0

Afficher le fichier

#####################################################################################
# Here is an example of using Rodeo:

# We'll use the popular package called Pandas
# Install it with pip
! pip install pandas

# Import it as 'pd'
import pandas as pd

# Create a dataframe
df=pd.DataFrame({"Animal":["dog","dolphin","chicken","ant","spider"],"Legs":[4,0,2,6,8]})
df.head()

#####################################################################################
# An example of making a plot:
! pip install ggplot

from ggplot import ggplot, aes, geom_bar

ggplot(df, aes(x="Animal", weight="Legs")) + geom_bar(fill='blue')

# Find this tutorial helpful?  Checkout the blue sidebar for more tutorials!
import freegames



from turtle import *
from random import randrange
from freegames import square, vector

Exemple #15

0

Afficher le fichier

Fichier : explore.py Projet : ekalosak/ml_ds

import os
from os.path import join, splitext, exists, abspath, basename

import pandas as pd
import pdb
import ggplot as ggp

import utils

log = utils.makeLogger('titanic-data-exploration')

# Read the training set from ../datasets relative to the working directory
DFN = abspath(join(os.curdir, os.pardir, 'datasets', 'titanic-train.csv'))
df = pd.read_csv(DFN)
log.debug(
    "{} has columns: {}".format(basename(DFN), df.columns.tolist())
)

# Exploratory plot: passenger counts per sex, filled by the Survived column
log.debug("Plotting exploratory slices")
pt = (
    ggp.ggplot(ggp.aes(x='Sex', fill='Survived'), data=df)
    + ggp.geom_bar()
)
# NOTE evaluate "pt" at the debugger prompt to display the plot

pdb.set_trace()

Exemple #16

0

Afficher le fichier

# Version 1: ggplot
import ggplot as gg

# Long-format copy of df: the index becomes a "cat" column and every other
# column is melted into (variable, value) pairs.
df_ = df.copy()
df_["cat"] = df_.index
df_melted = df_.melt(id_vars=["cat"])

cm2 = gg.ggplot(
    df_melted,
    gg.aes(x="cat", fill="variable", y="value"),
)

# Axis labels and title
cm2 += (
    gg.xlab("category")
    + gg.ylab("frequency")
    + gg.ggtitle("Confusion Matrix")
)

# Stacked bars using the values as given
cm2 += gg.geom_bar(stat="identity", position="stack")

cm2

#%%

# Version 2: altair

import altair as alt

chart = alt.Chart(df_melted).mark_bar().encode(
    x='cat', y='value', color='variable')

Exemple #17

0

Afficher le fichier

Fichier : p4_1.py Projet : mestesugar/reflections

def plot_weather_data(turnstile_weather):
    '''
    Plot average daily subway entries per ISO weekday, split by rain status.

    ``turnstile_weather`` is a dataframe with (at least) the columns
    ``DATEn`` (date string, ``%Y-%m-%d``), ``rain`` and ``ENTRIESn_hourly``.
    The input is not modified; all work happens on a copy.

    Steps:
     * drop the hard-coded national holiday (2011-05-30, Memorial Day);
     * add a ``weekday`` column holding the ISO weekday (1 = Monday);
     * divide every ``ENTRIESn_hourly`` value by the number of distinct
       dates sharing its (weekday, rain) combination, so that the bar
       weights of one combination sum to its average daily entries;
     * draw one bar per weekday, faceted by rain and weekday.

    The plot is printed and returned.
    '''

    # May 30 is Memorial Day, the only national holiday in the data set.
    # It is hard-coded because the function receives nothing but the frame.
    national_holidays = ['2011-05-30']
    df = turnstile_weather.copy()
    df = df[~df.DATEn.isin(national_holidays)].copy()

    # ISO day of the week for each data point.
    df['weekday'] = df['DATEn'].apply(
        lambda day: datetime.datetime.strptime(day, '%Y-%m-%d').isoweekday())

    # Scale each row by 1 / (number of distinct dates in its weekday/rain
    # group). Example: with 2 rainy Fridays every rainy-Friday value is
    # halved, so summing them gives the average for one rainy Friday.
    # The previous loop instead overwrote every row with the group's
    # average total, which made the summed bar height rows x average.
    days_in_group = df.groupby(['weekday', 'rain'])['DATEn'].transform(
        'nunique')
    df['ENTRIESn_hourly'] = df['ENTRIESn_hourly'] / days_in_group

    p = ggplot.ggplot(ggplot.aes(x='factor(weekday)',
                                 weight='ENTRIESn_hourly',
                                 fill='weekday'),
                      data=df) +\
        ggplot.geom_bar() +\
        ggplot.facet_grid(x='rain', y='weekday') +\
        ggplot.ggtitle('Average Ridership on Sunny & Rainy ISO Weekdays')
    # print() with one argument works on both Python 2 and Python 3.
    print(p)
    return p

Exemple #18

0

Afficher le fichier

Fichier : p4_1_practice.py Projet : mestesugar/reflections

#!/usr/bin/env python

#================== This line is 79 spaces wide ==============================#

import ggplot

# Last 15 rows of the mtcars frame exposed by the ggplot package.
test_data = ggplot.mtcars.tail(15)
# print() with one argument works on both Python 2 and Python 3; the former
# print statement is a SyntaxError on Python 3.
print(test_data)

# Bar chart with one bar per cylinder class in the sample.
p = ggplot.ggplot(ggplot.aes(x='factor(cyl)'), data=test_data) +\
    ggplot.geom_bar()

print(p)

Exemple #19

0

Afficher le fichier

import ggplot as gg
import ultrasignup as us
import numpy as np

d = us.event_results(299)


def _finish_time_histogram(distance, title):
    """Build the finishing-time bar chart for one race distance.

    Uses the module-level results frame ``d``, keeps finishers of
    ``distance`` with a time above one hour, and facets/fills by gender.
    """
    mapping = gg.aes(x='time_hour', fill='gender')
    finishers = d[(d.distance == distance) & (d.time_hour > 1.0)]
    chart = gg.ggplot(mapping, finishers)
    chart = chart + gg.facet_grid(x='gender')
    chart = chart + gg.geom_bar(
        stat="bin", binwidth=.5, position="dodge", colour="black")
    chart = chart + gg.xlab("Time (hours)")
    chart = chart + gg.ylab("Number of Finishers")
    chart = chart + gg.ggtitle(title)
    return chart


p1 = _finish_time_histogram('50K', "50K Finishing Times for All Years")

p2 = _finish_time_histogram('11 Miler', "11M Finishing Times for All Years")

Exemple #20

0

Afficher le fichier

Fichier : module10.py Projet : blarosen95/LIS4930

def second(dataframe):
    """Show a bar chart of the ``Speed`` column of ``dataframe``.

    The chart uses light-blue bar outlines, a fixed title and the xkcd
    theme, and is displayed immediately; nothing is returned.
    """
    mapping = ggplot.aes(x='Speed')
    chart = ggplot.ggplot(mapping, data=dataframe)
    chart = chart + ggplot.geom_bar(color='lightblue')
    chart = chart + ggplot.ggtitle("Frequencies of Speeds Among Interfaces")
    chart = chart + ggplot.theme_xkcd()
    chart.show()

Exemple #21

0

Afficher le fichier

        },
        index=range(t * len(count_tops),
                    t * len(count_tops) + len(count_tops)))
    probs_list.append(probs_t)
    # Calculate KL divergences
    kl_mle_list.append(stats.entropy(true_bins_t, mle_probs_vals))
    kl_nn_list.append(stats.entropy(true_bins_t, nn_probs_t))

# Stack the per-tenor probability tables into a single frame.
probs = pd.concat(probs_list)

# In[44]:

# Only the tenors above 360 are plotted.
probs_tail = probs[probs.Tenor > 360]

# Bars show the 'Probs True' weights per 'Count Top'; the two step layers
# overlay the 'Probs MLE' and 'Probs NN' columns, one facet per tenor.
(gg.ggplot(probs_tail, gg.aes(x='Count Top', weight='Probs True'))
 + gg.facet_grid('Tenor')
 + gg.geom_bar()
 + gg.geom_step(gg.aes(y='Probs MLE', color='red'))
 + gg.geom_step(gg.aes(y='Probs NN', color='blue'))
 + gg.scale_x_continuous(limits=(0, len(count_tops))))

# In[57]:

# KL divergences

kl_df = pd.DataFrame({
    'Tenor': range(0, t_end + 1),
    'KL MLE': kl_mle_list,
    'KL NN': kl_nn_list
})

# print() with one argument works on both Python 2 and Python 3; the former
# print statement is a SyntaxError on Python 3.
print(kl_df.head())

Exemple #22

0

Afficher le fichier

#!/usr/bin/env python

#================== This line is 79 spaces wide ==============================#

import ggplot

# Last 15 rows of the mtcars frame exposed by the ggplot package.
test_data = ggplot.mtcars.tail(15)
# print() with one argument works on both Python 2 and Python 3; the former
# print statement is a SyntaxError on Python 3.
print(test_data)

# Bar chart with one bar per cylinder class in the sample.
p = ggplot.ggplot(ggplot.aes(x='factor(cyl)'), data=test_data) +\
    ggplot.geom_bar()

print(p)

Exemple #23

0

Afficher le fichier

        the_max = np.max(model2scores[model][plot_key_name])
        the_min = np.min(model2scores[model][plot_key_name])

        total = len(model2scores[model][plot_key_name])
        for value in model2scores[model][plot_key_name]:
            plot_dataset.append(
                [model, value, the_mean / total, the_std, the_max, the_min])

    plot_dataset_pd = pd.DataFrame(
        plot_dataset,
        columns=['model', 'value', 'weight', 'std', 'max', 'min'])

    if 'logloss' in plot_key_name:

        p = ggplot.ggplot(ggplot.aes(x = 'model', fill = 'model', weight = 'weight'), data = plot_dataset_pd) +\
        ggplot.geom_bar(position = 'stack', width = 4) +\
        ggplot.geom_errorbar(ggplot.aes(x = 'model', y = 'value')) +\
        ggplot.ylim(0 ,5.05) +\
        ggplot.ggtitle(plot_key_name)

        #print(p)

    elif 'time' in plot_key_name:

        p = ggplot.ggplot(ggplot.aes(x = 'model', fill = 'model', weight = 'weight'), data = plot_dataset_pd) +\
        ggplot.geom_bar(position = 'stack', width = 4) +\
        ggplot.geom_errorbar(ggplot.aes(x = 'model', y = 'value')) +\
        ggplot.ggtitle(plot_key_name)

        #print(p)

Exemple #24

0

Afficher le fichier

Fichier : initial-story.py Projet : qbektrix/rodeo

# For example, select the following lines
x = 7
x**2
# and remember to press COMMAND + ENTER

# You can also run code directly in the console below.

#####################################################################################
# Here is an example of using Rodeo:

# We'll use the popular package called Pandas
# Install it with pip
! pip install pandas

# Import it as 'pd'
import pandas as pd

# Create a dataframe
df=pd.DataFrame({"Animal":["dog","dolphin","chicken","ant","spider"],"Legs":[4,0,2,6,8]})
df.head()

#####################################################################################
# An example of making a plot:
! pip install ggplot

from ggplot import ggplot, aes, geom_bar

ggplot(df, aes(x="Animal", weight="Legs")) + geom_bar(fill='blue')

# Find this tutorial helpful?  Checkout the blue sidebar for more tutorials!