Exemple #1
0
def graph3(score_data):
    """Build graph 3: a box plot of question scores over time.

    ``score_data`` is indexed as a sequence: item 0 supplies the column
    names and the remaining items supply the rows.  The date column is the
    header entry at the position returned by ``find_time_stamp``.  Only
    ``int64`` columns are treated as question scores, and the rows are
    expected to carry a "Name" column.

    Returns the ggplot object for the box plot.
    """
    header = score_data[0]
    date_column = header[find_time_stamp(score_data)]
    data = DataFrame(score_data[1:], columns=header)

    # Numerical questions are the columns pandas inferred as int64.
    integer_columns = data.select_dtypes(include=['int64'])
    question_names = integer_columns.columns.values

    # Reshape to long form: one row per (date, name, question) score.
    melted = pd.melt(
        data,
        id_vars=[date_column, "Name"],
        value_vars=question_names,
        var_name="Question",
        value_name="Score",
    )

    # "Name" was only needed as an id column for the melt; drop it.
    melted = melted.drop('Name', axis=1)

    # Parse the month/day/year date strings into real datetimes.
    melted[date_column] = pd.to_datetime(
        melted[date_column], format="%m/%d/%Y"
    )

    # Assemble the box plot: date on the x axis, score on the y axis.
    aesthetics = ggplot.aes(x=date_column, y='Score')
    return (
        ggplot.ggplot(aesthetics, melted)
        + ggplot.geom_boxplot()
        + ggplot.ggtitle("Distribution of Question Scores over Time")
    )
Exemple #2
0
    def boxplot(self, conn, column, table_chosen, title):
        """Print a box plot of one table column, along with timing output.

        The data is fetched with ``dfile.single_selector`` using ``conn``,
        ``table_chosen`` and ``column``; ``column`` is also the x aesthetic
        and ``title`` becomes the plot title.  Nothing is returned: the
        current time, an elapsed time and the plot itself are printed.
        """
        frame = dfile.single_selector(
            conn=conn, table=table_chosen, column=column)

        plot = ggplot(aes(x=column), data=frame)
        plot = plot + geom_boxplot() + theme_gray() + labs(title=title)

        b = datetime.datetime.now()
        print(b)
        # NOTE(review): `a` is not defined inside this method; presumably a
        # start timestamp set elsewhere in the file -- confirm.
        print(b - a)
        print(plot)
Exemple #3
0
# Finish configuring the plot `p` (created above this excerpt): x-axis
# limits, title, one free-scaled facet per feature, and blank axis labels.
for component in (
        scale_x_continuous(limits=(-25, 25)),
        ggtitle("sarimax coefficient magnitude distribution"),
        facet_wrap("feature", ncol=3, scales="free"),
        labs(x=" ", y=" "),
):
    p += component

# visuals: gray theme with a small monospace font
t = theme_gray()
for rc_key, rc_value in (('font.size', 10), ('font.family', 'monospace')):
    t._rcParams[rc_key] = rc_value
p += t

p.save("arima_1/" + "histogram.png")

# boxplot: one box per 'variable' of master_df, faceted by feature, with
# the y axis clamped to [-25, 25] and blank axis labels.
p = (
    ggplot(aes(x='variable', y='value'), data=master_df)
    + geom_boxplot()
    + scale_y_continuous(limits=(-25, 25))
    + ggtitle("sarimax coefficient magnitudes")
    + facet_wrap("feature", ncol=3)
    + labs(x=" ", y=" ")
)

# visuals: gray theme with a small monospace font
t = theme_gray()
font_settings = {'font.size': 10, 'font.family': 'monospace'}
for rc_key, rc_value in font_settings.items():
    t._rcParams[rc_key] = rc_value
p += t

p.save("arima_1/" + "boxplot.png")

for feature in [
        "home_goal", "away_goal", "home_yellow", "away_yellow", "home_red",
Exemple #4
0
        tmp[i] = tmp[i] / float(tmp.ix[0][i])
    tmp = tmp.drop(tmp.index[[0]])
    data[n] = tmp

# For every iteration key "2".."12", gather the per-column maxima of
# data[key] together with a matching list of iteration labels.
# (Assumes each data[key] is a two-dimensional frame -- shape[1] is used as
# the number of values produced by .max().)
iterations = map(str, range(2, 13))
max_val = []
n_val = []
for label in iterations:
    frame = data[label]
    column_maxima = frame.max()
    max_val.extend(column_maxima.tolist())
    n_val.extend([label] * frame.shape[1])

# Two parallel lists -> two-column frame (rows become columns on transpose).
rr = pandas.DataFrame([n_val, max_val]).transpose()
rr.columns = ['Iteration', 'Max']

# Box plot of the maxima, grouped by iteration.
max_aes = ggplot.aes(x='Iteration', y='Max')
ggplot.ggplot(rr, max_aes) + ggplot.geom_boxplot()

# For every iteration key "2".."12", count per column how many entries of
# data[key] equal 0, together with a matching list of iteration labels.
# (Assumes each data[key] is a two-dimensional frame -- shape[1] is used as
# the number of values produced by .sum().)
iterations = map(str, range(2, 13))
vals = []
n_val = []
for label in iterations:
    frame = data[label]
    zero_counts = (frame == 0).sum()
    vals.extend(zero_counts.tolist())
    n_val.extend([label] * frame.shape[1])

# Two parallel lists -> two-column frame (rows become columns on transpose).
rr = pandas.DataFrame([n_val, vals]).transpose()
rr.columns = ['Iteration', 'Equal to 0']

# Box plot of the zero counts, grouped by iteration.
zero_aes = ggplot.aes(x='Iteration', y='Equal to 0')
ggplot.ggplot(rr, zero_aes) + ggplot.geom_boxplot()
    xlab("armed forces rate") + ylab("IncomePerPerson") + ggtitle("Gapminder")


##Regression week 1
###armedforcesrate=data['armedforcesrate'][(data['armedforcesrate'] >= 0)].values

from ggplot import ggplot, aes, geom_boxplot
##filter by europe
# Keep only the rows whose 'European' column is 'Europe'.
is_european = data['European'] == 'Europe'
europe_onlyarmedforcesrate = data[is_european]

# Summary statistics for the European subset and for the full data set
# (the results are not stored; useful when run interactively).
europe_onlyarmedforcesrate.describe()

data.describe()


# Box plot of armed forces rate against NATO/EU membership status for the
# European subset.
(
    ggplot(europe_onlyarmedforcesrate,
           aes(x='armedforcesrate', y='NATO_EU_MEMBERSHIP'))
    + geom_boxplot()
    + xlab("armed forces rate")
    + ylab(" Nato EU membership status")
    + ggtitle("Boxplot for armed forces rates for gapminder Nato Eu membership for European countries")
)

##income per person

# Box plot of income per person against NATO/EU membership status for the
# European subset, restricted to rows with polityscore >= -12 and a
# non-negative incomeperperson.
income_subset = europe_onlyarmedforcesrate[
    (europe_onlyarmedforcesrate['polityscore'] >= -12)
    & (europe_onlyarmedforcesrate['incomeperperson'] >= 0)
]
(
    ggplot(income_subset, aes(x='incomeperperson', y='NATO_EU_MEMBERSHIP'))
    + geom_boxplot()
    # The x aesthetic is 'incomeperperson'; the label previously read
    # "armed forces rate", copied from the plot above.
    + xlab("income per person")
    + ylab(" Nato EU membership status")
    + ggtitle("Boxplot for incom per person score for gapminder Nato Eu membership for European countries")
)



##Regression week 2