def graph3(score_data):
    """Create and return graph 3, a box plot of question scores over time.

    The first row of ``score_data`` supplies the column names and the
    remaining rows supply the records. Every ``int64`` column is treated as
    a scored question. The column picked out via ``find_time_stamp`` is
    parsed as an ``%m/%d/%Y`` date and used for the x axis. A ``"Name"``
    column must be present; it is carried through the reshape and then
    discarded.

    Returns the assembled ggplot object.
    """
    time_col = score_data[0][find_time_stamp(score_data)]
    frame = DataFrame(score_data[1:], columns=score_data[0])

    # Only integer-typed columns count as numerical questions.
    question_cols = frame.select_dtypes(include=['int64']).columns.values

    # Reshape to long form so each question answer sits in its own row,
    # then discard the "Name" column, which the plot does not use.
    long_frame = pd.melt(
        frame,
        id_vars=[time_col, "Name"],
        value_vars=question_cols,
        var_name="Question",
        value_name="Score",
    ).drop('Name', axis=1)

    # Turn the date strings into real datetime values.
    long_frame[time_col] = pd.to_datetime(long_frame[time_col], format="%m/%d/%Y")

    # Assemble the box plot one layer at a time.
    chart = ggplot.ggplot(ggplot.aes(x=time_col, y='Score'), long_frame)
    chart = chart + ggplot.geom_boxplot()
    chart = chart + ggplot.ggtitle("Distribution of Question Scores over Time")
    return chart
def boxplot(self, conn, column, table_chosen, title):
    """Print a box plot of one column of a table, plus timing information.

    The data is fetched with ``dfile.single_selector`` using ``conn``,
    ``table_chosen`` and ``column``; ``column`` is also the x aesthetic and
    ``title`` becomes the plot title. Nothing is returned: the current
    timestamp, the difference between it and ``a``, and the plot itself are
    printed.
    """
    frame = dfile.single_selector(conn=conn, table=table_chosen, column=column)

    # Build the plot layer by layer: base, box plot geometry, theme, title.
    chart = ggplot(aes(x=column), data=frame)
    chart = chart + geom_boxplot()
    chart = chart + theme_gray()
    chart = chart + labs(title=title)

    finished_at = datetime.datetime.now()
    print(finished_at)
    # NOTE(review): ``a`` is not defined in this method; presumably a start
    # timestamp set elsewhere in the module -- confirm.
    print(finished_at - a)
    print(chart)
p += scale_x_continuous(limits=(-25, 25)) p += ggtitle("sarimax coefficient magnitude distribution") p += facet_wrap("feature", ncol=3, scales="free") p += labs(x=" ", y=" ") # visuals t = theme_gray() t._rcParams['font.size'] = 10 t._rcParams['font.family'] = 'monospace' p += t p.save("arima_1/" + "histogram.png") # boxplot p = ggplot(aes(x='variable', y='value'), data=master_df) p += geom_boxplot() p += scale_y_continuous(limits=(-25, 25)) p += ggtitle("sarimax coefficient magnitudes") p += facet_wrap("feature", ncol=3) p += labs(x=" ", y=" ") # visuals t = theme_gray() t._rcParams['font.size'] = 10 t._rcParams['font.family'] = 'monospace' p += t p.save("arima_1/" + "boxplot.png") for feature in [ "home_goal", "away_goal", "home_yellow", "away_yellow", "home_red",
tmp[i] = tmp[i] / float(tmp.ix[0][i]) tmp = tmp.drop(tmp.index[[0]]) data[n] = tmp max_val = [] n_val = [] iterations = map(lambda x: str(x), range(2, 13)) for iteration in iterations: data_i = data[iteration] max_val += data_i.max().tolist() n_val += [iteration] * data_i.shape[1] rr = pandas.DataFrame([n_val, max_val]).T rr.columns = ['Iteration', 'Max'] ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Max')) + ggplot.geom_boxplot() vals = [] n_val = [] iterations = map(lambda x: str(x), range(2, 13)) for iteration in iterations: data_i = data[iteration] vals += (data_i == 0).sum().tolist() n_val += [iteration] * data_i.shape[1] rr = pandas.DataFrame([n_val, vals]).T rr.columns = ['Iteration', 'Equal to 0'] ggplot.ggplot(rr, ggplot.aes(x='Iteration', y='Equal to 0')) + ggplot.geom_boxplot()
xlab("armed forces rate") + ylab("IncomePerPerson") + ggtitle("Gapminder") ##Regression week 1 ###armedforcesrate=data['armedforcesrate'][(data['armedforcesrate'] >= 0)].values from ggplot import ggplot, aes, geom_boxplot ##filter by europe europe_onlyarmedforcesrate=data[(data['European'] == 'Europe')] pandas.DataFrame.describe(europe_onlyarmedforcesrate) pandas.DataFrame.describe(data) ggplot(europe_onlyarmedforcesrate, aes(x='armedforcesrate', y='NATO_EU_MEMBERSHIP')) + geom_boxplot() +\ xlab("armed forces rate") + ylab(" Nato EU membership status") + ggtitle("Boxplot for armed forces rates for gapminder Nato Eu membership for European countries") ##income per person ggplot(europe_onlyarmedforcesrate[(europe_onlyarmedforcesrate['polityscore'] >= -12) & (europe_onlyarmedforcesrate['incomeperperson'] >=0)] , aes(x='incomeperperson', y='NATO_EU_MEMBERSHIP')) + geom_boxplot() +\ xlab("armed forces rate") + ylab(" Nato EU membership status") + ggtitle("Boxplot for incom per person score for gapminder Nato Eu membership for European countries") ##Regression week 2