def gray_plot(data, min=0, max=1, name=""):
    """Build a grayscale tile plot of ``data`` with ggplot2 through rpy2.

    ``data`` is melted with the R ``reshape`` package (``id_var=['x', 'y']``)
    and drawn as tiles positioned by the melted ``L1``/``L2`` columns and
    filled by the ``value`` column.

    Args:
        data: Object handed to ``reshape.melt``.
        min: Lower limit of the black-to-white fill gradient.
        max: Upper limit of the black-to-white fill gradient.
        name: Passed as the first argument of ``scale_x_continuous``.

    Returns:
        The assembled ggplot object (not yet drawn).
    """
    reshape_pkg = importr('reshape')
    long_form = reshape_pkg.melt(data, id_var=['x', 'y'])
    plot = ggplot2.ggplot(long_form)

    # Layers are added in the same order as a chained `a + b + c` expression.
    layers = [
        ggplot2.aes_string(x='L1', y='L2'),
        ggplot2.geom_tile(ggplot2.aes_string(fill='value')),
        ggplot2.scale_fill_gradient(
            low="black", high="white", limits=FloatVector((min, max))
        ),
        ggplot2.coord_equal(),
        ggplot2.scale_x_continuous(name),
    ]
    for layer in layers:
        plot = plot + layer
    return plot
def gray_plot(data, min=0, max=1, name=""):
    """Return a ggplot2 tile plot of ``data`` shaded from black to white.

    The input is reshaped with ``melt`` from the R ``reshape`` package
    (``id_var=['x', 'y']``); the melted ``L1`` and ``L2`` columns give the
    tile positions and ``value`` gives the fill.

    Args:
        data: Object passed to ``reshape.melt``.
        min: Lower bound of the fill-gradient limits.
        max: Upper bound of the fill-gradient limits.
        name: First argument forwarded to ``scale_x_continuous``.

    Returns:
        The composed ggplot object; the caller is responsible for drawing it.
    """
    melt = importr('reshape').melt
    canvas = ggplot2.ggplot(melt(data, id_var=['x', 'y']))

    # Build the plot one layer at a time instead of one long `+` chain.
    canvas = canvas + ggplot2.aes_string(x='L1', y='L2')
    canvas = canvas + ggplot2.geom_tile(ggplot2.aes_string(fill='value'))

    gradient_limits = FloatVector((min, max))
    canvas = canvas + ggplot2.scale_fill_gradient(
        low="black", high="white", limits=gradient_limits
    )
    canvas = canvas + ggplot2.coord_equal()
    canvas = canvas + ggplot2.scale_x_continuous(name)
    return canvas
## note that, unlike in R, a dot (.) is not valid in Python variable names,
## so the R objects IL.railroads and IL.final are bound to underscore names
IL_railroads = robjects.r('IL.railroads')
IL_final = robjects.r('IL.final')

## import the device driver from R with importr to plot to PNG
## we can then call any function in the grDevices package
grdevices = importr('grDevices')
grdevices.png(file='/Users/user/Downloads/data/mapplot.png', width=1300, height=1000)

## plot the map
## note that the order matters when we add another layer in ggplot (here IL_railroads):
## first aes, then data, that's different from R
## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349)
## note that we use a dictionary to set the opts so that option names that are not
## valid Python keywords (for example legend.key.size) can still be passed
p_map = (
    ggplot2.ggplot(IL_final)
    + ggplot2.geom_polygon(
        ggplot2.aes(x='long', y='lat', group='group',
                    color='ObamaShare', fill='ObamaShare')
    )
    ## a single fill scale carries both the legend name and the red-to-blue
    ## palette: ggplot2 keeps only the last scale added per aesthetic, so a
    ## separate scale_fill_continuous(name=...) would discard the palette
    + ggplot2.scale_fill_gradient(name="Obama Vote Share", high='blue', low='red')
    ## NOTE(review): the outline (colour) scale still uses ggplot2's default
    ## palette; consider giving it the same low/high colours as the fill
    + ggplot2.scale_colour_continuous(name="Obama Vote Share")
    + ggplot2.opts(**{
        'legend.position': 'left',
        'legend.key.size': robjects.r.unit(2, 'lines'),
        'legend.title': ggplot2.theme_text(size=14, hjust=0),
        'legend.text': ggplot2.theme_text(size=12),
        'title': "Obama Vote Share and Distance to Railroads in IL",
        'plot.title': ggplot2.theme_text(size=24),
        'plot.margin': robjects.r.unit(robjects.r.rep(0, 4), 'lines'),
        'panel.background': ggplot2.theme_blank(),
        'panel.grid.minor': ggplot2.theme_blank(),
        'panel.grid.major': ggplot2.theme_blank(),
        'axis.ticks': ggplot2.theme_blank(),
        'axis.title.x': ggplot2.theme_blank(),
        'axis.title.y': ggplot2.theme_blank(),
        'axis.text.x': ggplot2.theme_blank(),
        'axis.text.y': ggplot2.theme_blank(),
    })
    + ggplot2.geom_line(
        ggplot2.aes(x='long', y='lat', group='group'),
        data=IL_railroads, color='grey', size=0.2
    )
    + ggplot2.coord_equal()
)
p_map.plot()

## add the scatterplot
## fetch the R object IL.final (a dot is not valid in a Python variable name)
IL_final = robjects.r('IL.final')

## import the device driver from R with importr to plot to PNG
## we can then call any function in the grDevices package
grdevices = importr('grDevices')
grdevices.png(file='/Users/user/Downloads/data/mapplot.png', width=1300, height=1000)

## plot the map
## note that the order matters when we add another layer in ggplot (here IL_railroads):
## first aes, then data, that's different from R
## (see http://permalink.gmane.org/gmane.comp.python.rpy/2349)
## note that we use a dictionary to set the opts so that option names that are not
## valid Python keywords (for example legend.key.size) can still be passed
p_map = (
    ggplot2.ggplot(IL_final)
    + ggplot2.geom_polygon(
        ggplot2.aes(x='long', y='lat', group='group',
                    color='ObamaShare', fill='ObamaShare')
    )
    ## a single fill scale carries both the legend name and the red-to-blue
    ## palette: ggplot2 keeps only the last scale added per aesthetic, so a
    ## separate scale_fill_continuous(name=...) would discard the palette
    + ggplot2.scale_fill_gradient(name="Obama Vote Share", high='blue', low='red')
    ## NOTE(review): the outline (colour) scale still uses ggplot2's default
    ## palette; consider giving it the same low/high colours as the fill
    + ggplot2.scale_colour_continuous(name="Obama Vote Share")
    + ggplot2.opts(**{
        'legend.position': 'left',
        'legend.key.size': robjects.r.unit(2, 'lines'),
        'legend.title': ggplot2.theme_text(size=14, hjust=0),
        'legend.text': ggplot2.theme_text(size=12),
        'title': "Obama Vote Share and Distance to Railroads in IL",
        'plot.title': ggplot2.theme_text(size=24),
        'plot.margin': robjects.r.unit(robjects.r.rep(0, 4), 'lines'),
        'panel.background': ggplot2.theme_blank(),
        'panel.grid.minor': ggplot2.theme_blank(),
        'panel.grid.major': ggplot2.theme_blank(),
        'axis.ticks': ggplot2.theme_blank(),
        'axis.title.x': ggplot2.theme_blank(),
        'axis.title.y': ggplot2.theme_blank(),
        'axis.text.x': ggplot2.theme_blank(),
        'axis.text.y': ggplot2.theme_blank(),
    })
    ## IL_railroads is not assigned in this fragment; it must already be bound
    + ggplot2.geom_line(
        ggplot2.aes(x='long', y='lat', group='group'),
        data=IL_railroads, color='grey', size=0.2
    )
    + ggplot2.coord_equal()
)
p_map.plot()

## add the scatterplot
# Attach the change values to pickups_change under the name 'percent_change'
# and drop every row that has a missing value after the column-wise concat.
temp_change = temp_change.rename(columns={'passenger_count': 'percent_change'})
pickups_change = pd.concat([pickups_change, temp_change['percent_change']], axis=1)
pickups_change = pickups_change.dropna()

# Find top 10 neighborhoods with largest percent change
# (sort_values replaces DataFrame.sort, which was removed in pandas 0.20)
top_changes = (
    pickups_change[['percent_change', 'nbhd', 'borough']]
    .drop_duplicates()
    .sort_values(['percent_change'], ascending=False)
    .head(10)
)
print(tabulate(top_changes, tablefmt='pipe', headers='keys', showindex=False))

# Shift every value by |minimum| before log1p; when the minimum is negative
# this makes the smallest value 0, so log1p never sees a value below 0.
pickups_change['percent_change'] = np.log1p(
    pickups_change['percent_change'] + abs(pickups_change['percent_change'].min())
)

# Right-join onto the border rows by 'nbhd', then drop rows with any missing
# value after the merge.
pickups_change = pd.merge(pickups_change, nbhd_borders, how='right', on=['nbhd']).dropna()

# One filled polygon per neighborhood, coloured by the log-scaled change.
p5 = (
    ggplot2.ggplot(pandas2ri.py2ri(pickups_change))
    + ggplot2.aes_string(x='lon', y='lat', group='nbhd', fill='percent_change')
    + ggplot2.geom_polygon()
    + ggplot2.scale_fill_gradient(low='blue', high='green')
    + ggplot2.theme(legend_position='bottom')
    + ggplot2.labs(x='', y='', title='Change In Annual # of Pickups (2010 - 2015)',
                   fill='Percent Change\n(Log-Scale)')
)
p5.save('./plots/2010_2015_percent_change.png', width=5, height=6)

#################################################################
#################################################################
################ MODEL FITTING ##################################
#################################################################
#################################################################

# Do some quick benchmarks (predict 2015 from 2014 data)
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint as sp_randint